In [18]:
import pandas as pd
import skvideo.io  
import matplotlib.pyplot as plt
import numpy as np
import cv2
from sklearn.decomposition import PCA

In [3]:

from keras.preprocessing import image
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from PIL import Image as pil_image

model_vgg16_conv = VGG16(weights='imagenet', include_top=True)
print(model_vgg16_conv.summary())

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [4]:
videodata = skvideo.io.vread("shotgun.mp4")  


In [5]:
videodata.shape

(4875, 360, 640, 3)

In [6]:
df = pd.read_csv("data.csv")

In [7]:
left= 133.51
height = 110.23
top = 20.4
width = 90.18
def extract_image(frame,left,top,width,height):
    cutshot = videodata[int(frame),int(top*400/240):int((top+height)*400/240),int(left*720/320):int((left+width)*720/320),:]
    w,h = cutshot.shape[0],cutshot.shape[1]
    if w>h:
        return np.pad(cutshot,((0,0),(0,w-h),(0,0)))
    else: 
        return np.pad(cutshot,((0,h-w),(0,0),(0,0)))
    

In [8]:
df

Unnamed: 0.1,Unnamed: 0,frame,tracker,left,top,width,height
0,0,10,0,108.000000,58.000000,99.000000,119.000000
1,1,11,0,107.899350,57.582835,99.179957,119.216683
2,2,12,0,107.583443,57.037336,99.494549,119.595477
3,3,13,0,107.499589,56.945497,99.545947,119.657365
4,4,14,0,107.297973,56.815513,99.679828,119.818568
...,...,...,...,...,...,...,...
2881,2881,4818,53,265.833177,52.515181,74.751430,93.189287
2882,2882,4819,53,269.753925,51.270163,75.104538,93.630672
2883,2883,4820,53,272.199842,47.525492,75.153007,93.691259
2884,2884,4821,53,276.426418,47.086039,74.939077,93.423846


In [9]:
sections = df.apply(lambda x: extract_image(x['frame'],x['left'],x['top'],x['width'],x['height']),axis=1)

In [10]:
sections = sections.apply(lambda x:  cv2.resize(x, dsize=(224, 224), interpolation=cv2.INTER_CUBIC))

In [11]:
sections = sections.apply(lambda x:  x.reshape(1,224,224,3))

In [12]:
sections = np.concatenate(sections.values)

In [13]:
sections.shape

(2886, 224, 224, 3)

In [14]:
model = Model(inputs=model_vgg16_conv.input,
          outputs=model_vgg16_conv.get_layer('fc2').output)


# preprocess your image to be able to enter the neural network
inputs = preprocess_input(sections[:,:,:,:])
#predict image features
images_features = model.predict(inputs)
vector = images_features[0]
print(vector.shape)
# (4096,)
print(vector)

(4096,)
[0.        0.        0.        ... 0.        0.        1.9274929]


In [33]:
pca = PCA(n_components = 100 )
pca.fit(images_features.T)#[0].reshape(1, -1))

PCA(n_components=100)

In [34]:
print(pca.components_.shape)

(100, 2886)


In [35]:
pca.components_

array([[ 0.0217439 ,  0.02106977,  0.02155024, ...,  0.0070121 ,
         0.00665088,  0.0069429 ],
       [ 0.00749937,  0.00857991,  0.00848245, ...,  0.00845806,
         0.00769453,  0.00815036],
       [ 0.0151543 ,  0.01672979,  0.01749482, ...,  0.01569151,
         0.01513767,  0.01532258],
       ...,
       [-0.00116726, -0.01071787, -0.01490478, ...,  0.04736587,
         0.04700184,  0.05160125],
       [-0.0331476 , -0.03069482, -0.0280892 , ...,  0.00673636,
         0.00315669,  0.0061894 ],
       [ 0.00634898,  0.01231085,  0.01891335, ...,  0.06425433,
         0.06319207,  0.05827002]], dtype=float32)

In [36]:
from sklearn.cluster import KMeans

In [40]:
kmeans = KMeans(n_clusters=2, random_state=0).fit(pca.components_.T)


In [44]:
kmeans.labels_

array([1, 1, 1, ..., 1, 1, 1], dtype=int32)