#### In this notebook the model is created to perform online inference on the given video. The model takes as an input one frame at a time, along with some features that have been calculated on previous timesteps, and produces prediction on each timestep.

In [61]:
import tensorflow as tf
import keras
import keras.backend as K
from keras.models import Model, Sequential
from tensorflow.keras.models import load_model, model_from_json
from keras.applications import MobileNetV2
from keras.layers import *
import cv2
import json
import numpy as np
import pandas as pd
from matplotlib.pyplot import imshow
import matplotlib.pyplot as plt
from scipy.misc import imresize
from IPython.display import HTML
%matplotlib inline

In [2]:
keras.__version__

'2.2.2'

##### First we download the trained model that make prediction based on sequence of 15 frames. Beacause of the bug in keras the model cannot be loaded from the saved keras model. As a walkaround I save separately the configuration in json format and weights as numpy array in npy format. Then the model is rebuild from these data.

In [3]:
with open("final_model.json", "r") as json_file:
    fmjs=json.load(json_file)

wwee=np.load('f_m_w.npy')

final_model=model_from_json(fmjs)

final_model.set_weights(wwee)

##### Next step is to create the building blocks for the sliding model. First, the convolutional base from MobileNetV2

In [4]:
conv_base=MobileNetV2(include_top=False)



##### Second adding self attention mechanism on top of convolutional block. After that load the weights from trained model. 

In [5]:
inp=Input((None,None,None))
x = inp

x=conv_base(x)

# next 5 layers are attention mechanism

weights=Conv2D(1,(3,3),activation='sigmoid',padding='SAME',kernel_initializer=keras.initializers.Zeros(),
                                               bias_initializer=keras.initializers.Ones())(x)

#next two lines make the weights sum up to 1: weights->weights/sum(weights)
norm = Lambda(lambda t: 1/K.sum(t,axis=[1,2]))(weights)

weights = merge.multiply([norm,weights])

#next two lines calculate weighted average
x = merge.multiply([x,weights])

x = Lambda(lambda t: K.sum(t,axis=[1,2]))(x)

conv_base_att=Model(inputs=[inp],outputs=[x,weights])

conv_base_att.set_weights(final_model.layers[1].layer.get_weights())

##### Next two blocks that connected residually are dense layer and convolutional layer from dynamic model. The weights are also downloaded from trained model.

In [6]:
inp=Input((1280,))
x=Dense(2)(inp)
dense1=Model(inputs=[inp],outputs=[x])
dense1.set_weights(final_model.layers[-1].layers[1].layer.get_weights())

In [7]:
inp=Input((None,1280))
x=Conv1D(2,3,padding=
        'valid')(inp)
conv1D=Model(inputs=[inp],outputs=[x])
conv1D.set_weights(final_model.layers[-1].layers[2].get_weights())

##### Now that we have all necessary building blocks, we can construct the final sliding model. The model takes as an input 1) the frame as a numpy array of the form [width, hight,color] 2) the time position of the frame frame_nr that starts from 0 for the first frame extracted from the video 3) features from two previous frames extracted by convolution base + attention (the first building block of the model) 4) values predicted on the previous time step to calculate moving average. As outputs, the model produces 1) prediction values 2) the attention mask 3) features for the current frame extracted by convolution base + attention, to use it on the next time steps 

In [8]:
frame=Input((None,None,None))
frame_nr=Input((1,),dtype='int32')
feature1=Input((1280,))
feature2=Input((1280,))
moving_ag = Input((2,))


feat0,attention = conv_base_att(frame)

avg = dense1(feat0)

#edge case of first and second frame. In the case if there is no previous frame, the previous frame is set to current
feat1=Lambda(lambda t: K.switch(t[0]>=1,t[1],t[2]))([frame_nr,feature1,feat0])
feat2=Lambda(lambda t: K.switch(t[0]>=2,t[1],t[2]))([frame_nr,feature2,feat1])

#time convolution. 
seq = Lambda(lambda x: K.stack([x[0],x[1],x[2]],axis=1))([feat2,feat1,feat0])
conv_t=conv1D(seq)
conv_t=Flatten()(conv_t)

curr_pred=Add()([avg,conv_t])

# edge case of the first frame when no moving average was calculated on the previous step
mov_av=Lambda(lambda t: K.switch(t[0]>=1,t[1],t[2]))([frame_nr,moving_ag,curr_pred])

#the weights in the moving average are taken to be a=14/15, b=1/15
mov_av=Lambda(lambda x: x*14./15.)(mov_av)
curr_pred=Lambda(lambda x: x/15.)(curr_pred)

prediction = Add()([mov_av,curr_pred])

In [9]:
sliding_model=Model(inputs=[frame_nr,frame,feature1,feature2,moving_ag],
                    outputs=[prediction,attention,feat0])

##### No we can use the constructed model to make inferences on the videos from test dataset.

In [57]:
wind_df=pd.read_csv('../new_data//test_data.csv') #dataframe containing path to the video and corresponding labels
subset_df=wind_df[wind_df['av.wn.']>10] #taking a subset from the test data using condition (eg. only samples with av.wind<10)
sample = list(subset_df.sample().values[0])[:3] #take one sample from the dataframe
videoPath=sample[0] # extract video path from the sample
true_val=sample[1:3] # extract target values from the sample

In [58]:
# define parameters to add text on the frames

font                   = cv2.FONT_HERSHEY_COMPLEX
fontScale              = 1
fontColor              = (255,255,255)
lineType               = 2

##### This is the main loop which 1)extract frame from the video, 2)make prediction using the model for a given frame taking previous feature and predictions, 3)(optional) displays frame in the real time. The inference time on the normal CPU (~5sec per frame) is unfortunatelly too big to be able to play video without slowing it down 4)add predicted and target values on the frame and save the frame to a new video file

In [59]:
vidcap = cv2.VideoCapture(videoPath)
video = cv2.VideoWriter('video{}.avi'.format(videoPath.split('/')[-1].split('.')[0]), cv2.VideoWriter_fourcc(*"MJPG"), 10,(704,448))
fr_nr=np.zeros((1,1))
f1=np.zeros((1,1280))
f2=np.zeros((1,1280))
prediction=np.zeros((1,2))
success = True
while success:
    success,image = vidcap.read()
    if not success:
        break
    image = image[:448,:704,:]
    frame = np.array([cv2.cvtColor(image, cv2.COLOR_BGR2RGB)/255.])
    inputs=[fr_nr,frame,f1,f2,prediction]
    prediction,attention,feat0 = sliding_model.predict(inputs)
    feature2=feature1
    feature1=feat0
    img=np.expand_dims(imresize(np.squeeze(attention),image.shape[0:2])/255.,axis=-1)*image
    img = img.astype(np.uint8)
    
    img=cv2.putText(img,'target',
                (400,380), 
                font, 
                fontScale,
                fontColor,
                lineType)
    img=cv2.putText(img,'Avg.wind={0:.1f}'.format(true_val[0]),
                (400,410), 
                font, 
                fontScale,
                fontColor,
                lineType) 
    img=cv2.putText(img,'Wind gust={0:.1f}'.format(true_val[1]),
                (400,440), 
                font, 
                fontScale,
                fontColor,
                lineType)
    img=cv2.putText(img,'prediction',
                (15,380), 
                font, 
                fontScale,
                fontColor,
                lineType) 
    img=cv2.putText(img,'Avg.wind={0:.1f}'.format(prediction[0][0]),
                (15,410), 
                font, 
                fontScale,
                fontColor,
                lineType) 
    img=cv2.putText(img,'Wind gust={0:.1f}'.format(prediction[0][1]),
                (15,440), 
                font, 
                fontScale,
                fontColor,
                lineType)
    cv2.imshow('frame',img)
    video.write(img)
    print(prediction)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        print('Quit.')
        break
vidcap.release()
video.release()
cv2.destroyAllWindows()        

  if issubdtype(ts, int):
  elif issubdtype(type(size), float):


[[ 9.773165 14.619051]]
[[ 9.530894 14.469444]]
[[ 9.976809 14.540912]]
[[ 9.588535 14.211448]]
[[ 9.575058 14.132329]]
[[ 9.651064 14.452839]]
[[ 9.905109 14.466189]]
[[ 9.675811 14.477369]]
[[ 9.655557 14.239512]]
[[ 9.713745 14.417729]]
[[ 9.661634 14.245752]]
[[ 9.833809 14.331382]]
[[ 9.7833   14.183359]]
[[10.06549 14.55429]]
[[ 9.951974  14.4823265]]
[[ 9.908804 14.334339]]
[[ 9.861171 14.257081]]
[[10.084898 14.730763]]
[[10.09659  14.608562]]
[[10.066954 14.621211]]
[[10.431585 14.830928]]
[[10.37924  14.861015]]
[[10.240153 14.802098]]
[[10.286678 14.570804]]
[[10.002339 14.296959]]
[[10.305837 14.695758]]
[[10.309497 14.841563]]
[[ 9.959859 14.578887]]
[[ 9.567806 14.304322]]
[[ 9.50858  14.208578]]
[[ 9.6341095 14.492869 ]]
[[ 9.521024 14.210741]]
[[10.128667 15.094796]]
[[10.314958 15.335717]]
[[10.379383 15.183701]]
[[10.270805 15.458745]]
[[10.281868 15.308438]]
[[10.175436 15.049285]]
[[10.470042 15.455546]]
[[10.360656 15.562513]]
[[10.307044 15.163985]]
[[10.358723 14

[[ 8.72452  13.891804]]
[[ 9.089265 14.035554]]
[[ 8.877127 13.886024]]
[[ 8.967444 14.204652]]
[[ 8.874712 13.994021]]
[[ 8.783801 13.819455]]
[[ 8.944158 14.086453]]
[[ 9.13249 14.22373]]
[[ 9.240103 14.264879]]
[[ 9.284067 14.454967]]
[[ 9.7259655 14.844502 ]]
[[ 9.744872 14.566371]]
[[ 9.845884 14.716819]]
[[10.039389 14.612426]]
[[ 9.753643 14.619682]]
[[10.208519 14.767121]]
[[ 9.74829   14.3641615]]
[[ 9.629368 14.504681]]
[[ 9.513436 14.562508]]
[[ 9.817241 14.963206]]
[[ 9.379564 14.492187]]
[[ 9.520203 14.642493]]
[[ 9.59428  14.641904]]
[[ 9.59107  14.653282]]
[[ 9.546084 14.772841]]
[[ 9.57965  14.583013]]
[[ 9.480843 14.467406]]
[[ 9.526265 14.55378 ]]
[[ 9.660273 14.622607]]
[[ 9.539658 14.428331]]
[[ 9.697622 14.474135]]
[[ 9.762317 14.424205]]
[[ 9.837044 14.45956 ]]
[[ 9.697866 14.201281]]
[[ 9.562385 13.925817]]
[[ 9.60287  14.024602]]
[[ 9.883573 14.305622]]
[[ 9.915479 14.266358]]
[[ 9.690456 14.087313]]
[[ 9.797284 14.34282 ]]
[[ 9.700375 14.125054]]
[[ 9.750821 14

##### The generated video is shown below

In [63]:
from IPython.display import HTML
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/NcxbpOcWyI4?rel=0&amp;controls=0&amp;showinfo=0" frameborder="0" allowfullscreen></iframe>')

In [140]:
cv2.destroyAllWindows() 