##### This script is for testing the trained dynamic model on completely independent dataset on the laptop. Since there are only restricted resourses on the laptop, i don't want to make any preprocessing of the videos for the whole dataset. Therefor I define the generator that produces data sample on demand from a randomly chosen video right before feeding it into the model.

In [25]:
#%%writefile create_tf_dataset.py
import pandas as pd
import numpy as np
import cv2
import datetime
import os
from random import shuffle,sample
import math
from tqdm import tqdm
import re
import timeit
import matplotlib.pyplot as plt
from matplotlib.cbook import flatten
%matplotlib inline

##### This cell constructs the list of all videos (the pathes) and also list of all csv-files with numerical data in the given directory. Here also the sequence length seq_len and batch_size is definded. 

In [26]:
working_directory = '../new_data/'
seq_len=6
batch_size=1

video_names = [root+'/'+file for root, _, files in os.walk(working_directory) for file in files if file.endswith('flv')]
csv_names   = [root+'/'+file for root, _, files in os.walk(working_directory) for file in files if file.endswith('csv')]

#### Creating one dataframe with numerical values collecting all the csv files together

In [27]:
wind_data=pd.DataFrame()
for path in csv_names:
    tabel=pd.read_csv(path,parse_dates=False)#.reset_index(drop=True)
    wind_data=pd.concat([wind_data,tabel]).reset_index(drop=True)

In [28]:
wind_data.tail()

Unnamed: 0,date,Avg. wind,wind gusts
708,2018-5-31-16-7,15,20
709,2018-5-31-16-12,16,19
710,2018-5-31-16-17,13,17
711,2018-5-31-16-22,12,17
712,2018-5-31-16-27,13,16


##### Function to get time stamp from the video name

In [29]:
def vn_to_time(vn):
    return datetime.datetime.strptime(vn.split('/')[-1].split('.')[0], '%Y-%m-%d-%H-%M')

##### testing the function

In [30]:
vn_to_time(video_names[0])

datetime.datetime(2018, 5, 26, 11, 58)

##### Function that produces the numerical values for given video. It finds two closest in time data points and make interpolation. It also takes care of some edge cases.

In [31]:
def wind_for_video(vn,wind_data=wind_data):
    vid_time=vn_to_time(vn) #get the timestame for the given video
    
    wind_data["time_diff"]=wind_data['date'].apply(lambda x: #get the difference between time stamps of video and all datapoints
                                                   (datetime.datetime.strptime(x,'%Y-%m-%d-%H-%M')-vid_time).total_seconds())
    wind_data['time_dif_abs']=wind_data['time_diff'].apply(abs) #get absolute values of this difference
    wd=wind_data.sort_values(['time_dif_abs']).head(2).reset_index(drop=True) #get two closest datapoints
    
    if wd.empty:  
        return None
    if wd['time_dif_abs'].min()>500: # edge case when there is no datapoints closer than 500sec. 
                                     # No data will be produced for such a video
        return None
    
    elif wd['time_dif_abs'].max()>1000: # egde case when only one data point is close to the given video
        awind=wd['Avg. wind'].loc[wd['time_dif_abs'].idxmin()]
        gwind=wd['wind gusts'].loc[wd['time_dif_abs'].idxmin()]
    
    dt = wd['time_diff'].loc[1]-wd['time_diff'].loc[0]
    if abs(dt)<1:  # Edge case when two closest datapoints almost coinside 
        awind=(wd['Avg. wind'].loc[0]+wd['Avg. wind'].loc[1])/2
        gwind=(wd['wind gusts'].loc[0]+wd['wind gusts'].loc[1])/2
    else: # a regular case when there are two points to interpolate
        awind=(wd['Avg. wind'].loc[0]*wd['time_diff'].loc[1] - wd['Avg. wind'].loc[1]*wd['time_diff'].loc[0])/dt
        gwind=(wd['wind gusts'].loc[0]*wd['time_diff'].loc[1] - wd['wind gusts'].loc[1]*wd['time_diff'].loc[0])/dt
    return [awind,gwind]
    

##### testing the function

In [32]:
wind_for_video(video_names[100])

[11.0, 16.0]

##### Create a new dataframe with three columns - video path, and two corresponding numerical values as calculated using function abow.

In [33]:
df_wind=pd.DataFrame()
for video in video_names:
    num_values=wind_for_video(video) 
    if num_values is None: #if there is no numerical value for the given video available, skip this video
        continue
    df=pd.DataFrame([[video]+num_values],columns=['video','av.wn.','wn.gs.'])
    df_wind=df_wind.append(df)
df_wind.reset_index(drop=True,inplace=True)

##### Using this dataframe makes it easy to select any subset of numerical values from the test dataset. For exapmple, we take only samples where average wind less than 5. In this way we can check the performance of the model on any subset

In [34]:
df_wind[df_wind['av.wn.']<5].reset_index(drop=True).tail()

Unnamed: 0,video,av.wn.,wn.gs.
52,../new_data/new-tangana/2018-05-31-10-18.flv,4.0,5.8
53,../new_data/new-tangana/2018-05-31-10-24.flv,2.0,6.2
54,../new_data/new-tangana/2018-05-31-10-29.flv,3.8,7.6
55,../new_data/new-tangana/2018-05-31-10-35.flv,3.2,7.0
56,../new_data/new-tangana/2018-05-31-10-40.flv,1.6,5.8


##### Function that read frames from the given video and returns sequence of frames as a numpy array. The fisrt frame is taken randomly between frame 0 and 300.

In [35]:
def read_frames(videoPath,seq_len):
    count = 0
    vidcap = cv2.VideoCapture(videoPath)
    offset=np.random.randint(0,high=300) #generates random first frame
    seq=[]
    success = True
    while success:
        success,image = vidcap.read()
        if not success:
            return None
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if count >= offset:
            seq.append(image)
            if count >= offset+seq_len-1:
                return np.stack(seq)[:,:448,:704,:]
        count = count + 1
    

##### The generator that produces random sample of data from randomly chosen video. Returns a tuple of video frame sequence in numpy format and numerical data. This generator is then used with the method "batch_predict_generator" for keras model

In [36]:
def batch_gen(seq_len,data_frame=df_wind,batch_size=1):
    batch_frames=[]
    batch_data=[]
    while True:        
        df=list(data_frame.sample().values[0]) #sample the dataframe and create list from its values
        seq=read_frames(df[0],seq_len) #read sequence of frames for the given video
        num_dat=np.array([df[1],df[2]]) # get the numerical data for the video
        if df[1] is None or df[2] is None:
            continue
        if seq is None:
            continue
        batch_frames.append(seq/255.)
        batch_data.append(num_dat)
        if len(batch_frames)>=batch_size:
            yield np.stack(batch_frames),np.stack(batch_data) 
            batch_frames=[]
            batch_data=[]   

##### testing the generator

In [37]:
gen=batch_gen(seq_len)

In [62]:
next(gen)[0].shape

(1, 6, 448, 704, 3)

##### Loading the model to test

In [7]:
import tensorflow as tf
from keras.preprocessing.image import load_img
import matplotlib.pyplot as plt
from tqdm import tqdm
import keras as keras
from keras.applications import MobileNetV2
from keras.models import Model,load_model,Sequential
from keras.layers import *
import keras.backend as K

In [8]:
keras.__version__

'2.2.2'

##### Loading the model by parts. The first convolutional base.

In [9]:
#this is convolutional base
conv_base=MobileNetV2(include_top=False)#,input_tensor=next_element[0]



##### Adding self attention on top of convolutional base 

In [10]:
inp=Input((None,None,None))
x = inp

x=conv_base(x)

# next 5 layers are attention mechanism

weights=Conv2D(1,(3,3),activation='sigmoid',padding='SAME',kernel_initializer=keras.initializers.Zeros(),
                                               bias_initializer=keras.initializers.Ones())(x)

#next two lines make the weights sum up to 1: weights->weights/sum(weights)
norm = Lambda(lambda t: 1/K.sum(t,axis=[1,2]))(weights)

weights = merge.multiply([norm,weights])

#next two lines calculate weighted average
x = merge.multiply([x,weights])

x = Lambda(lambda t: K.sum(t,axis=[1,2]))(x)

##### Defining the conv_base + self attention  part of the model

In [11]:
conv_atten=Model(inputs=[inp],outputs=[x])

##### Loading the top layers of the model

In [12]:
stat_model=load_model('../Models/mobile_mini_top_attention_3.h5',custom_objects={'relu6':ReLU(6.),'tf':tf},compile=False)

In [61]:
stat_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_16 (InputLayer)           (None, None, None, N 0                                            
__________________________________________________________________________________________________
mobilenetv2_1.00_224 (Model)    (None, None, None, 1 2257984     input_16[0][0]                   
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, None, None, 1 11521       mobilenetv2_1.00_224[1][0]       
__________________________________________________________________________________________________
lambda_9 (Lambda)               (None, 1)            0           conv2d_3[0][0]                   
__________________________________________________________________________________________________
multiply_7

##### Redefining loaded model to output prediction values instead of the loss. In this case we don't need to feed true values and loss_weights for the loss function into the model. Defined in this way the architecture will be identical with the model defined above and the trained weights can be assined. Unfurtunately loaded model does not work with TimeDistributed wrapper, because of the bug.

In [13]:
mobinet2=Model(inputs=[stat_model.layers[0].input],outputs=[stat_model.layers[6].output])

In [14]:
conv_atten.set_weights(mobinet2.get_weights())

In [15]:
conv_atten.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, None, None, N 0                                            
__________________________________________________________________________________________________
mobilenetv2_1.00_224 (Model)    (None, None, None, 1 2257984     input_4[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, None, None, 1 11521       mobilenetv2_1.00_224[1][0]       
__________________________________________________________________________________________________
lambda_3 (Lambda)               (None, 1)            0           conv2d_2[0][0]                   
__________________________________________________________________________________________________
multiply_3

In [16]:
model_trained=load_model('../Models/mobile_dynamics_abs1.h5',custom_objects={'relu6':ReLU(6.),'tf':tf},compile=False)

In [17]:
model_trained.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 15, 1280)     0                                            
__________________________________________________________________________________________________
time_distributed_2 (TimeDistrib (None, 15, 2)        2562        input_4[0][0]                    
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 13, 2)        7682        input_4[0][0]                    
__________________________________________________________________________________________________
global_average_pooling1d_1 (Glo (None, 2)            0           time_distributed_2[0][0]         
__________________________________________________________________________________________________
global_ave

##### Again redefine the inputs and outputs to predict the values instead of custom loss function

In [18]:
top_layers= Model(inputs=model_trained.inputs[0],outputs=[model_trained.layers[5].output])

##### Next layers on the top that include time convolution. There is residual connection of simple averaging in time and convolution

In [19]:
inp=Input((None,None,None,None))

x=inp

x = TimeDistributed(conv_atten)(x)

prediction = top_layers(x)

In [20]:
final_model=Model(inputs=[inp], outputs=[prediction])

In [21]:
final_model.save_weights('final_model_weights.h5')

In [46]:
wwee=final_model.get_weights()

In [49]:
import h5py
with h5py.File('f_m_w.h5', 'w') as hf:
    hf.create_dataset("weights",  data=wwee)

TypeError: Object dtype dtype('O') has no native HDF5 equivalent

In [34]:
final_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, None, None, N 0                                            
__________________________________________________________________________________________________
time_distributed_3 (TimeDistrib (None, None, 1280)   2269505     input_4[0][0]                    
__________________________________________________________________________________________________
time_distributed_4 (TimeDistrib (None, None, 2)      2562        time_distributed_3[0][0]         
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, None, 2)      7682        time_distributed_3[0][0]         
__________________________________________________________________________________________________
global_ave

In [43]:
fmjs=final_model.to_json()

In [45]:
import json
with open('final_model.json', 'w') as outfile:
    json.dump(fmjs, outfile)

In [51]:
np.save('f_m_w.npy',wwee)

In [52]:
wwee=np.load('f_m_w.npy')

In [53]:
final_model.set_weights(wwee)

In [54]:
adam = keras.optimizers.Adam(lr=.0001)

final_model.compile(optimizer=adam,loss='mse')

In [55]:
df_wind[df_wind['av.wn.']<30].describe()

Unnamed: 0,av.wn.,wn.gs.
count,513.0,513.0
mean,10.20104,13.470565
std,3.623536,3.534618
min,0.0,4.0
25%,9.0,11.6
50%,11.0,14.2
75%,12.6,16.0
max,16.8,19.6


In [56]:
df_wind[ (df_wind['av.wn.']<10)].describe()#(df_wind['av.wn.']<15) &

Unnamed: 0,av.wn.,wn.gs.
count,156.0,156.0
mean,5.730342,9.257692
std,2.932779,2.399033
min,0.0,4.0
25%,2.8,7.0
50%,6.0,9.6
75%,8.7,11.0
max,9.8,15.0


In [58]:
gen=batch_gen(seq_len,data_frame=df_wind[df_wind['av.wn.']<10])

In [59]:
corr=0
incorr=0
for i in range(200):
    inp=next(gen)
    if inp[1] is None:
        continue
    pred=final_model.predict_on_batch(inp[0])
    true_val=inp[1]
    err=np.mean(np.abs(pred-true_val))
    if err<5.:
        corr+=1
    else:
        incorr+=1
    print (pred,true_val)
    print (err)
    print ('Correct:', corr, 'Incorrect:', incorr)
    
        
    

[[ 7.378753 13.235353]] [[2.6 6.6]]
5.707053327560425
Correct: 0 Incorrect: 1
[[ 9.3112135 17.225136 ]] [[ 7. 12.]]
3.768174648284912
Correct: 1 Incorrect: 1
[[10.501867 15.250656]] [[ 8. 10.]]
3.8762617111206055
Correct: 2 Incorrect: 1
[[ 9.453521 16.460003]] [[ 7. 12.]]
3.4567618370056152
Correct: 3 Incorrect: 1
[[10.926006 15.058206]] [[ 8.6 12.2]]
2.592105960845948
Correct: 4 Incorrect: 1
[[ 9.969875 14.415662]] [[ 8. 11.]]
2.6927685737609863
Correct: 5 Incorrect: 1
[[10.712363 16.83792 ]] [[ 9. 14.]]
2.2751412391662598
Correct: 6 Incorrect: 1
[[ 7.989066 14.010383]] [[2.6 6.6]]
6.399724388122559
Correct: 6 Incorrect: 2
[[11.546982 17.834723]] [[5. 7.]]
8.690852165222168
Correct: 6 Incorrect: 3
[[ 7.975912 14.258844]] [[ 8. 10.]]
2.1414661407470703
Correct: 7 Incorrect: 3
[[ 5.2379937 10.315735 ]] [[2.8 6.8]]
2.976864290237427
Correct: 8 Incorrect: 3
[[5.3039865 9.194939 ]] [[0.8 4. ]]
4.849462604522705
Correct: 9 Incorrect: 3
[[ 7.142667 10.73019 ]] [[ 8. 10.]]
0.7937617301940918


In [34]:
corr=0
incorr=0
for i in range(200):
    inp=next(gen)
    if inp[1] is None:
        continue
    pred=final_model.predict_on_batch(inp[0])
    true_val=inp[1]
    err=np.mean(np.abs(pred-true_val))
    if err<5.:
        corr+=1
    else:
        incorr+=1
    print (pred,true_val)
    print (err)
    print ('Correct:', corr, 'Incorrect:', incorr)
    
        
    

[[ 9.777857 16.133057]] [[11.6 15.4]]
1.2775999069213864
Correct: 1 Incorrect: 0
[[13.52682  20.527515]] [[15.6 19.4]]
1.6003476142883306
Correct: 2 Incorrect: 0
[[12.063027 18.19981 ]] [[12. 16.]]
1.1314187049865723
Correct: 3 Incorrect: 0
[[11.61421 18.40131]] [[11.6 14.6]]
1.907760047912598
Correct: 4 Incorrect: 0
[[11.996664 20.284481]] [[14. 18.]]
2.1439085006713867
Correct: 5 Incorrect: 0
[[11.761147 19.546942]] [[ 9.8 11. ]]
5.254044151306152
Correct: 5 Incorrect: 1
[[14.057644 23.599085]] [[12.4 16.4]]
4.4283643722534185
Correct: 6 Incorrect: 1
[[14.5366535 20.48994  ]] [[15.6 19.4]]
1.076643562316895
Correct: 7 Incorrect: 1
[[ 8.07262  12.829642]] [[6.4 9.6]]
2.4511313438415527
Correct: 8 Incorrect: 1
[[12.118086 23.251358]] [[12.6 15.4]]
4.1666360855102536
Correct: 9 Incorrect: 1
[[13.9980345 22.434345 ]] [[11.4 15. ]]
5.016189861297607
Correct: 9 Incorrect: 2
[[ 9.938065 14.361513]] [[10. 12.]]
1.2117242813110352
Correct: 10 Incorrect: 2
[[12.067005 17.431953]] [[12.2 17. ]]

In [89]:
final_model.save('final_model.h5')

In [36]:
gen=batch_gen(seq_len,data_frame=df_wind[df_wind['av.wn.']<10])

corr=0
incorr=0
for i in range(200):
    inp=next(gen)
    if inp[1] is None:
        continue
    pred=final_model.predict_on_batch(inp[0])
    true_val=inp[1]
    err=np.mean(np.abs(pred-true_val))
    if err<5.:
        corr+=1
    else:
        incorr+=1
    print (pred,true_val)
    print (err)
    print ('Correct:', corr, 'Incorrect:', incorr)
    
        
    

[[ 5.3107204 10.037434 ]] [[2.4 7. ]]
2.974077033996582
Correct: 1 Incorrect: 0
[[ 6.376713 10.793888]] [[2.4 7. ]]
3.8853004455566404
Correct: 2 Incorrect: 0
[[10.4244585 17.448082 ]] [[ 9. 15.]]
1.9362702369689941
Correct: 3 Incorrect: 0
[[ 6.23094  10.409716]] [[2.4 7. ]]
3.6203277587890623
Correct: 4 Incorrect: 0
[[4.4756    7.9269476]] [[1.6 5.2]]
2.801273679733276
Correct: 5 Incorrect: 0
[[10.447187 15.912512]] [[ 8.6 10.8]]
3.479849624633789
Correct: 6 Incorrect: 0
[[ 7.7729096 13.497505 ]] [[ 9. 14.]]
0.8647925853729248
Correct: 7 Incorrect: 0
[[ 7.679663 12.08914 ]] [[ 9.2 11.2]]
1.2047383785247803
Correct: 8 Incorrect: 0
[[ 7.3812423 13.228802 ]] [[2.6 6.6]]
5.70502200126648
Correct: 8 Incorrect: 1
[[4.3370647 9.462797 ]] [[5. 9.]]
0.5628662109375
Correct: 9 Incorrect: 1
[[ 7.2820845 12.748032 ]] [[5. 7.]]
4.0150580406188965
Correct: 10 Incorrect: 1
[[4.658039 9.260036]] [[1.6 5.2]]
3.5590377807617184
Correct: 11 Incorrect: 1
[[ 9.699636 16.279198]] [[ 9.2 11.2]]
2.7894170761