# Setup & Training Script
To be run after new videos are added

Based on the article [Guide to Build Video Classification Model](https://www.analyticsvidhya.com/blog/2019/09/step-by-step-deep-learning-tutorial-video-classification-python/)


In [1]:
!pip3 install opencv-python



### Importing Libraries

In [None]:
import os, os.path
import cv2  # for caputring videos
import math # for mathematical operations
import matplotlib.pyplot as plt # for plotting the images
%matplotlib inline
import pandas as pd
import numpy as np # for mathematical operations
from keras.utils import np_utils
from numpy import genfromtxt
from skimage.transform import resize # for resizing images
from sklearn.model_selection import train_test_split
from glob import glob
from tqdm import tqdm

import keras
from keras.models import Sequential, load_model
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, InputLayer, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.applications.vgg16 import preprocess_input

 # Turn the csv files into dictionaries
1. Opens and converts csv file
2. Gets tags
3. Gets points for each frame
4. Reshapes each frame array
5. Groups frames by three

In [None]:
def split_into_frames(table, points_per_frame=12):
    """Group the rows of a points table into one column vector per frame.

    Args:
        table: DataFrame read from a points csv (no header). Its first column
            (the frame number) is dropped; the remaining columns must hold
            two values per row so that each frame reshapes cleanly.
        points_per_frame: Number of consecutive rows that make up one frame.

    Returns:
        dict mapping the frame index as a string ("0", "1", ...) to an array
        of shape (2 * points_per_frame, 1). Trailing rows that do not fill a
        whole frame are ignored.
    """
    # convert once instead of on every loop iteration
    values = table.to_numpy()
    frames = {}
    for index in range(len(values) // points_per_frame):
        rows = values[index * points_per_frame:(index + 1) * points_per_frame]
        # deleting the frame-number column, keeping only the coordinates
        coords = np.delete(rows, 0, 1)
        frames[str(index)] = np.reshape(coords, (2 * points_per_frame, 1))
    return frames


def stack_consecutive_frames(frames, window=3):
    """Join every run of `window` consecutive frames side by side.

    Args:
        frames: dict as returned by split_into_frames (string index -> column
            vector).
        window: Number of consecutive frames per group.

    Returns:
        dict mapping the index of the first frame of each group (as a string)
        to an array with one column per frame. Empty when there are fewer
        than `window` frames.
    """
    combos = {}
    for start in range(len(frames) - window + 1):
        group = [frames[str(start + offset)] for offset in range(window)]
        combos[str(start)] = np.concatenate(group, axis=1)
    return combos


# going into folder with training data
path="dataPoints_training/"

# creating a pandas dataframe
train = pd.DataFrame()

# number of points per frame
n = 12

# going into individual files
for filename in glob(os.path.join(path, '*.csv')):
    # basename copes with whatever separator glob returns; splitting on "/"
    # fails when the path comes back with backslashes
    file = os.path.basename(filename)  # gets individual file name
    stem = file.split(".csv")[0]
    train = pd.read_csv(filename, header=None)  # adds data to dataframe

    # getting tag for each set of points from name of file
    # (fourth "_"-separated part, e.g. "g" in training_file_1_g.csv)
    tag = file.split("_")[3].split(".csv")[0]

    # one (24, 1) array per frame, keyed by frame number
    frames = split_into_frames(train, n)

    # the tag first, then every frame combined with the two frames after it
    combos = {"tag": tag}
    combos.update(stack_consecutive_frames(frames))

    # publishing the dictionaries as trainingFrames_FILENAME and
    # trainingCombo_FILENAME; at notebook top level globals() is the same
    # namespace the original locals() call wrote to
    globals()['trainingFrames{}'.format("_" + stem)] = frames
    globals()['trainingCombo{}'.format("_" + stem)] = combos

# Put all the training frames into one big array and make the tags numerical

In [None]:
# going into folder with training data
path="dataPoints_training/"

# numeric label for each tag (1 represents bad and 0 represents good)
TAG_LABELS = {"b": 1, "g": 0}


def encode_tags(raw_tags):
    """Return a one-column DataFrame with "b"/"g" tags turned into 1/0.

    Tags that are not in TAG_LABELS are passed through unchanged. Encoding
    before the frame is built yields an integer column directly instead of
    relying on DataFrame.replace downcasting an object column.
    """
    return pd.DataFrame([TAG_LABELS.get(tag, tag) for tag in raw_tags])


# create two empty lists: one window of points and one tag per sample
points = []
tags = []

# going into individual files
for filename in glob(os.path.join(path, '*.csv')):
    # basename copes with whatever separator glob returns
    file = os.path.basename(filename)

    # dictionary built by the loading cell; at notebook top level globals()
    # is the same namespace the original locals() call read from
    combo = globals()['trainingCombo{}'.format("_" + file.split(".csv")[0])]

    # every entry except "tag" is a three-frame window keyed "0", "1", ...
    window_count = len(combo) - 1
    points.extend(combo[str(i)] for i in range(window_count))

    # one copy of the file's tag per window
    tags.extend([combo["tag"]] * window_count)

X = np.array(points)

tags = encode_tags(tags)


# Split data into training and validation sets

In [None]:
# Reference: https://www.bitdegree.org/learn/train-test-split
# TODO: discuss more in depth with RW

# Hold out 20% of the samples (with their tags) as the validation set.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    tags,
    test_size=0.2,
)

### Shape into single dimension

In [None]:
# Show the shapes of the full feature array and of its training/validation parts.
X.shape, X_train.shape, X_valid.shape

In [None]:
# Show the shapes of the training and validation tags.
y_train.shape, y_valid.shape

In [None]:
# Show the training/validation feature shapes once more, before they are flattened.
X_train.shape, X_valid.shape

In [None]:
# Flatten every sample into a single row of 24*3 values; the number of rows
# is taken from the matching tag frame.
X_train = np.reshape(X_train, (len(y_train), 24*3))
X_valid = np.reshape(X_valid, (len(y_valid), 24*3))

# Build the model

In [None]:
# Small fully connected classifier: 24*3 inputs -> 10 sigmoid units -> 2-way softmax.
model = Sequential([
    InputLayer((24*3,)),                                          # input layer
    Dense(units=10, activation='sigmoid', input_shape=(24*3,)),   # hidden layer
    Dense(2, activation='softmax'),                               # output layer
])

In [None]:
# Print a layer-by-layer summary of the model.
model.summary()

# Weights file

In [None]:
# Checkpoint callback: whenever the validation loss reaches a new minimum
# during training, the model is saved to 'weight.hdf5'.
from keras.callbacks import ModelCheckpoint
mcp_save = ModelCheckpoint(
    filepath='weight.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# Compile model

In [None]:
# The tags are integer class ids (0/1) rather than one-hot vectors, hence the
# sparse variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit model

Epoch ― In the context of training a model, refers to one iteration where the model sees the whole training set to update its weights.

In [None]:
# Train for 15 epochs in batches of 3 samples, evaluating on the validation
# split each epoch; mcp_save takes care of checkpointing.
model.fit(
    X_train,
    y_train,
    batch_size=3,
    epochs=15,
    validation_data=(X_valid, y_valid),
    callbacks=[mcp_save],
)

## What does this show us?

- ranges from 45-68%

In [None]:
# Show the shapes of the training/validation features and tags.
X_train.shape, X_valid.shape, y_train.shape, y_valid.shape

## Accuracy of the model:

In [None]:
# Recreate the network: 24*3 inputs -> 10 sigmoid units -> 2-way softmax.
# NOTE(review): the next cell rebinds `model` to load_model('weight.hdf5'),
# so this instance looks unused - confirm before relying on it.
model = Sequential([
    InputLayer((24*3,)),                                          # input layer
    Dense(units=10, activation='sigmoid', input_shape=(24*3,)),   # hidden layer
    Dense(2, activation='softmax'),                               # output layer
])

In [None]:
# Reload the checkpoint written during training and compile it for evaluation.
model = load_model('weight.hdf5')
model.compile(loss='sparse_categorical_crossentropy',optimizer='Adam',metrics=['accuracy'])

# Actual tags of the validation samples. y_valid is a one-column frame, so
# this is a list of one-element lists, e.g. [[0], [1], ...].
actual = y_valid.to_numpy().tolist()

# Predicted tag per validation sample: the index of the largest softmax
# output. Sequential.predict_classes no longer exists in current Keras; for a
# softmax output layer it computed exactly this argmax.
predictionTags = np.argmax(model.predict(X_valid), axis=-1)

# Plain list of ints, one per validation sample.
predict = predictionTags.tolist()

In [None]:
# Print the predicted and actual validation tags next to each other.
print("length of test:",len(predict),len(actual),'\n')
print('predict tags', predict)
print('actual tags', actual)

# Count the samples whose predicted tag differs from the actual one. Comparing
# how often 0 occurs in each list is not enough: the counts can match while
# individual predictions are wrong. np.ravel flattens both lists so the
# one-element lists in `actual` line up with the plain values in `predict`.
print('\nnum of differences', int(np.sum(np.ravel(predict) != np.ravel(actual))))

In [None]:
# Percentage of validation samples whose predicted tag equals the actual tag.
# accuracy_score is documented as accuracy_score(y_true, y_pred), so the actual
# tags go first; the value is the same either way.
from sklearn.metrics import accuracy_score
accuracy_score(actual, predict)*100

# Input video to obtain score:

In [None]:
# folder that holds the points of the video to be scored
path="dataPoints_input/"

# placeholder dataframe, replaced by the csv contents below
train = pd.DataFrame()

# the csv is assumed to be called vid.csv
file = "vid.csv"

train = pd.read_csv(path + file, header=None)

# suffix shared by both dictionary names: "_" + file name without ".csv"
suffix = "_" + file.split(".csv")[0]
scope = locals()

# register empty dictionaries named videoFrames_FILENAME and videoCombo_FILENAME
video_frames = scope['videoFrames{}'.format(suffix)] = {}
video_combos = scope['videoCombo{}'.format(suffix)] = {}

# number of points per frame
n = 12

# one entry per frame: its 12 rows without the frame-number column,
# reshaped into a (24, 1) array and stored under the frame number
rows = train.to_numpy()
for frame_no in range(len(rows) // n):
    chunk = rows[frame_no * n:(frame_no + 1) * n]
    chunk = np.delete(chunk, 0, 1)
    video_frames[str(frame_no)] = np.reshape(chunk, (24, 1))

# every frame that has two successors is joined with them column-wise and
# stored under the number of the first frame
for start in range(len(video_frames) - 2):
    trio = [video_frames[str(start + offset)] for offset in range(3)]
    video_combos[str(start)] = np.concatenate(trio, axis=1)

In [None]:
# Three-frame windows of the input video, keyed "0", "1", ... At notebook top
# level globals() is the same namespace the original locals() call read from.
video_combos = globals()['videoCombo{}'.format("_" + file.split(".csv")[0])]
isize = len(video_combos)

# Unlike the training dictionaries, videoCombo has no "tag" entry, so all
# isize keys are windows; stopping at isize-1 would drop the last window.
pts = [video_combos[str(i)] for i in range(isize)]
if not pts:
    # fail with a clear message instead of a ZeroDivisionError further down
    raise ValueError("no three-frame windows found; the input video needs at least 3 frames")

data = np.array(pts)
X = data.reshape(data.shape[0], 24*3)

# Predicted class per window: the index of the largest softmax output.
# Sequential.predict_classes no longer exists in current Keras; for a softmax
# output layer it computed exactly this argmax.
pTags = np.argmax(model.predict(X), axis=-1)
print(pTags)
p = pTags.tolist()

# Share of windows classified as good (0), in percent. The name `accuracy` is
# kept, but this is the video's score rather than a model accuracy.
numGood = p.count(0)
size = len(p)
accuracy = numGood/size * 100
print(accuracy)

# where in video is it bad (1) ?
for i, label in enumerate(p):
    if label == 1:
        print(i,end='  ')