# Load images, sort frames by position, and dump to pickle files

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow import keras
#import tensorflow_hub as hub
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2

In [None]:
# Root folder; every frame directory below is a direct child of it.
WorkingDir = 'F:/'

# One frame directory per test split.
TestFrames1DIR = WorkingDir + 'TestFrames1.2/'
TestFrames2DIR = WorkingDir + 'TestFrames2.2/'
TestFrames3DIR = WorkingDir + 'TestFrames3.2/'

# One frame directory per train split.
# Splits 2 and 3 used to be built from 'TestFrames2.2/' and 'TestFrames3.2/',
# which made the "training" pickles for those splits copies of the test data.
# NOTE(review): confirm the on-disk folders follow the 'TrainFramesN.2/'
# pattern used by split 1.
TrainFrames1DIR = WorkingDir + 'TrainFrames1.2/'
TrainFrames2DIR = WorkingDir + 'TrainFrames2.2/'
TrainFrames3DIR = WorkingDir + 'TrainFrames3.2/'

# Category names (101 entries). load_videos uses each name as a sub-folder of
# the frame directory and uses the name's position in this list as the integer
# label stored next to every frame.
# The order is NOT strictly alphabetical (e.g. 'PlayingGuitar' comes before
# 'PlayingCello'); do not reorder, or labels in already-written pickles will no
# longer match.
# NOTE(review): these look like the UCF101 action classes - confirm.
CATEGORIES = ['ApplyEyeMakeup', 'ApplyLipstick', 'Archery', 'BabyCrawling', 'BalanceBeam', 'BandMarching',
              'BaseballPitch', 'Basketball', 'BasketballDunk', 'BenchPress', 'Biking', 'Billiards', 'BlowDryHair',
              'BlowingCandles', 'BodyWeightSquats', 'Bowling', 'BoxingPunchingBag', 'BoxingSpeedBag', 'Breaststroke',
              'BrushingTeeth', 'CleanandJerk', 'CliffDiving', 'CricketBowling', 'CricketShot', 'CuttingInKitchen',
              'Diving', 'Drumming', 'Fencing', 'FieldHockeyPenalty', 'FloorGymnastics', 'FrisbeeCatch', 'FrontCrawl',
              'GolfSwing', 'Haircut', 'HammerThrow', 'Hammering', 'HandstandPushups', 'HandstandWalking',
              'HeadMassage', 'HighJump', 'HorseRace', 'HorseRiding', 'HulaHoop', 'IceDancing', 'JavelinThrow',
              'JugglingBalls', 'JumpRope', 'JumpingJack', 'Kayaking', 'Knitting', 'LongJump', 'Lunges',
              'MilitaryParade', 'Mixing', 'MoppingFloor', 'Nunchucks', 'ParallelBars', 'PizzaTossing',
              'PlayingGuitar', 'PlayingPiano', 'PlayingTabla', 'PlayingViolin', 'PlayingCello', 'PlayingDaf',
              'PlayingDhol', 'PlayingFlute', 'PlayingSitar', 'PoleVault', 'PommelHorse', 'PullUps', 'Punch',
              'PushUps', 'Rafting', 'RockClimbingIndoor', 'RopeClimbing', 'Rowing', 'SalsaSpin', 'ShavingBeard',
              'Shotput', 'SkateBoarding', 'Skiing', 'Skijet', 'SkyDiving', 'SoccerJuggling', 'SoccerPenalty',
              'StillRings', 'SumoWrestling', 'Surfing', 'Swing', 'TableTennisShot', 'TaiChi', 'TennisSwing',
              'ThrowDiscus', 'TrampolineJumping', 'Typing', 'UnevenBars', 'VolleyballSpiking', 'WalkingWithDog',
              'WallPushups', 'WritingOnBoard', 'YoYo']


In [None]:
def load_videos(directory, max_categories=101): # by default uses all categories
    """Load every frame image under ``directory`` and tag it with its category index.

    The expected layout is ``directory/<category>/<video>/<image file>``, where
    ``<category>`` is an entry of the module-level ``CATEGORIES`` list.

    Args:
        directory: Root folder containing one sub-folder per category. A
            trailing slash is accepted but no longer required.
        max_categories: Only the first ``max_categories`` entries of
            ``CATEGORIES`` are read (the default of 101 reads all of them).

    Returns:
        A list of ``[image, label]`` pairs. ``image`` is the array returned by
        ``cv2.imread`` and ``label`` is the category's index in ``CATEGORIES``.
        Pairs are ordered by category, then video folder name, then file name.

    Raises:
        OSError: If a category or video folder cannot be listed, or if
            ``cv2.imread`` cannot decode a file (it returns ``None`` then).
    """
    data = []

    for cat_num, category in enumerate(CATEGORIES):
        if cat_num >= max_categories:
            break
        print(str(cat_num) + ': ' + category)
        category_dir = os.path.join(directory, category)

        # os.listdir gives entries in arbitrary order. sort_by_frame relies on
        # each video's frames arriving consecutively and in a stable order, so
        # sort both levels. The sort is lexicographic.
        # NOTE(review): frame file names must be zero-padded for this to equal
        # temporal order - confirm against the extraction script.
        for video in sorted(os.listdir(category_dir)):
            video_dir = os.path.join(category_dir, video)

            for img in sorted(os.listdir(video_dir)):
                img_path = os.path.join(video_dir, img)
                img_array = cv2.imread(img_path)
                if img_array is None:
                    # Fail here with the offending path instead of storing a
                    # None "image" that only blows up later during scaling.
                    raise OSError('could not read image: ' + img_path)
                data.append([img_array, cat_num])

    print('done')
    return data

In [None]:
def sort_by_frame(testing_data, frames_per_video=20):
    """Scale frames to the 0-1 range and regroup them by position within their video.

    ``testing_data`` is consumed in order. Frame ``i`` is placed in column
    ``i % frames_per_video``, so the frames of one video must be adjacent and
    every video must contribute exactly ``frames_per_video`` frames. A
    trailing incomplete video is not detected and leaves the columns with
    unequal lengths.

    Args:
        testing_data: Iterable of ``(features, label)`` pairs, where
            ``features`` is an array-like of pixel values in 0-255.
        frames_per_video: Number of frames that make up one video
            (default 20).

    Returns:
        A tuple ``(X2, y2)``. ``X2`` is a list of ``frames_per_video`` lists;
        ``X2[k][v]`` is frame ``k`` of video ``v`` divided by 255.0. ``y2[v]``
        is the label attached to the first frame of video ``v``.

    Raises:
        ValueError: If ``frames_per_video`` is smaller than 1.
    """
    if frames_per_video < 1:
        raise ValueError('frames_per_video must be at least 1')

    # X2[0] holds frame 0 of each video, X2[1] holds frame 1, etc.
    X2 = [[] for _ in range(frames_per_video)]
    # One label per video, recorded when the video's first frame is seen.
    y2 = []

    # Frames are scaled and filed one at a time. Stacking everything into one
    # big array first would need a second full copy of the dataset and would
    # force every frame to share a single shape.
    for i, (features, label) in enumerate(testing_data):
        slot = i % frames_per_video
        # put data from 0-255 into scale of 0-1
        X2[slot].append(np.asarray(features) / 255.0)

        if slot == 0:
            y2.append(label)

        processed = i + 1
        if processed % 1000 == 0:
            print('frame ' + str(processed) + ' processed')

    print('done')
    return X2, y2

In [None]:
def get_for_predict(data, index):
    """Pick entry ``index`` out of every column of ``data``.

    Args:
        data: Sequence of columns (at most 20); each column is indexable.
        index: Position to read from every column.

    Returns:
        A list of exactly 20 lists. Slot ``k`` holds the single element
        ``data[k][index]``; slots beyond ``len(data)`` stay empty.

    Raises:
        IndexError: If ``data`` has more than 20 columns, or ``index`` is out
            of range for a column.
    """
    # The result always has 20 slots, regardless of how many columns come in.
    selected = [[] for _ in range(20)]

    for slot, column in enumerate(data):
        selected[slot].append(column[index])

    return selected  # one set of 20 frames

In [None]:
def quickdump(data, name):
    """Pickle ``data`` to ``pickle/<name>``, relative to the current working directory.

    Args:
        data: Any picklable object.
        name: File name (may contain sub-folders) inside the ``pickle/`` folder.

    Raises:
        OSError: If the folder cannot be created or the file cannot be written.
        pickle.PicklingError: If ``data`` cannot be pickled.
    """
    path = 'pickle/' + name
    # Create the target folder on first use instead of failing in open().
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # The context manager closes the file even when pickling raises.
    with open(path, 'wb') as pickle_out:
        # Protocol 4 adds support for very large objects.
        pickle.dump(data, pickle_out, protocol=4)

## Load in Testing1 Data

In [None]:
# Read every frame under TestFrames1DIR (load_videos covers all categories by
# default); the result is a list of [image, label] pairs.
testing_data1 = load_videos(TestFrames1DIR)

In [None]:
# Scale the frames and regroup them by frame position; y_test1 gets one label
# per video.
X_test1, y_test1 = sort_by_frame(testing_data1)
testing_data1 = None # free up storage

In [None]:
# Write images and labels as separate files under pickle/. Each name is reset
# to None right after its dump so the data is no longer referenced from here.
quickdump(X_test1, 'testing_images1_101CAT.pickle')
X_test1 = None
quickdump(y_test1, 'testing_labels1_101CAT.pickle')
y_test1 = None

## Load in Testing2 Data

In [None]:
# Read every frame under TestFrames2DIR as [image, label] pairs.
testing_data2 = load_videos(TestFrames2DIR)

In [None]:
# Scale the frames and regroup them by frame position; y_test2 gets one label
# per video.
X_test2, y_test2 = sort_by_frame(testing_data2)
testing_data2 = None  # drop the raw pairs once they have been regrouped

In [None]:
# Write images and labels as separate files under pickle/, clearing each name
# right after its dump.
quickdump(X_test2, 'testing_images2_101CAT.pickle')
X_test2 = None
quickdump(y_test2, 'testing_labels2_101CAT.pickle')
y_test2 = None

## Load in Testing3 Data

In [None]:
# Read every frame under TestFrames3DIR as [image, label] pairs.
testing_data3 = load_videos(TestFrames3DIR)

In [None]:
# Scale the frames and regroup them by frame position; y_test3 gets one label
# per video.
X_test3, y_test3 = sort_by_frame(testing_data3)
testing_data3 = None  # drop the raw pairs once they have been regrouped

In [None]:
# Write images and labels as separate files under pickle/, clearing each name
# right after its dump.
quickdump(X_test3, 'testing_images3_101CAT.pickle')
X_test3 = None
quickdump(y_test3, 'testing_labels3_101CAT.pickle')
y_test3 = None

## Loading Training1

In [None]:
# Read every frame under TrainFrames1DIR as [image, label] pairs.
training_data1 = load_videos(TrainFrames1DIR)

In [None]:
# Scale and regroup the frames loaded into training_data1. This cell used to
# pass training_data3, which is not assigned until the Training3 section, so it
# either raised NameError or processed the wrong split.
X_train1, y_train1 = sort_by_frame(training_data1)
training_data1 = None  # drop the raw pairs once they have been regrouped

In [None]:
# Write X_train1 and y_train1 as separate files under pickle/, clearing each
# name right after its dump.
quickdump(X_train1, 'training_images1_101CAT.pickle')
X_train1 = None
quickdump(y_train1, 'training_labels1_101CAT.pickle')
y_train1 = None

## Loading Training2

In [None]:
# Read every frame under TrainFrames2DIR as [image, label] pairs.
training_data2 = load_videos(TrainFrames2DIR)

In [None]:
# Scale and regroup the frames loaded into training_data2. This cell used to
# pass training_data3, which is not assigned until the Training3 section, so it
# either raised NameError or processed the wrong split.
X_train2, y_train2 = sort_by_frame(training_data2)
training_data2 = None  # drop the raw pairs once they have been regrouped

In [None]:
# Write X_train2 and y_train2 as separate files under pickle/, clearing each
# name right after its dump.
quickdump(X_train2, 'training_images2_101CAT.pickle')
X_train2 = None
quickdump(y_train2, 'training_labels2_101CAT.pickle')
y_train2 = None

## Loading Training3

In [None]:
# Read every frame under TrainFrames3DIR as [image, label] pairs.
training_data3 = load_videos(TrainFrames3DIR)

In [None]:
# Scale the frames and regroup them by frame position; y_train3 gets one label
# per video.
X_train3, y_train3 = sort_by_frame(training_data3)
training_data3 = None  # drop the raw pairs once they have been regrouped

In [None]:
# Write X_train3 and y_train3 as separate files under pickle/, clearing each
# name right after its dump.
quickdump(X_train3, 'training_images3_101CAT.pickle')
X_train3 = None
quickdump(y_train3, 'training_labels3_101CAT.pickle')
y_train3 = None