In [6]:
# Using data from https://www.pyimagesearch.com/2019/07/15/video-classification-with-keras-and-deep-learning/
# We attempt to do the tutorial 1-3 on different data
import keras
from keras.models import Sequential
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, InputLayer, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D
from keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import os

In [7]:
def get_labels(pardir='test/data/sports/data', sort=True):
    
    from os.path import isdir, basename, join, isfile
    files = [f for f in os.listdir(pardir) if isdir(join(pardir, f))]

    train_image = []
    train_class = []

    for idir in files:
        clazz = basename(idir)
        if clazz.startswith("."):
            continue
        
        images = os.listdir(pardir+"/"+clazz)
        for i in tqdm(range(len(images))):
            
            image_name = images[i] 
            lc = image_name.lower()
            if not lc.endswith(".jpg") and not lc.endswith(".jpeg") and not lc.endswith(".png"):
                continue
            
            if isdir(join(pardir, clazz, image_name)):
                continue
                
            if lc.endswith(".ipynb_checkpoints"):
                continue
            
            train_image.append(clazz+"/"+image_name)
            train_class.append(clazz)
        
    train_data = pd.DataFrame()
    train_data['image'] = train_image
    train_data['class'] = train_class

    if sort:
        train_data = train_data.sort_values(by=['class'])
        
    return train_data

In [8]:
train = get_labels()

# converting the dataframe into csv file 
train.to_csv('train_sports.csv',header=True, index=False)

train

100%|██████████| 719/719 [00:00<00:00, 104252.24it/s]
100%|██████████| 611/611 [00:00<00:00, 104609.35it/s]
100%|██████████| 799/799 [00:00<00:00, 73790.05it/s]
100%|██████████| 715/715 [00:00<00:00, 92232.12it/s]
100%|██████████| 746/746 [00:00<00:00, 112706.25it/s]
100%|██████████| 715/715 [00:00<00:00, 110763.71it/s]
100%|██████████| 671/671 [00:00<00:00, 104518.81it/s]
100%|██████████| 6/6 [00:00<00:00, 28024.30it/s]
100%|██████████| 495/495 [00:00<00:00, 42282.15it/s]
100%|██████████| 713/713 [00:00<00:00, 108474.69it/s]
100%|██████████| 713/713 [00:00<00:00, 105942.28it/s]
100%|██████████| 679/679 [00:00<00:00, 114310.52it/s]
100%|██████████| 635/635 [00:00<00:00, 110070.80it/s]
100%|██████████| 572/572 [00:00<00:00, 107334.55it/s]
100%|██████████| 689/689 [00:00<00:00, 112929.87it/s]
100%|██████████| 481/481 [00:00<00:00, 100561.27it/s]
100%|██████████| 718/718 [00:00<00:00, 111216.13it/s]
100%|██████████| 938/938 [00:00<00:00, 120520.07it/s]
100%|██████████| 705/705 [00:00<00:0

Unnamed: 0,image,class
11268,badminton/00000740.jpg,badminton
10813,badminton/00000924.jpg,badminton
10812,badminton/00000930.jpg,badminton
10811,badminton/00000918.jpg,badminton
10810,badminton/00000529.jpg,badminton
...,...,...
4424,wwe/00000707.jpg,wwe
4425,wwe/00000713.jpg,wwe
4426,wwe/00000049.jpg,wwe
4428,wwe/00000263.jpg,wwe


In [9]:
def train_images(pdir='test/data/sports/data', train=train):
    
    # creating an empty list
    train_image = []

    # for loop to read and store frames
    for i in tqdm(range(train.shape[0])):
        # loading the image and keeping the target size as (500,300,3)
        # The images are variable size
        impath = pdir+'/'+train['image'][i]
        if i % 1000 == 0:
            print(impath)
        try:
            img = image.load_img(impath, target_size=(500,300,3))
            # converting it to array
            img = image.img_to_array(img)
            # normalizing the pixel value
            img = img/255
            # appending the image to the train_image list
            train_image.append(img)
        except:
            raise Exception("Problem with {}".format(impath))
    return train_image

In [10]:
train_image = train_images()

  0%|          | 17/14360 [00:00<01:40, 142.35it/s]

test/data/sports/data/gymnastics/00000372.jpg


  7%|▋         | 1032/14360 [00:04<00:56, 237.31it/s]

test/data/sports/data/wrestling/00000292.jpg


 14%|█▍        | 2031/14360 [00:08<00:51, 238.37it/s]

test/data/sports/data/football/00000023.jpg


 21%|██        | 3033/14360 [00:13<00:44, 251.84it/s]

test/data/sports/data/baseball/00000307.jpg


 28%|██▊       | 4033/14360 [00:17<00:46, 220.27it/s]

test/data/sports/data/ice_hockey/00000678.jpg


 35%|███▌      | 5043/14360 [00:21<00:39, 236.89it/s]

test/data/sports/data/basketball/00000110.jpg


 42%|████▏     | 6033/14360 [00:25<00:34, 239.64it/s]

test/data/sports/data/table_tennis/00000585.jpg


 49%|████▉     | 7024/14360 [00:30<00:33, 219.18it/s]

test/data/sports/data/motogp/00000511.jpg


 56%|█████▌    | 8040/14360 [00:34<00:25, 251.93it/s]

test/data/sports/data/fencing/00000022.jpg


 63%|██████▎   | 9043/14360 [00:39<00:22, 235.88it/s]

test/data/sports/data/swimming/00000535.jpg


 70%|██████▉   | 10021/14360 [00:43<00:19, 222.16it/s]

test/data/sports/data/tennis/00000266.jpg


 77%|███████▋  | 11025/14360 [00:48<00:15, 211.14it/s]

test/data/sports/data/badminton/00000333.jpg


 84%|████████▍ | 12030/14360 [00:52<00:11, 206.47it/s]

test/data/sports/data/boxing/00000146.jpg


 91%|█████████ | 13032/14360 [00:57<00:06, 193.37it/s]

test/data/sports/data/kabaddi/00000445.jpg


 98%|█████████▊| 14036/14360 [01:02<00:01, 187.54it/s]

test/data/sports/data/shooting/00000071.jpg


100%|██████████| 14360/14360 [01:04<00:00, 223.60it/s]


In [11]:
print("Make array and split")
# converting the list to numpy array
X = np.array(train_image)

# separating the target
y = train['class']

# creating the training and validation set
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2, stratify = y)
y_train = pd.get_dummies(y_train)
y_test = pd.get_dummies(y_test)
print("Complete")

In [13]:
base_model = VGG16(weights='imagenet', include_top=False)

In [None]:
X_train = base_model.predict(X_train)
X_test = base_model.predict(X_test)