In [1]:
import os
import cv2
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import ConfusionMatrixDisplay, classification_report, f1_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Rescaling, Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.optimizers import Adam

In [2]:
# PERFORM DATA AUGMENTATION

In [3]:
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

np.set_printoptions(suppress=True)

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
GRAY_CROPPED_POSITIVES_FOLDER_PATH = '/content/drive/MyDrive/Bundesliga/PositiveFrames/frames_gray_dg/'

In [63]:
img_gen = ImageDataGenerator(rescale=1/255)

In [64]:
train_data = img_gen.flow_from_directory(GRAY_CROPPED_POSITIVES_FOLDER_PATH + 'train', 
                                         class_mode='sparse', 
                                         batch_size=64, 
                                         target_size=(224, 224),
                                         )

Found 2543 images belonging to 3 classes.


In [65]:
valid_data = img_gen.flow_from_directory(GRAY_CROPPED_POSITIVES_FOLDER_PATH + 'val', 
                                         class_mode='sparse', 
                                         batch_size=64, 
                                         target_size=(224, 224),
                                         )

Found 677 images belonging to 3 classes.


In [66]:
test_data = img_gen.flow_from_directory(GRAY_CROPPED_POSITIVES_FOLDER_PATH + 'test', 
                                         class_mode='sparse', 
                                         batch_size=64, 
                                         target_size=(224, 224),
                                         )

Found 652 images belonging to 3 classes.


In [67]:
filecount = list()
FOLDER = GRAY_CROPPED_POSITIVES_FOLDER_PATH

for set_ in os.listdir(FOLDER):
    for class_ in os.listdir(os.path.join(FOLDER, set_)):
        filecount.append((set_, class_, len(os.listdir(os.path.join(FOLDER, set_, class_)))))

In [68]:
filecount

[('train', 'challenge', 344),
 ('train', 'throwin', 117),
 ('train', 'play', 2082),
 ('val', 'challenge', 94),
 ('val', 'throwin', 22),
 ('val', 'play', 561),
 ('test', 'challenge', 113),
 ('test', 'throwin', 21),
 ('test', 'play', 518)]

In [58]:
class_weights = dict()
events = list()
idx = 0

for file_ in filecount:
  if file_[0] == 'train':
    events.append(idx)
    class_weights[idx] = file_[2]
    idx += 1

samples = sum(class_weights.values())

for event in events:
  class_weights[event] = samples / class_weights[event]

class_weights

{0: 7.392441860465116, 1: 21.735042735042736, 2: 1.2214217098943323}

In [56]:
class_weights = dict()
events = list()

for file_ in filecount:
  if file_[0] == 'train':
    events.append(file_[1])
    class_weights[events[-1]] = file_[2]

samples = sum(class_weights.values())

for event in events:
  class_weights[event] = samples / class_weights[event]

class_weights

{'challenge': 7.392441860465116,
 'throwin': 21.735042735042736,
 'play': 1.2214217098943323}

In [11]:
vgg_model = VGG16(include_top=False, input_shape=(224, 224, 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [69]:
model = Sequential([vgg_model,
                    Flatten(),
                    Dense(128, activation='relu'),
                    Dense(3, activation='softmax')
])

In [70]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten_1 (Flatten)         (None, 25088)             0         
                                                                 
 dense_2 (Dense)             (None, 128)               3211392   
                                                                 
 dense_3 (Dense)             (None, 3)                 387       
                                                                 
Total params: 17,926,467
Trainable params: 3,211,779
Non-trainable params: 14,714,688
_________________________________________________________________


In [71]:
model.input_shape

(None, 224, 224, 3)

In [72]:
model.output_shape

(None, 3)

In [73]:
vgg_model.trainable=False

In [74]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten_1 (Flatten)         (None, 25088)             0         
                                                                 
 dense_2 (Dense)             (None, 128)               3211392   
                                                                 
 dense_3 (Dense)             (None, 3)                 387       
                                                                 
Total params: 17,926,467
Trainable params: 3,211,779
Non-trainable params: 14,714,688
_________________________________________________________________


In [75]:
def train_model(model, train_data=train_data, valid_data=valid_data, epochs=100, batch_size=16, patience=3, account_weights=True):
  """Trains the model"""
  callback = EarlyStopping(monitor='val_loss',
                         patience=patience,
                         restore_best_weights=True)
  
  if account_weights:
    class_weights = dict()
    events = list()
    idx = 0

    for file_ in filecount:
      if file_[0] == 'train':
        events.append(idx)
        class_weights[idx] = file_[2]
        idx += 1

    samples = sum(class_weights.values())

    for event in events:
      class_weights[event] = samples / class_weights[event]


  else:
    class_weights = None

  model.fit(train_data,
            validation_data=valid_data, 
            batch_size=batch_size, 
            epochs=epochs,
            class_weight=class_weights,
            steps_per_epoch=len(train_data),
            validation_steps=len(valid_data),
            callbacks=[callback])
  
  return model

In [76]:
def compile_model(model, lr=0.0001):
  """Compiles the model"""
  optimizer = Adam(learning_rate=lr)

  model.compile(loss='sparse_categorical_crossentropy',
              optimizer=optimizer,
              metrics='accuracy',
              )
  return model

In [77]:
model = compile_model(model)

In [78]:
model = train_model(model)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


In [79]:
model.save('/content/drive/MyDrive/Bundesliga/Models/model_ms4_1')

In [None]:
# model = load_model('/content/drive/MyDrive/Bundesliga/Models/model_ms3_6')

2nd try:

In [80]:
model = compile_model(model, lr=0.00001)

In [81]:
model = train_model(model)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
