In [12]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50V2, VGG16, InceptionV3, Xception, MobileNetV2
from tensorflow.keras.applications.resnet_v2 import preprocess_input as resnet_pp
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg16_pp
from tensorflow.keras.applications.inception_v3 import preprocess_input as inceptionv3_pp
from tensorflow.keras.applications.xception import preprocess_input as xception_pp
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenetv2_pp

from tensorflow.keras.layers import Flatten, LSTM, Dense, Input, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split

import random
import os
import numpy as np
import matplotlib.pyplot as plt
from imageio import get_reader
from cv2 import resize
from IPython.display import Video

In [13]:
# Mount Google Drive at /content/drive so the project folder under MyDrive is
# reachable; `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
class FeatureExtractor:
  """Builds ImageNet-pretrained backbones that turn images into feature vectors.

  Every public method returns a Keras ``Model`` whose input is the backbone's
  input and whose output is the backbone's last feature map (``include_top``
  is False) passed through ``GlobalAveragePooling2D`` and ``Flatten``.
  """

  def __init__(self):
    pass

  def _build(self, backbone, img_size):
    """Wrap ``backbone`` (a keras.applications constructor) as a feature extractor.

    Args:
      backbone: one of the imported application constructors (e.g. ``VGG16``).
      img_size: ``(height, width)`` of the input images; 3 channels are assumed.

    Returns:
      ``Model`` mapping the backbone input to the pooled, flattened features.
    """
    height, width = img_size
    b_model = backbone(weights='imagenet', include_top = False, input_shape=(height, width, 3))
    x = GlobalAveragePooling2D()(b_model.output)
    # Kept from the original layer stack; presumably a no-op after global
    # pooling, but removing it would change the saved model's layer graph.
    x = Flatten()(x)
    return Model(inputs = b_model.input, outputs = x)

  def resnet(self, model=None, img_size=None):
    """Return a ResNet50V2 feature extractor.

    The historical signature was ``resnet(model, img_size)`` with ``model``
    never used. Both ``resnet(img_size)`` and ``resnet(model, img_size)`` are
    accepted so existing callers keep working while the call style matches the
    other builders.

    Raises:
      TypeError: if no image size is supplied.
    """
    if img_size is None:
      # Single-argument call: the first positional argument is the image size.
      img_size = model
    if img_size is None:
      raise TypeError("resnet() requires img_size as (height, width)")
    return self._build(ResNet50V2, img_size)

  def vgg16(self, img_size):
    """Return a VGG16 feature extractor for ``img_size = (height, width)``."""
    return self._build(VGG16, img_size)

  def inceptionv3(self, img_size):
    """Return an InceptionV3 feature extractor for ``img_size = (height, width)``."""
    return self._build(InceptionV3, img_size)

  def xception(self, img_size):
    """Return an Xception feature extractor for ``img_size = (height, width)``."""
    return self._build(Xception, img_size)

  def mobilenetv2(self, img_size):
    """Return a MobileNetV2 feature extractor for ``img_size = (height, width)``."""
    return self._build(MobileNetV2, img_size)


In [15]:
# Project base directory on the mounted Drive; the video sub-folders and the
# cached .npz feature files are all resolved relative to it.
root = r'/content/drive/MyDrive/CV Projects/Badminton Highlight Creation'

In [16]:
def video_to_image(video_path, fps, target_size):
    """Decode a video file into an array of resized frames.

    Args:
        video_path: path of the video, handed to imageio's ``get_reader``.
        fps: forwarded to the ffmpeg reader as its ``fps`` argument.
        target_size: ``(height, width)`` every frame is resized to.

    Returns:
        ``np.ndarray`` built from the list of resized frames, in read order.
    """
    h, w = target_size
    # The reader is used as a context manager so the underlying ffmpeg
    # process / file handle is released even if decoding or resizing fails.
    with get_reader(video_path, format='ffmpeg', fps= fps) as vid:
        # cv2.resize expects its size argument as (width, height).
        imgs_as_np = [resize(frame, (w, h)) for frame in vid]
    return np.array(imgs_as_np)

In [17]:
def video_to_features(feature_extractor, pp, video_path, fps, target_size):
    """Run one video through preprocessing and the feature extractor.

    Args:
        feature_extractor: object exposing ``predict`` (here a Keras model).
        pp: preprocessing callable applied to the whole frame array.
        video_path, fps, target_size: forwarded to ``video_to_image``.

    Returns:
        Whatever ``feature_extractor.predict`` returns for the preprocessed frames.
    """
    frames = video_to_image(video_path, fps, target_size)
    return feature_extractor.predict(pp(frames))

In [18]:
# Index every clip with its binary label: files under "ShotVideoData" get 1,
# files under "notShotVideoData" get 0 (same order as listing each folder in turn).
videofiles = []
for folder, label in (("ShotVideoData", 1), ("notShotVideoData", 0)):
    for file in os.listdir(os.path.join(root, folder)):
        video_path = os.path.join(root, folder, file)
        videofiles.append({'file_dir': video_path, 'label': label})

In [None]:
# os.listdir returns entries in arbitrary order, so sort first and shuffle with
# a fixed seed: the cached .npz files then come out the same on every run.
video_data = sorted(videofiles, key=lambda item: item['file_dir'])
random.Random(42).shuffle(video_data)
img_size = (448, 448)
feature_extracter = FeatureExtractor().vgg16(img_size)
fps = 10
input_data, labels = [], []
for data in video_data:
    video_path, label = data['file_dir'], data['label']
    video_features = video_to_features(feature_extracter,vgg16_pp,video_path, fps, target_size = img_size)
    # All clips are stacked into one array below (and consumed as a 3-D array
    # by the training cell), so a clip with a different frame count cannot be
    # used. Fail here, naming the file, rather than after the full extraction.
    if input_data and video_features.shape != input_data[0].shape:
        raise ValueError(
            f"{video_path} produced features of shape {video_features.shape}, "
            f"expected {input_data[0].shape}; every clip must yield the same "
            "number of frames to be stacked into a single array"
        )
    input_data.append(video_features)
    labels.append(label)
# Cache features and labels so the expensive extraction need not be repeated.
np.savez(os.path.join(root,"vgg16_input_data.npz"), np.array(input_data))
np.savez(os.path.join(root,"vgg16_labels.npz"), np.array(labels))



In [None]:
def plot_model_history(model_summary):
    """Plot training and validation loss per epoch from a fit-history object.

    Args:
        model_summary: object whose ``history`` mapping holds the ``'loss'``
            and ``'val_loss'`` series (as returned by ``fit`` in this notebook).
    """
    curves = model_summary.history
    # Draw order matters: the legend labels below are matched positionally.
    for series in ('loss', 'val_loss'):
        plt.plot(curves[series])
    plt.title('model loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()

In [None]:
def classifier_performances(y_true, y_pred):
    """Print accuracy, precision, recall, F1 and the sklearn classification report.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels, aligned with ``y_true``.
    """
    scorers = (
        ("Accuracy:", accuracy_score),
        ("Precision:", precision_score),
        ("Recall:", recall_score),
        ("F1-score:", f1_score),
    )
    # Compute everything before printing so a failing metric prints nothing.
    results = [(caption, scorer(y_true, y_pred)) for caption, scorer in scorers]
    report = classification_report(y_true, y_pred)

    for caption, value in results:
        print(caption, value)
    print("\nClassification Report:\n", report)

In [None]:
# np.load on an .npz returns an archive object holding an open file; use it as
# a context manager so the handle is closed once the array has been read.
with np.load(os.path.join(root,"vgg16_input_data.npz")) as archive:
    input_data = archive["arr_0"]
with np.load(os.path.join(root,"vgg16_labels.npz")) as archive:
    labels = archive["arr_0"]
# The loaded objects are already ndarrays, so they are passed straight in
# (wrapping them in np.array would only make an extra copy). 80/20 split, fixed seed.
x_train, x_test, y_train, y_test = train_test_split(input_data, labels, test_size = 0.2, random_state = 42)

In [None]:
def LSTMclassifier(input_shape):
    """Build a two-layer LSTM binary classifier.

    Args:
        input_shape: shape of one input sample, passed to ``Input(shape=...)``.

    Returns:
        Uncompiled ``Model``: LSTM(32, returning sequences) -> LSTM(32) ->
        Dense(1) with sigmoid activation.
    """
    sequence_in = Input(shape=input_shape)
    hidden = LSTM(units=32, return_sequences=True)(sequence_in)
    hidden = LSTM(units=32)(hidden)
    probability = Dense(1, activation='sigmoid')(hidden)
    return Model(inputs=sequence_in, outputs=probability)

In [None]:
# One sample is described by axes 1 and 2 of the training array.
input_shape = (x_train.shape[1], x_train.shape[2])
decoder = LSTMclassifier(input_shape)
decoder.compile(
    optimizer=Adam(learning_rate=0.00001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Note: the held-out test split doubles as the validation set during training.
model_summary = decoder.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=120,
    batch_size=16,
)
plot_model_history(model_summary)

In [None]:
# Turn the sigmoid outputs into boolean predictions with a 0.5 threshold,
# then report the metrics on the test split.
y_test_pred = decoder.predict(x_test).flatten() > 0.5
classifier_performances(y_test, y_test_pred)

Results for VGG16 features:
Accuracy: 0.7453416149068323
Precision: 0.6379310344827587
Recall: 0.6491228070175439
F1-score: 0.6434782608695653

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.80      0.80       104
           1       0.64      0.65      0.64        57

    accuracy                           0.75       161
   macro avg       0.72      0.72      0.72       161
weighted avg       0.75      0.75      0.75       161

Results for ResNet50V2 features, input size (224, 224):

Accuracy: 0.782608695652174
Precision: 0.6716417910447762
Recall: 0.7758620689655172
F1-score: 0.7200000000000001

Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.79      0.82       103
           1       0.67      0.78      0.72        58
    accuracy                           0.78       161
   macro avg       0.77      0.78      0.77       161
weighted avg       0.79      0.78      0.79       161