# Genre Predictor by Movie Poster
An excercise for the AI Couse

In [4]:
import ast

import pandas as pd
import copy
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ast import literal_eval
from keras.preprocessing import image
from sklearn.metrics import hamming_loss, multilabel_confusion_matrix, roc_curve, classification_report
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MultiLabelBinarizer
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16
from keras.applications import ResNet50V2
from keras.applications import ConvNeXtSmall
from keras.applications import EfficientNetB0
from keras.applications import MobileNetV2
from keras.models import Sequential
from keras.layers import *
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, EarlyStopping




# Data Preparation

In [5]:
# Load the CSV file with image file paths and corresponding genres
csv_path = 'data/Multi_Label_dataset/train.csv'
df = pd.read_csv(csv_path)

# Define paths to the directory containing movie posters
image_dir = 'data/Multi_Label_dataset/Images/'

# Join the directory path with the image filenames
df['image_path'] = df['Id'].apply(lambda x: image_dir + x + '.jpg')

df.head(5)


Unnamed: 0,Id,Genre,Action,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,...,News,Reality-TV,Romance,Sci-Fi,Short,Sport,Thriller,War,Western,image_path
0,tt0086425,"['Comedy', 'Drama']",0,0,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,data/Multi_Label_dataset/Images/tt0086425.jpg
1,tt0085549,"['Drama', 'Romance', 'Music']",0,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0,0,data/Multi_Label_dataset/Images/tt0085549.jpg
2,tt0086465,['Comedy'],0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,data/Multi_Label_dataset/Images/tt0086465.jpg
3,tt0086567,"['Sci-Fi', 'Thriller']",0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,data/Multi_Label_dataset/Images/tt0086567.jpg
4,tt0086034,"['Action', 'Adventure', 'Thriller']",1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,data/Multi_Label_dataset/Images/tt0086034.jpg


In [6]:
# Input NP Array (Image Data directly as pixel values)
width = 300
height = 300
x_data = []
for index, row in df.iterrows():
    img = image.load_img(row['image_path'],target_size=(height,width,3))
    img = image.img_to_array(img)
    img = img/255.0
    x_data.append(img)

x_data = np.array(x_data)
y_data = df.drop(['Id', 'Genre', 'image_path'], axis=1).to_numpy()

num_classes = y_data.shape[1]

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.1)
x_train, x_val, y_train,y_val = train_test_split(x_train, y_train, test_size=0.3)

In [7]:
def get_resnet_model(fine_tune=0):
    #based on https://www.learndatasci.com/tutorials/hands-on-transfer-learning-keras/
    # Load the pre-trained Resnet model
    base_model = ResNet50V2(include_top=False, input_shape=(300, 300, 3))

    # Freeze the layers of the pre-trained model (exclude fine tuning layers)
    if fine_tune > 0:
        for layer in base_model.layers[:-fine_tune]:
                layer.trainable = False
    else:
        for layer in base_model.layers:
                layer.trainable = False

    # Create a new model for genre prediction
    return Sequential([
        base_model,
        Flatten(),
        Dense(512, activation='relu'),
        Dense(128, activation='relu'),
        Dropout(0.1),
        Dense(num_classes, activation='relu')
    ])

def train_model(x_train, x_val, y_train, y_val, p_model, num_epochs, p_model_filename):
    p_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    # Define callbacks
    checkpoint = ModelCheckpoint(p_model_filename,
                                 save_best_only=True,
                                 save_weights_only=True,
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=1)
    # Run the actual training and return training history
    history = p_model.fit(
        x_train,
        y_train,
        batch_size=32,
        epochs=num_epochs,  # Adjust the number of epochs as needed
        validation_data=(x_val, y_val),
        callbacks=[checkpoint, early_stopping]
    )
    return history

In [8]:
model = get_resnet_model()
history = train_model(x_train, x_val, y_train, y_val, model, 10, 'temp')



Epoch 1/10


Epoch 1: val_loss improved from inf to 2.06598, saving model to temp
Epoch 2/10
Epoch 2: val_loss improved from 2.06598 to 2.01536, saving model to temp
Epoch 3/10
Epoch 3: val_loss did not improve from 2.01536
Epoch 4/10
Epoch 4: val_loss did not improve from 2.01536
Epoch 5/10
Epoch 5: val_loss did not improve from 2.01536
Epoch 6/10
Epoch 6: val_loss did not improve from 2.01536
Epoch 7/10
Epoch 7: val_loss did not improve from 2.01536
Epoch 7: early stopping


In [None]:
fig, ax = plt.subplots()
plt.plot(history.history['accuracy'], label='Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
ax.set_ylim(0, 1)
plt.show()