In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import os
import glob

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import classification_report, confusion_matrix, matthews_corrcoef

In [2]:
# Training hyper-parameters shared by the input pipeline and model.fit below.
epochs = 10      # number of full passes over the training dataset
batch_size = 64  # number of posters per batch

# Build a simple CNN model

In [None]:
def _build_poster_cnn():
    """Assemble the small convolutional classifier used in this notebook.

    Architecture: a 224x224x3 input, three Conv2D(3x3, ReLU) + MaxPooling2D(2x2)
    stages with 32, 32 and 64 filters, then Flatten -> Dense(256, ReLU) ->
    Dropout(0.25) -> Dense(27, sigmoid).
    """
    keras_layers = tf.keras.layers

    stack = [keras_layers.InputLayer(input_shape=(224, 224, 3))]

    # Convolution / pooling stages, created in the same order as listed above.
    for filters in (32, 32, 64):
        stack.append(keras_layers.Conv2D(filters, (3, 3), activation='relu'))
        stack.append(keras_layers.MaxPooling2D((2, 2)))

    stack.extend([
        keras_layers.Flatten(),
        keras_layers.Dense(256, activation='relu'),
        keras_layers.Dropout(0.25),
        # One independent sigmoid per output unit (multi-label head).
        # NOTE(review): 27 is hard-coded; it has to equal the number of columns
        # produced by the label binarizer further down, otherwise fit() will
        # fail on a shape mismatch.
        keras_layers.Dense(27, activation='sigmoid'),
    ])
    return tf.keras.Sequential(stack)


model = _build_poster_cnn()

In [None]:
# Print the layer-by-layer summary of the model built above as a quick sanity check.
model.summary()

In [None]:
# Multi-label setup: each of the sigmoid outputs is scored independently with
# binary cross-entropy.
#
# The metric is given explicitly instead of the string 'accuracy'. Keras picks
# the concrete metric behind that string from the tensor shapes, and for an
# output wider than one unit it can resolve to categorical (argmax) accuracy,
# which is not meaningful for multi-label targets. BinaryAccuracy thresholds
# every output at 0.5 and averages the per-label hits. It is named 'accuracy'
# so the history keys stay 'accuracy' / 'val_accuracy'.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')],
)

# Load the data
- First split the metadata into train and test sets
- Then load each split as a `tf.data.Dataset`

In [None]:
# Paths of every poster file currently present in the download folder.
images = glob.glob('data/downloaded_posters/*.jpg')

# Movie metadata table; later cells read its `Genre` and `imdbId` columns.
df = pd.read_csv(filepath_or_buffer='data/MovieGenre.csv')

In [None]:
# Keep only the rows that actually have a genre string; rows with a missing
# `Genre` cannot be turned into labels.
df = df.loc[df['Genre'].notna()]

In [None]:
# `Genre` holds pipe-separated genre names; store them as a list per movie.
df['labels'] = df['Genre'].str.split('|')

In [None]:
# Hold out 20% of the movies; the fixed random_state keeps the split reproducible.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=2021,
)

In [None]:
def _existing_posters(frame, poster_dir='data/downloaded_posters'):
    """Collect poster paths and labels for the rows whose poster file exists.

    Args:
        frame: DataFrame providing an `imdbId` and a `labels` column.
        poster_dir: folder containing the poster files named `<imdbId>.jpg`.

    Returns:
        A `(paths, labels)` pair of parallel lists. `paths[i]` is the poster
        file path and `labels[i]` is the `labels` value of the same row. Rows
        whose poster file is not on disk are skipped.
    """
    paths, labels = [], []
    for row in frame.itertuples(index=False):
        poster_path = poster_dir + '/' + str(row.imdbId) + '.jpg'
        # Skip movies whose poster is not on disk.
        if os.path.exists(poster_path):
            paths.append(poster_path)
            labels.append(row.labels)
    return paths, labels


# Same filtering for both splits, done by one helper instead of two copied loops.
train_image, train_labels = _existing_posters(train_df)
test_image, test_labels = _existing_posters(test_df)

In [None]:
# Learn the label vocabulary from the training split only, then encode both
# splits as multi-hot rows with that same vocabulary.
binarizer = MultiLabelBinarizer()
binarizer.fit(train_labels)
train_labels, test_labels = [
    binarizer.transform(split_labels)
    for split_labels in (train_labels, test_labels)
]

In [None]:
# Column index of the multi-hot encoding -> label name.
index_label = dict(enumerate(binarizer.classes_))

In [None]:
# One dataset element per movie: (poster file path, multi-hot label row).
train_dataset, test_dataset = [
    tf.data.Dataset.from_tensor_slices(paths_and_labels)
    for paths_and_labels in (
        (train_image, train_labels),
        (test_image, test_labels),
    )
]

In [None]:
@tf.function
def read_data(image, labels):
    """Load one poster from disk and pair it with float labels.

    Args:
        image: path of the JPEG file to read.
        labels: label values belonging to that poster.

    Returns:
        A `(pixels, labels)` tuple. `pixels` is the file decoded as a
        3-channel image, cast to float32, resized to 224x224 and divided by
        255. `labels` is the input cast to float32.
    """
    raw_bytes = tf.io.read_file(image)
    decoded = tf.io.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(tf.cast(decoded, tf.float32), [224, 224])

    scaled = resized / 255.
    float_labels = tf.cast(labels, tf.float32)
    return scaled, float_labels

In [None]:
def _decode_batch_prefetch(dataset):
    """Map `read_data` over `dataset`, then batch by `batch_size` and prefetch."""
    autotune = tf.data.experimental.AUTOTUNE
    decoded = dataset.map(read_data, num_parallel_calls=autotune)
    return decoded.batch(batch_size).prefetch(autotune)


# Shuffle while the elements are still (path, label) pairs, using a buffer as
# large as the dataset; only the training split is shuffled.
train_dataset = _decode_batch_prefetch(train_dataset.shuffle(len(train_dataset)))
test_dataset = _decode_batch_prefetch(test_dataset)

# Train & Plot History

In [None]:
# Train for `epochs` passes; the held-out test split is used as validation data.
history = model.fit(
    x=train_dataset,
    epochs=epochs,
    validation_data=test_dataset,
)