# Movie Poster Genre Classification (Multi-Label CNN)

In [2]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [3]:
# Download the movie-poster dataset (poster images plus the label CSV used by
# the cells below) by cloning its GitHub repository into ./Movie-Posters-Dataset.
# NOTE(review): later cells read from /content/Movie-Posters-Dataset, so this
# presumably runs with /content as the working directory (Colab default) — confirm.
!git clone https://github.com/jordantangy/Movie-Posters-Dataset.git

Cloning into 'Movie-Posters-Dataset'...
remote: Enumerating objects: 2, done.[K
remote: Counting objects: 100% (2/2), done.[K
remote: Total 7871 (delta 0), reused 2 (delta 0), pack-reused 7869[K
Receiving objects: 100% (7871/7871), 246.26 MiB | 60.25 MiB/s, done.
Checking out files: 100% (7868/7868), done.


In [4]:
# Label table: one row per movie, holding its IMDb-style 'Id', the raw 'Genre'
# list, and one 0/1 indicator column per genre.
train_csv_path = '/content/Movie-Posters-Dataset/Dataset/train.csv'
data = pd.read_csv(train_csv_path)
data.head()

Unnamed: 0,Id,Genre,Action,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,Family,Fantasy,History,Horror,Music,Musical,Mystery,N/A,News,Reality-TV,Romance,Sci-Fi,Short,Sport,Thriller,War,Western
0,tt0086425,"['Comedy', 'Drama']",0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,tt0085549,"['Drama', 'Romance', 'Music']",0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0
2,tt0086465,['Comedy'],0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,tt0086567,"['Sci-Fi', 'Thriller']",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0
4,tt0086034,"['Action', 'Adventure', 'Thriller']",1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0


In [5]:
# Size (in pixels) every poster is resized to before being fed to the network.
pic_width = 350
pic_height = 350

# Pre-allocate the image tensor rather than collecting per-image arrays in a
# Python list and converting with np.array() afterwards: the list-then-copy
# approach keeps two full copies of the dataset in memory at its peak.
# Shape is (num_images, height, width, 3); load_img's default colour mode is RGB.
X = np.empty((len(data), pic_height, pic_width, 3), dtype=np.float32)

# Iterate over the ids positionally so the loop does not depend on the
# DataFrame having a default 0..n-1 index.
for i, movie_id in enumerate(tqdm(data['Id'])):
  path = '/content/Movie-Posters-Dataset/Dataset/Images/' + movie_id + '.jpg'
  # load_img expects target_size as (height, width); the channel count is not
  # part of this argument.
  img = image.load_img(path, target_size=(pic_height, pic_width))
  # Convert to a float array and scale pixel values from [0, 255] to [0, 1].
  X[i] = image.img_to_array(img) / 255.0

100%|██████████| 7254/7254 [00:29<00:00, 244.39it/s]


In [6]:
# Target matrix: everything in the CSV except the 'Id' and 'Genre' columns,
# i.e. the per-genre 0/1 indicator columns, converted to a NumPy array.
# Row k of y describes which genres movie k belongs to.
y = data.drop(columns=['Id', 'Genre']).to_numpy()
print(y)
print(y.shape)

# Hold out 15% of the samples for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=0,
)


[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]]
(7254, 25)


## Building the CNN

In [14]:
# CNN for multi-label genre prediction.
#
# Feature extractor: four identical stages of
#   Conv2D(3x3, relu) -> BatchNormalization -> MaxPool2D(2, 2) -> Dropout
# Each tuple below is (number of filters, dropout rate) for one stage.
conv_stages = [(16, 0.3), (32, 0.3), (64, 0.4), (128, 0.5)]

model = Sequential()

for stage_index, (n_filters, drop_rate) in enumerate(conv_stages):
  # Only the very first layer needs to be told the shape of one input image.
  first_layer_kwargs = {'input_shape': X_train[0].shape} if stage_index == 0 else {}
  model.add(Conv2D(n_filters, (3,3), activation='relu', **first_layer_kwargs))
  model.add(BatchNormalization())
  model.add(MaxPool2D(2,2))
  model.add(Dropout(drop_rate))

model.add(Flatten())

# Classifier head: two identical fully connected stages of
#   Dense(128, relu) -> BatchNormalization -> Dropout(0.5)
for _ in range(2):
  model.add(Dense(128, activation='relu'))
  model.add(BatchNormalization())
  model.add(Dropout(0.5))

# One sigmoid unit per genre column in y (25), so each genre gets an
# independent probability.
model.add(Dense(25, activation='sigmoid'))

model.summary()

Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 348, 348, 16)      448       
_________________________________________________________________
batch_normalization_21 (Batc (None, 348, 348, 16)      64        
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 174, 174, 16)      0         
_________________________________________________________________
dropout_20 (Dropout)         (None, 174, 174, 16)      0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 172, 172, 32)      4640      
_________________________________________________________________
batch_normalization_22 (Batc (None, 172, 172, 32)      128       
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 86, 86, 32)      