# Deep Learning
Our deep learning model uses get_dummies to encode our categorical data (genre and director). We created the train/test split (default 75/25) and scaled our data using the standard scaler. We used to_categorical to encode our target (rating class). Our model included 3 hidden layers with 100 nodes each. The accuracy on this model was 58%.

In [None]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from config import username, password
from numpy.random import seed
seed(42)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import tensorflow
tensorflow.keras.__version__
import warnings
warnings.filterwarnings('ignore')

from sklearn.ensemble import RandomForestClassifier


## Data Preprocessing

In [None]:
# Load the movie dataset from the Resources folder and preview the first rows
movies = pd.read_csv(filepath_or_buffer="./Resources/imdb_final.csv")
movies.head(n=5)

In [None]:
# # Set vote column to integer type
# movies["median_vote"] = movies["median_vote"].astype(int)
# movies["median_vote"].unique()

In [None]:
# Feature frame (X): the five predictor columns used by the model
X = movies[
    [
        "year",
        "genre",
        "duration",
        "director",
        "budget",
    ]
]
# Target (y): rating_class laid out as a column vector (one row per movie)
y = np.reshape(movies["rating_class"].values, (-1, 1))
print(X.shape, y.shape)

In [None]:
# Work on an independent copy so the feature frame X itself is left untouched
data = X.copy(deep=True)
data

In [None]:
# Dummy (one-hot) encode the categorical columns: genre and director
data_binary_encoded = pd.get_dummies(data=data, columns=["genre", "director"])
data_binary_encoded.head(n=5)

In [None]:
# Split the encoded features and the target into training and test sets,
# stratifying on y and fixing the random state so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    data_binary_encoded,
    y,
    stratify=y,
    random_state=42,
)


In [None]:
# Fit the label encoder on the training targets only, then convert both
# splits to integer class codes stored as column vectors
label_encoder = LabelEncoder().fit(y_train)
encoded_y_train = np.reshape(label_encoder.transform(y_train), (-1, 1))
encoded_y_test = np.reshape(label_encoder.transform(y_test), (-1, 1))
encoded_y_train

In [None]:
# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to the test split
X_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Same treatment for the integer-encoded targets.
# NOTE(review): the model cells in this notebook train on the one-hot targets,
# not on y_train_scaled / y_test_scaled — confirm whether these are still needed.
y_scaler = StandardScaler()
y_train_scaled = y_scaler.fit_transform(encoded_y_train)
y_test_scaled = y_scaler.transform(encoded_y_test)

print(X_train_scaled[0])

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer class codes for the softmax output layer.
# num_classes is pinned to the number of classes the label encoder learned,
# so the train and test matrices always have the same width; without it,
# to_categorical infers the width from the largest code present in each array.
n_label_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_label_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_label_classes)
y_train_categorical[0]

## Create Deep Learning Model

In [None]:
# Create deep learning model
# Input width is read from the scaled training data (one input per
# dummy-encoded feature column) rather than hard-coded, so the model still
# builds when the set of genres/directors in the data changes.
# Three hidden layers with 100 nodes each, followed by a softmax output
# layer with one unit per target class.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]

deep_model = Sequential()
deep_model.add(Dense(units=100, activation="relu", input_dim=n_features))
deep_model.add(Dense(units=100, activation="relu"))
deep_model.add(Dense(units=100, activation="relu"))
deep_model.add(Dense(units=n_classes, activation="softmax"))

In [None]:
# Compile the model and print its architecture (training happens in the next cell).
# The network ends in a softmax layer and is trained on one-hot encoded
# targets, so categorical cross-entropy is the matching loss; mean squared
# error is a regression loss and gives weaker gradients for classification.
deep_model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
deep_model.summary()


In [None]:
# Train on the scaled features and one-hot targets for 100 epochs
deep_model.fit(
    x=X_train_scaled,
    y=y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2,
)

## Quantify the Model

In [None]:
# Score the trained network on the test split and report loss and accuracy
model_loss, model_accuracy = deep_model.evaluate(
    X_test_scaled,
    y_test_categorical,
    verbose=2,
)
print(f"Deep Neural Network - Loss: {model_loss}")
print(f"Deep Neural Network - Accuracy: {model_accuracy}")

## Make Predictions

In [None]:
# Predict class probabilities and take the most likely class for each row.
# Sequential.predict_classes() was removed in TensorFlow 2.6; an argmax over
# predict() yields the same integer class codes on both old and new versions.
class_probabilities = deep_model.predict(X_test_scaled)
encoded_predictions = np.argmax(class_probabilities, axis=-1)
# Map the integer codes back to the original rating_class labels
prediction_labels = label_encoder.inverse_transform(encoded_predictions)
print(encoded_predictions[:10])
# Show the same first ten rows for the predictions and the ground truth
print(f"Predicted classes: {prediction_labels[:10]}")
print(f"Actual Labels: {y_test[:10]}")

In [None]:
# Classification report comparing the true test labels with the predictions
from sklearn.metrics import classification_report
print(classification_report(y_true=y_test, y_pred=prediction_labels))

## Save Model

In [None]:
# Persist the trained model to dl.h5 in the current working directory
deep_model.save(filepath="dl.h5")