In [17]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load data
ratings_df = pd.read_csv('/content/drive/MyDrive/ColabDrive/ratings.csv')
movies_df = pd.read_csv('/content/drive/MyDrive/ColabDrive/movies.csv')

print("Load Dataset - Done")

# Merge data
df = pd.merge(ratings_df, movies_df, on='movieId')

# Create one-hot encoding of movie genres
genres = ['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
for genre in genres:
    df[genre] = df['genres'].apply(lambda x: 1 if genre in x else 0)

# Select the first 1000 records
df = df.head(1000)

# Split data into train and test sets
train_df, test_df = train_test_split(df, test_size=0.2)

print("Splitting Dataset - Done")

# Define RNN model
model = Sequential()
model.add(LSTM(64, input_shape=(len(genres), 1)))
model.add(Dense(1))

# Train RNN model
model.compile(optimizer='adam', loss='mse')
history = model.fit(train_df[genres].values.reshape(-1, len(genres), 1), train_df['rating'], epochs=10)


print("Train RNN model - Done")

# Make predictions
test_size = test_df.shape[0] - (test_df.shape[0] % 19)
test_df_subset = test_df.head(test_size)
preds = model.predict(test_df_subset[genres].values.reshape(-1, 19, 1))

# Evaluate model
mae = mean_absolute_error(test_df_subset['rating'], preds)
mape = (abs(test_df_subset['rating'].values - preds.flatten()) / test_df_subset['rating'].values).mean() * 100
evs = explained_variance_score(test_df_subset['rating'], preds)
medae = median_absolute_error(test_df_subset['rating'], preds)

print("RNN model - Evaluated")
print('MAE: ', mae)
print('RMSE: ', rmse)
print('MAPE: ', mape)
print('Explained Variance Score: ', evs)
print('Median Absolute Error: ', medae)

Load Dataset - Done
Splitting Dataset - Done
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train RNN model - Done
RNN model - Evaluated
MAE:  0.7859430865237588
RMSE:  0.9333649116325208
MAPE:  25.760783395572012
Explained Variance Score:  -2.220446049250313e-16
Median Absolute Error:  0.9960811138153076
