In [1]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load data
ratings_df = pd.read_csv('ratings.csv')

ratings_df = ratings_df.head(1000)

print("Load Dataset - Done")

# Split data into train and test sets
train_df, test_df = train_test_split(ratings_df, test_size=0.2)

print("Splitting Dataset - Done")

# Define XGBoost parameters
params = {'objective': 'reg:squarederror', 'max_depth': 3, 'learning_rate': 0.1}

# Train GBM model
dtrain = xgb.DMatrix(train_df[['userId', 'movieId']], label=train_df['rating'])
dtest = xgb.DMatrix(test_df[['userId', 'movieId']])
model = xgb.train(params, dtrain)

print("Train xGBoost model - Done")

# Make predictions
preds = model.predict(dtest)

print("Making Predictions - Done")

# Evaluate model
from sklearn.metrics import mean_squared_error,mean_absolute_error, explained_variance_score, median_absolute_error

mae = mean_absolute_error(test_df['rating'], preds)
rmse = mean_squared_error(test_df['rating'], preds, squared=False)
mape = (abs(test_df['rating'] - preds) / test_df['rating']).mean() * 100
evs = explained_variance_score(test_df['rating'], preds)
medae = median_absolute_error(test_df['rating'], preds)

print("xGBoost model - Evaluated")
print('MAE: ', mae)
print('RMSE: ', rmse)
print('MAPE: ', mape)
print('Explained Variance Score: ', evs)
print('Median Absolute Error: ', medae)

C:\Users\rithe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
C:\Users\rithe\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


Load Dataset - Done
Splitting Dataset - Done
Train xGBoost model - Done
Making Predictions - Done
xGBoost model - Evaluated
MAE:  1.3090339171886445
RMSE:  1.4444749637781016
MAPE:  40.81852896800117
Explained Variance Score:  -0.011643590285766336
Median Absolute Error:  1.4488811492919922
