In [48]:
import joblib
import pandas as pd
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import KFold, cross_validate, train_test_split

In [49]:
# Read the semicolon-separated CSV into the working DataFrame.
df = pd.read_csv(filepath_or_buffer='../data/data.csv', sep=';')

In [50]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

In [51]:
# Cast the two identifier columns and the quantity column to integers
# in a single pass.
df = df.astype({
    'product_id': int,
    'store_id': int,
    'transaction_qty': int,
})

In [52]:
# The rating scale runs from a fixed lower bound of 1 up to the largest
# quantity observed in the data.
max_quantity = df['transaction_qty'].max()
reader = Reader(rating_scale=(1, max_quantity))

# Surprise reads the frame's columns as (user, item, rating), in that order:
# here stores take the user role, products the item role, and the purchased
# quantity stands in for the rating.
interaction_columns = ['store_id', 'product_id', 'transaction_qty']
data = Dataset.load_from_df(df[interaction_columns], reader)

In [53]:
# Hold out 25% of the interactions for evaluation. The shuffle is seeded so
# that re-running the notebook produces the same train/test partition.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

In [56]:
# Seed the random initialisation so that fitting on the same trainset
# yields the same model every time.
algo = SVD(random_state=42)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x25022d4b4c0>

In [57]:
# Score the held-out interactions. accuracy.rmse prints a rounded RMSE itself
# and returns the value, which is then echoed at full precision.
predictions = algo.test(testset)
rmse = accuracy.rmse(predictions, verbose=True)
print(f"Model RMSE: {rmse}")

RMSE: 0.5090
Model RMSE: 0.5089639507576648


In [58]:
# Serialise the fitted model to a file in the current working directory.
joblib.dump(value=algo, filename='coffee_recommendation_svd_model.pkl')

['coffee_recommendation_svd_model.pkl']

In [59]:
# Record the hold-out RMSE as plain text, with no trailing newline.
with open('model_accuracy.txt', mode='w') as f:
    print(rmse, end='', file=f)

In [60]:
# 5-fold cross-validation over the full dataset, reporting RMSE and MAE.
# A fresh SVD instance is passed instead of `algo` so that the per-fold
# refitting cannot overwrite the model that was trained, evaluated and saved
# above. Both the fold assignment and the estimator are seeded so the table
# is reproducible across runs.
cross_validate(
    SVD(random_state=42),
    data,
    measures=['RMSE', 'MAE'],
    cv=KFold(n_splits=5, random_state=42),
    verbose=True,
)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.5004  0.5060  0.5014  0.5080  0.5035  0.5039  0.0028  
MAE (testset)     0.4407  0.4356  0.4365  0.4378  0.4368  0.4375  0.0017  
Fit time          0.69    0.65    0.68    0.66    0.73    0.68    0.03    
Test time         0.14    0.08    0.14    0.08    0.09    0.10    0.03    


{'test_rmse': array([0.50041248, 0.50602069, 0.50142428, 0.50798344, 0.50353068]),
 'test_mae': array([0.44065993, 0.43560738, 0.43646458, 0.43784372, 0.43677541]),
 'fit_time': (0.6905961036682129,
  0.6481094360351562,
  0.6786293983459473,
  0.6648895740509033,
  0.733259916305542),
 'test_time': (0.14062070846557617,
  0.07896614074707031,
  0.13968324661254883,
  0.07617425918579102,
  0.08572506904602051)}