# Build a Recommendation System for Purchase Data

The scope of this notebook is to create a Model Package so that the model can be deployed as a hosted service on AWS.

### The Goal
When a user logs in, we recommend the top 10 items to add to their basket.

**Notice**: It's a really simple scenario. We assume that predictions are already provided. No Automated Pipeline. A really plain vanilla example!


In [7]:
#Libraries

#Data Science
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
# from surprise import Reader
# from surprise import Dataset
# from surprise.model_selection import cross_validate
# from surprise import NormalPredictor, BaselineOnly
# from surprise import KNNBasic,KNNWithMeans,KNNWithZScore,KNNBaseline
# from surprise import SVD,SVDpp,NMF
# from surprise import SlopeOne, CoClustering
# from surprise.model_selection import train_test_split
# from surprise.accuracy import rmse, mae
from surprise import dump

#Model Tracking
import mlflow
from mlflow.tracking import MlflowClient
TRACKING_URI = 'http://mlflow:5000'
mlflow.set_tracking_uri(TRACKING_URI)

#Utils
import os
import configparser
from collections import defaultdict
import tempfile
import json

In [2]:
# Current working directory; used below as the download destination and
# as the base path of the model artifact.
cwd = os.getcwd()

## Download Model Artefact

In [4]:
# MLflow client created with no arguments.
# NOTE(review): presumably it picks up the tracking URI set via
# mlflow.set_tracking_uri(TRACKING_URI) in the imports cell — confirm.
client = MlflowClient()

In [5]:
# Look up the registered model named 'Champion'.
champion=client.get_registered_model('Champion')
# Run id of the last entry in latest_versions.
# NOTE(review): latest_versions presumably holds one entry per stage, so
# [-1] is not necessarily the newest version overall — confirm.
championid=champion.latest_versions[-1].run_id
# Download that run's 'model/model.pkl' artifact into cwd.
client.download_artifacts(championid, 'model/model.pkl', cwd)

  from collections import Mapping, MutableMapping


'/home/jovyan/work/ModeLab/ModelApp/notebook/model/model.pkl'

## Analyze Model Artifact

In [15]:
# Location of the downloaded artifact: <cwd>/model/model.pkl.
algopath = os.path.join(cwd, 'model/model.pkl')
# dump.load yields a (predictions, algo) pair.
# NOTE(review): dump.load presumably unpickles the file — only load
# artifacts that come from a trusted registry.
predictions, algo = dump.load(algopath)

In [23]:
# Show the first ten stored predictions, a 100-character '*' separator,
# then the loaded algorithm object.
print(predictions[0:10])
print('*'*100)
print(algo)

[Prediction(uid='533', iid='245', r_ui=24.0, est=9.096523386141806, details={'was_impossible': False}), Prediction(uid='8609', iid='275', r_ui=0.0, est=3.847388464042559, details={'was_impossible': False}), Prediction(uid='16414', iid='77', r_ui=0.0, est=3.331804696233321, details={'was_impossible': False}), Prediction(uid='3251', iid='17', r_ui=6.0, est=1.9515844176243702, details={'was_impossible': False}), Prediction(uid='27040', iid='151', r_ui=0.0, est=6.427005270702793, details={'was_impossible': False}), Prediction(uid='15242', iid='105', r_ui=0.0, est=2.2707869839722457, details={'was_impossible': False}), Prediction(uid='18643', iid='42', r_ui=10.0, est=2.67216763458241, details={'was_impossible': False}), Prediction(uid='20155', iid='71', r_ui=20.0, est=15.860727324132027, details={'was_impossible': False}), Prediction(uid='3355', iid='2', r_ui=3.0, est=2.4905236427530113, details={'was_impossible': False}), Prediction(uid='1054', iid='7', r_ui=0.0, est=5.053559316605646, det

## Model Pipeline

#### Read Data 

In [None]:
def reader(dbconn_path='./dbconn.properties'):
    '''
    Function to read the whole TRANSACTIONS table into a DataFrame.

    The connection settings (host, port, user, password, database) are
    taken from the CONN section of a properties file.

    param: dbconn_path: Path of the properties file.
        Default is './dbconn.properties'.
    Return: pandas DataFrame holding the result of
        "select * from TRANSACTIONS".
    Raise: configparser.NoSectionError / configparser.NoOptionError when
        the file is missing (config.read skips unreadable files) or a
        CONN setting is absent.
    '''
    # Imported locally: the notebook-level pandas import is commented out.
    import pandas as pd

    # RawConfigParser returns values verbatim (no '%' interpolation).
    config = configparser.RawConfigParser()
    config.read(dbconn_path)
    db_host = config.get('CONN', 'host')
    db_port = config.get('CONN', 'port')
    db_user = config.get('CONN', 'user')
    db_pwd = config.get('CONN', 'password')
    db_name = config.get('CONN', 'database')

    # NOTE(review): user and password are inserted verbatim, so characters
    # such as '@' or '/' must already be URL-encoded in the properties file.
    connection_str = f'mysql+pymysql://{db_user}:{db_pwd}@{db_host}:{db_port}/{db_name}'

    # Handing the URI to pandas lets it create the SQLAlchemy engine and
    # manage the connection for the duration of the query, so nothing is
    # left open if the query fails. The frame is returned to the caller
    # instead of being discarded.
    return pd.read_sql("select * from TRANSACTIONS", connection_str)

#### Transform data

In [None]:
def trasformer(transactions=None):
    '''
    Function to turn raw transactions into integer product ratings.

    Each transaction row carries a customerId and a '|'-separated
    products string. Purchases are counted per (customer, product),
    min-max scaled per product across customers, and expressed as
    integers between 0 and 100.

    param: transactions: DataFrame with 'customerId' and 'products'
        columns. Default is None, which uses the notebook-level df_trx.
    Return: DataFrame with columns customerId, productId, prod_ratings,
        one row per (customer, product) pair that has a rating.
    '''
    if transactions is None:
        # Keeps the original zero-argument call working.
        transactions = df_trx

    # One row per purchased product.
    purchases = transactions[['customerId']].copy()
    purchases['productId'] = transactions['products'].str.split('|')
    purchases = purchases.explode('productId').dropna(subset=['productId'])

    purchase_counts = (
        purchases.groupby(['customerId', 'productId'])
        .size()
        .reset_index(name='purchase_count')
    )

    # Customers x products matrix; NaN where a customer never bought it.
    count_matrix = purchase_counts.pivot_table(
        index='customerId',
        columns='productId',
        values='purchase_count',
        aggfunc='sum',
    )

    # Min-max scaling per product column.
    # NOTE(review): a product whose count is the same for every customer
    # has max == min, scales to NaN and is dropped below — confirm that
    # this is intended.
    col_min = count_matrix.min()
    col_max = count_matrix.max()
    scaled = (count_matrix - col_min) / (col_max - col_min)

    ratings = (
        scaled.reset_index()
        .melt(id_vars=['customerId'], var_name='productId', value_name='prod_ratings')
        .dropna()
        .reset_index(drop=True)
    )

    # Round after multiplying as well: int() alone truncates, e.g.
    # round(0.29, 2) * 100 == 28.999999999999996 would become 28.
    ratings['prod_ratings'] = ratings['prod_ratings'].apply(
        lambda x: int(round(round(x, 2) * 100))
    )
    return ratings

#### Load Data

In [None]:
def loader(ratings=None):
    '''
    Function to wrap the ratings table in a surprise Dataset.

    param: ratings: DataFrame with 'customerId', 'productId' and
        'prod_ratings' columns. Default is None, which uses the
        notebook-level data_prep_4.
    Return: the object produced by Dataset.load_from_df, built from the
        columns userID, itemID, rating (in that order).
    '''
    # Imported locally: the notebook-level Reader/Dataset imports are
    # commented out (surprise itself is already a notebook dependency).
    from surprise import Dataset, Reader

    if ratings is None:
        # Keeps the original zero-argument call working.
        ratings = data_prep_4

    # A Reader is required with the rating_scale param; the scale is the
    # observed minimum and maximum rating.
    lowest = ratings.prod_ratings.min()
    highest = ratings.prod_ratings.max()
    # Named rating_reader so the reader() function is not shadowed.
    rating_reader = Reader(rating_scale=(lowest, highest))

    # The columns must correspond to user id, item id and ratings (in that order)
    renamed = ratings.rename(
        columns={'customerId': 'userID', 'productId': 'itemID', 'prod_ratings': 'rating'}
    )
    # Return the dataset instead of discarding it.
    return Dataset.load_from_df(renamed[['userID', 'itemID', 'rating']], rating_reader)

#### Predict

In [None]:
def server():
    '''
    Placeholder for the prediction step of the pipeline.

    Raise: NotImplementedError always; the step has no implementation yet.
    '''
    # The original definition had no body, which is a syntax error.
    # Failing loudly keeps a caller from mistaking the stub for a
    # completed prediction step.
    raise NotImplementedError('server() is not implemented yet')

#### Get top-n recommendation for each user

In [26]:
def get_top_n(predictions, n=10):
    '''
    Function to build, for every user, the list of their n
    best-estimated items from a set of predictions.

    param: predictions: Iterable of 5-element predictions
        (user id, item id, true rating, estimated rating, details)
    param: n: Number of items kept per user. Default is 10.
    Return: defaultdict keyed by (raw) user id whose values are lists of
        (raw item id, rating estimation) tuples, highest estimate first,
        at most n per user.
    '''
    # Group (item, estimate) pairs under their user.
    per_user = defaultdict(list)
    for user_id, item_id, _actual, estimate, _details in predictions:
        per_user[user_id].append((item_id, estimate))

    # Rank each user's pairs by estimate, best first, and keep the head.
    for user_id in per_user:
        ranked = sorted(per_user[user_id], key=lambda pair: pair[1], reverse=True)
        per_user[user_id] = ranked[:n]

    return per_user

#### Get top-n recommendation for a single user id

In [29]:
def get_top_n_ui(top_n, ui):
    '''
    Function to return the top-N recommendation for a
    particular userid from a set of predictions.

    param: top_n: dict of top recommendations per userid
    param: ui: string of userid
    Return: dict with the single entry
        {ui: [(raw item id, rating estimation), ...]}, or an empty dict
        when ui is not a key of top_n.
    '''
    # Direct key lookup instead of scanning every entry. The membership
    # test (rather than top_n[ui]) avoids inserting an empty entry when
    # top_n is a defaultdict. The former `except ValueError` branch could
    # never trigger and has been removed.
    if ui in top_n:
        return {ui: top_n[ui]}
    return {}

In [31]:
def main():
    '''
    Placeholder entry point: performs no work and returns None.
    '''
    