In [1]:
pip install numpy pandas scikit-learn implicit flask

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
import os

In [3]:
# Reading csv to dataframe
df = pd.read_csv("events.csv")
len(df)

884124

In [4]:
# Cleaning
df.dropna(inplace=True)
len(df)

515690

In [5]:
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix

# Encode user_id and product_id
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

df['user'] = user_encoder.fit_transform(df['user_id'])
df['item'] = item_encoder.fit_transform(df['product_id'])

# Create user-item interaction matrix
interaction_matrix = csr_matrix(
    (df['event_type'].apply(lambda x: 1 if x == 'view' else 2), 
     (df['user'], df['item']))
)

In [6]:
# Collaborative filtering
import implicit

# Train the model on the interaction matrix
# Note: implicit library expects the item-user matrix

if os.path.exists("recommendation.npz"):
  # model.load("recommendation.npz")
  data = np.load('recommendation.npz', allow_pickle=True)
  # Initialize a new AlternatingLeastSquares object
  model = implicit.als.AlternatingLeastSquares()
  # Load the model parameters from the .npz file
  model.__dict__.update(data)
else:
  # Initialize the model
  model = implicit.als.AlternatingLeastSquares(factors=50, regularization=0.1, iterations=30)
  model.fit(interaction_matrix.T)
  np.savez('recommendation.npz', **model.__dict__)

  from .autonotebook import tqdm as notebook_tqdm
  check_blas_config()
100%|██████████| 30/30 [01:11<00:00,  2.37s/it]


In [7]:
def recommend(user_id, user_encoder, item_encoder, model, interaction_matrix, n_recommendations=10):
    # Check if user_id is in the encoder
    if user_id not in user_encoder.classes_:
        return []

    # Get the internal user id
    internal_user_id = user_encoder.transform([user_id])[0]
    
    # Get recommendations (item ids and scores)
    recommendations = model.recommend(internal_user_id, interaction_matrix[internal_user_id], N=n_recommendations)
    
    # Decode the item ids back to product ids and handle unseen items
    recommended_items = []
    scores = []
    for item_id, score in zip(recommendations[0], recommendations[1]):
        try:
            recommended_items.append(item_encoder.inverse_transform([item_id])[0])
            scores.append(score)
        except ValueError:
            # Skip unseen labels
            continue
    
    return list(zip(recommended_items, scores))

In [8]:
import logging

# Setup logging
logging.basicConfig(filename="recommendation_log.log", level=logging.INFO)

def log_recommendations(user_id, recommendations):
    response = []
    for recommendation in recommendations:
        response.append({"product_id" : str(recommendation[0]), "score" : str(recommendation[1])})
    logging.info(f' User: {user_id}, Recommendations: {response}')

In [9]:
# Example: get recommendations for a specific user_id
user_id = 1515915625353230683
recommendations = recommend(user_id, user_encoder, item_encoder, model, interaction_matrix)
log_recommendations(user_id, recommendations)
print(recommendations)

[(1005425, 7.5565583e-07), (621123, 6.231506e-07)]


In [10]:
from sklearn.metrics import average_precision_score

# Evaluate recommendations (mean average precision)
def evaluate_model(true_items, recommended_items):
    if not recommended_items:
        return 0.0
    print(recommended_items)
    y_true = [1 if item[0] in true_items else 0 for item in recommended_items]
    y_scores = [score for item, score in recommended_items]
    return average_precision_score(y_true, y_scores)

# Example evaluation
true_items = [621123]  # Replace with actual user interaction data
map_score = evaluate_model(true_items, recommendations)
print(f'MAP Score: {map_score}')

[(1005425, 7.5565583e-07), (621123, 6.231506e-07)]
MAP Score: 0.5


In [11]:
start_user_id = df["user_id"].min()
end_user_id = df["user_id"].max()

counter = 0
for user_id in range(start_user_id, end_user_id + 1):
  if counter > 10:
    break
  recommendations = recommend(user_id, user_encoder, item_encoder, model, interaction_matrix)
  if len(recommendations):
    counter += 1
    log_recommendations(user_id, recommendations)

In [12]:
# Train the model with new data
df_new = pd.read_csv("new_events.csv")

# Clean data
df_new.dropna(inplace=True)

# Encode user_id and product_id
user_encoder_new = LabelEncoder()
item_encoder_new = LabelEncoder()

df_new['user'] = user_encoder_new.fit_transform(df_new['user_id'])
df_new['item'] = item_encoder_new.fit_transform(df_new['product_id'])

# Create user-item interaction matrix
interaction_matrix_new = csr_matrix(
    (df_new['event_type'].apply(lambda x: 1 if x == 'view' else 2), 
     (df_new['user'], df_new['item']))
)


if os.path.exists("recommendation.npz"):
    data = np.load('recommendation.npz', allow_pickle=True)
    # Initialize a new AlternatingLeastSquares object
    model = implicit.als.AlternatingLeastSquares()
    # Load the model parameters from the .npz file
    model.__dict__.update(data)
else:
    model = implicit.als.AlternatingLeastSquares(factors=50, regularization=0.1, iterations=100)
    model.fit(interaction_matrix_new.T)
    np.savez("recommendation.npz", **model.__dict__)

In [13]:
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/recommend', methods=['GET'])
def recommend_route():
    user_id = request.args.get('user_id', type=int)
    recommendations = recommend(user_id, user_encoder, item_encoder, model, interaction_matrix)
    response = []
    if recommendations:
        for recommendation in recommendations:
            response.append({"product_id" : str(recommendation[0]), "score" : str(recommendation[1])})
        return jsonify({"status": "success", "response": response})
    return jsonify({"status": "failed", "response" : "cannot recommend for this user due to insufficient data"})

if __name__ == '__main__':
    app.run()

 * Serving Flask app '__main__'
 * Debug mode: off
