In [1]:
### Requirements
# pip install lightfm

# Imports & Preparation

In [2]:
# Public
import pandas as pd
import numpy as np

from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k, recall_at_k
from lightfm.evaluation import auc_score

# Custom
from data_preparation import get_data
from lightfmFunctions import get_recoms, get_similar_modules
from support_functions import get_top_sorted

from classes import Mappings

In [3]:
# Load the prepared inputs from the project helper `get_data`. Of the six
# returned values, the cells shown here use `fm_data` (interaction records and
# rating counts), `user_ids` / `item_ids` (to fit the LightFM Dataset and to
# drive the recommendation loops) and `items` (passed to the recommendation
# helpers).
data, fm_data, users, user_ids, items, item_ids = get_data()

# Model

In [4]:
# Fit the LightFM Dataset on every known user id and item id.
dataset = Dataset()
dataset.fit(users=user_ids, items=item_ids)

# Project-level wrapper around the fitted dataset; handed to the recommendation helpers.
mappings = Mappings(dataset)

# Turn the rows of fm_data into plain records and build the interaction matrix
# (the second return value of build_interactions is discarded).
interaction_records = fm_data.to_records(index=False)
interactions, _ = dataset.build_interactions(interaction_records)

# Train a WARP-loss model; random_state is fixed so repeated runs are comparable.
model = LightFM(loss="warp", learning_rate=0.05, random_state=42)
model.fit(interactions=interactions, epochs=100)

<lightfm.lightfm.LightFM at 0x22a0febb080>

In [5]:
# Get recommendations for all users.
# The per-user frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and calling it in a loop re-copied
# the accumulated frame on every iteration.
# NOTE(review): assumes get_recoms returns a DataFrame for one user — confirm.
user_rec_frames = [
    get_recoms(fm_data=fm_data, items=items, dataset=dataset, model=model, mappings=mappings, user_id=i, num_recs=10)
    for i in user_ids
]
# pd.concat raises on an empty list, so keep the original "empty frame" result in that case.
recs_for_all_users = pd.concat(user_rec_frames) if user_rec_frames else pd.DataFrame()

In [6]:
# Keep recommendations only for users who have enough ratings
# (with fewer ratings the recommendations wouldn't be good).
MIN_RATINGS_PER_USER = 3

# Number of rows in fm_data per user_id.
counts = fm_data["user_id"].value_counts()

# Vectorised replacement for the row-wise apply: look up each row's user count,
# then keep the distinct user ids that reach the threshold.
has_enough_ratings = fm_data["user_id"].map(counts) >= MIN_RATINGS_PER_USER
users_with_enough_reatings = fm_data.loc[has_enough_ratings, "user_id"].unique()

# Boolean mask over the recommendation rows: True where the row's user qualifies.
mask = recs_for_all_users["user_id"].isin(users_with_enough_reatings)
recs_for_users_with_enough_ratings = recs_for_all_users[mask]

In [14]:
# Create and write sql queries to file
def create_sql(df):
    """Write one INSERT statement for a user recommendation row.

    `df` is a single row exposing `user_id` and `item_id` attributes. The
    statement is written to the module-level file handle `f`, which must
    already be open for writing when this is called.
    """
    user_id, item_id = df.user_id, df.item_id
    statement = f"INSERT INTO `recommendations_for_users`(`user_id`, `item_id`) VALUES ({user_id}, {item_id});\n"
    f.write(statement)

# `create_sql` writes to the module-level name `f`, so the handle keeps that name.
# The context manager guarantees the file is closed even if writing a row raises;
# the original open()/close() pair leaked the handle in that case.
with open("sql_queries.txt", "w+") as f:
    # Clear the table first so the generated script fully replaces its contents.
    f.write("DELETE FROM recommendations_for_users;\n")
    # apply is used only for its side effect: one INSERT line per recommendation row.
    recs_for_users_with_enough_ratings.apply(create_sql, axis=1)

In [9]:
# Get recommendations (similar modules) for all modules.
# The per-item frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and calling it in a loop re-copied
# the accumulated frame on every iteration.
# NOTE(review): assumes get_similar_modules returns a DataFrame for one base item — confirm.
item_rec_frames = [
    get_similar_modules(items=items, model=model, mappings=mappings, base_item_id=i, num_of_rec=50)
    for i in item_ids
]
# pd.concat raises on an empty list, so keep the original "empty frame" result in that case.
recs_for_all_items = pd.concat(item_rec_frames) if item_rec_frames else pd.DataFrame()

In [10]:
# Keep only recommendation rows whose recommended item has enough ratings
# (with fewer ratings the recommendations wouldn't be good).
# NOTE(review): the filter is applied to `item_id`, not `base_item_id` — confirm this is intended.
MIN_RATINGS_PER_ITEM = 3

# Number of rows in fm_data per item_id.
counts = fm_data["item_id"].value_counts()

# Vectorised replacement for the row-wise apply: look up each row's item count,
# then keep the distinct item ids that reach the threshold.
has_enough_ratings = fm_data["item_id"].map(counts) >= MIN_RATINGS_PER_ITEM
items_with_enough_reatings = fm_data.loc[has_enough_ratings, "item_id"].unique()

# Boolean mask over the recommendation rows: True where the row's item qualifies.
mask = recs_for_all_items["item_id"].isin(items_with_enough_reatings)
recs_for_items_with_enough_ratings = recs_for_all_items[mask]

In [13]:
# Create and write sql queries to file
def create_sql(df):
    """Write one INSERT statement for an item-to-item recommendation row.

    `df` is a single row exposing `base_item_id` and `item_id` attributes and a
    `'rank'` key (read by subscript rather than attribute access). The statement
    is written to the module-level file handle `f`, which must already be open
    for writing when this is called.
    """
    base_item_id, item_id = df.base_item_id, df.item_id
    rank = df['rank']
    statement = f"INSERT INTO `recommendations_for_items`(`base_item_id`, `item_id`, `rank`) VALUES ({base_item_id}, {item_id}, {rank});\n"
    f.write(statement)
    
# `create_sql` writes to the module-level name `f`, so the handle keeps that name.
# The context manager guarantees the file is closed even if writing a row raises;
# the original open()/close() pair leaked the handle in that case.
with open("sql_queries_for_modules.txt", "w+") as f:
    # Clear the table first so the generated script fully replaces its contents.
    f.write("DELETE FROM recommendations_for_items;\n")
    # apply is used only for its side effect: one INSERT line per recommendation row.
    recs_for_items_with_enough_ratings.apply(create_sql, axis=1)