In [4]:
import pandas as pd
import csv
import numpy as np
import pandas as pd

from lightfm.data import Dataset
from lightfm import LightFM
from lightfm import cross_validation
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score
from lightfm.evaluation import recall_at_k

Process the downloaded data (.idata and .udata) from text file to usable CSV file. Export the CSV files to use

In [5]:
with open('Sushi_data/Original_datasets/sushi3.idata', 'r') as in_file:
    stripped = (line.strip() for line in in_file)
    lines = (line.split("\t") for line in stripped if line)
    with open('sushi_features.csv', 'w') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(('item_ID', 'name','style','major_group','minor_group','oily','frequently_eat','normalized_price','most_frequently'))
        writer.writerows(lines)
sushi_df = pd.read_csv('sushi_features.csv')
sushi_df.head()

Unnamed: 0,item_ID,name,style,major_group,minor_group,oily,frequently_eat,normalized_price,most_frequently
0,0,ebi,1,0,6,2.728978,2.138422,1.83842,0.84
1,1,anago,1,0,3,0.926384,1.990228,1.992459,0.88
2,2,maguro,1,0,1,1.769559,2.348506,1.874725,0.88
3,3,ika,1,0,5,2.688401,2.04324,1.515152,0.92
4,4,uni,1,0,8,0.813043,1.643478,3.287282,0.88


In [6]:
with open('Sushi_data/Original_datasets/sushi3.udata', 'r') as in_file:
    stripped = (line.strip() for line in in_file)
    lines = (line.split("\t") for line in stripped if line)
    with open('customer_features.csv', 'w') as out_file:
        writer = csv.writer(out_file)
        writer.writerow(('user_ID', 'gender','age','total_time','prefecture_ID_past','region_ID','east_or_west','prefecture_ID_now','regional_ID_now','east_or_west_now','unchange_location'))
        writer.writerows(lines)
customer_df = pd.read_csv('customer_features.csv')
customer_df.head()

Unnamed: 0,user_ID,gender,age,total_time,prefecture_ID_past,region_ID,east_or_west,prefecture_ID_now,regional_ID_now,east_or_west_now,unchange_location
0,6371,0,2,355,0,0,0,0,0,0,0
1,10007,1,1,214,26,6,1,26,6,1,0
2,1777,1,2,363,29,6,1,26,6,1,1
3,3613,0,4,395,40,9,1,26,6,1,1
4,8081,1,1,707,26,6,1,36,8,1,1


Exploratory Data Analysis<br>
- Users appear to be equally represented - gender, age, region <br>
- Items are equally represented ranging from different styles, groups (fish, shrimp, clam, etc), oiliness, popularity<br>
- The user / item rating matrix is pretty sparse. There are 100 different items (different types of sushi) and 5000 users who rated the sushi. Many of the items were not given ratings. 
<br><br>
Here we calculate the sparsity of the user item matrix



In [7]:
ratings = pd.read_csv("Sushi_data/sushi_ratings_data.csv")
ratings = ratings.drop(columns=['user_id'])
columns = list(ratings.columns)
print("Number of Missing Ratings")
ratings[columns].eq(-1).sum()

Number of Missing Ratings


ebi (shrimp)                           3454
anago (sea eel)                        3465
maguro (tuna)                          3594
ika (squid)                            3543
uni (sea urchin)                       3620
                                       ... 
karei (flounder)                       4905
hiramasa (something like amberjack)    4932
namako (sea cucumber)                  4921
shishamo (smelt)                       4925
kaki (oyster)                          4923
Length: 100, dtype: int64

Read in all of the csv files and set up all of the dataframes to be used. <br>
Make a copy of the sushi ratings without the user id column, to be used later by the model.<br> 
The lightfm model needs specific input, this will help prepare for it.

In [8]:
customer_features_df = pd.read_csv("Sushi_data/customer_features.csv")
sushi_features_df = pd.read_csv("Sushi_data/sushi_features.csv")
sushi_ratings_df = pd.read_csv("Sushi_data/sushi_ratings_data.csv")

sushi_ratings_no_user = sushi_ratings_df.drop(columns=['user_id'])

customer_features_df.head()
sushi_features_df.head()
sushi_ratings_df.head()
sushi_ratings_no_user.head()

Unnamed: 0,ebi (shrimp),anago (sea eel),maguro (tuna),ika (squid),uni (sea urchin),tako (octopus),ikura (salmon roe),tamago (egg),toro (fatty tuna),amaebi (AMA shrimp),...,hoya (ascidian),battera (OSHIZUSHI style mackerel),kyabia (caviar),karasumi (dried mullet roe),uni_kurage (sea urchin & jellyfish),karei (flounder),hiramasa (something like amberjack),namako (sea cucumber),shishamo (smelt),kaki (oyster)
0,-1,0,-1,4,2,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1,-1,-1,-1,-1,-1,-1,0,-1,1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,-1,3,4,-1,-1,-1,3,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,4,-1,-1,3,4,1,-1,-1,4,3,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,-1,-1,-1,-1,1,-1,-1,-1,-1,4,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,0


Set up lists of sushi Id's and user Id's for model use later. <br>
The lightfm model needs specific input, this will help prepare for it.

In [9]:
sushi_id = list(range(0,100))
user_id = list(range(0,5000))

print(sushi_id[:10])
print(user_id[:10])

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


Find all interactions where an interaction is defined as someone who positively rated a sushi (greater than 2)
Returns the interaction matrix where if a user rated a sushi > 2 then it will be have a 'True' in that cell, otherwise it will have 'False'
This will be used to get the interaction list next.

In [10]:
df= sushi_ratings_no_user >= 3
df.head()

Unnamed: 0,ebi (shrimp),anago (sea eel),maguro (tuna),ika (squid),uni (sea urchin),tako (octopus),ikura (salmon roe),tamago (egg),toro (fatty tuna),amaebi (AMA shrimp),...,hoya (ascidian),battera (OSHIZUSHI style mackerel),kyabia (caviar),karasumi (dried mullet roe),uni_kurage (sea urchin & jellyfish),karei (flounder),hiramasa (something like amberjack),namako (sea cucumber),shishamo (smelt),kaki (oyster)
0,False,False,False,True,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,True,True,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,True,False,False,True,True,False,False,False,True,True,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False


Create interaction_list.<br>
Interaction_list is a list of tuples that represent whether a user liked an item - find all the 'True' values from df
For example [(1,6),(1,8),(2,2)...] represents user 1 liked item 6 and 8, while user 2 liked item 2...
This interaction list is necessary for lightfm to make its recommendation model.<br>

Create and Calculare Weight_list<br>
Weight list will get the actual ratings of each True item in df, from the original sushi_ratings_df. These ratings will be used as the weights for the lightfm model<br>

Combine interaction_list with weight_list to make a new list called interaction_weight_list. This is a list of tuples which contain a positively rated sushi and its rating<br>
e.g [(1,4,3), (1,23,4),...] means user 1 gave sushi 4 a rating of 3 and user 1 gave sushi 23 a rating of 4 ...

In [11]:
interaction_list = [(df[col][df[col].eq(True)].index[i], df.columns.get_loc(col)) for col in df.columns for i in range(len(df[col][df[col].eq(True)].index))]
weight_list = []


# get the scores of each True item in matrix 
# the scores will be used as weights for the lightfm model
for x in interaction_list:
    weight = sushi_ratings_no_user.iloc[x] / 4
    weight_list.append((weight,))

interaction_weight_list = []
for (x, y) in zip(interaction_list, weight_list):
    interaction_weight_list.append(x + y)

interaction_weight_list


[(3, 0, 1.0),
 (5, 0, 1.0),
 (8, 0, 1.0),
 (15, 0, 0.75),
 (16, 0, 1.0),
 (19, 0, 0.75),
 (23, 0, 0.75),
 (26, 0, 1.0),
 (29, 0, 0.75),
 (38, 0, 0.75),
 (41, 0, 1.0),
 (44, 0, 1.0),
 (45, 0, 1.0),
 (47, 0, 1.0),
 (48, 0, 1.0),
 (51, 0, 1.0),
 (52, 0, 0.75),
 (58, 0, 1.0),
 (59, 0, 0.75),
 (60, 0, 0.75),
 (61, 0, 1.0),
 (63, 0, 1.0),
 (74, 0, 1.0),
 (78, 0, 1.0),
 (80, 0, 1.0),
 (81, 0, 0.75),
 (98, 0, 0.75),
 (105, 0, 1.0),
 (111, 0, 1.0),
 (114, 0, 1.0),
 (116, 0, 1.0),
 (118, 0, 1.0),
 (119, 0, 0.75),
 (120, 0, 0.75),
 (121, 0, 0.75),
 (130, 0, 1.0),
 (135, 0, 1.0),
 (142, 0, 1.0),
 (149, 0, 0.75),
 (154, 0, 0.75),
 (156, 0, 1.0),
 (162, 0, 0.75),
 (164, 0, 1.0),
 (165, 0, 1.0),
 (168, 0, 0.75),
 (170, 0, 1.0),
 (171, 0, 1.0),
 (175, 0, 0.75),
 (180, 0, 1.0),
 (188, 0, 1.0),
 (194, 0, 1.0),
 (201, 0, 1.0),
 (207, 0, 1.0),
 (214, 0, 1.0),
 (224, 0, 0.75),
 (227, 0, 0.75),
 (229, 0, 1.0),
 (230, 0, 0.75),
 (231, 0, 0.75),
 (236, 0, 1.0),
 (237, 0, 1.0),
 (238, 0, 1.0),
 (239, 0, 1.0),


Creating a lightfm dataset using the user_id list (0-4999) and the sushi_id list (0-99)<br>
The lightfm dataset is a sparse matrix, which is a matrix representation to save space.<br>
This dataset will be used by lightfm model to make the recommendation system

In [12]:
#creating a lightfm dataset using the list of user_id (0-4999) and list of sushi_id(0-99)
dataset = Dataset()
dataset.fit(
    set(user_id), 
    set(sushi_id)
    )

interactions, weights = dataset.build_interactions(
    interaction_weight_list)

Split the dataset into train and test datasets

In [13]:
train_split, test_split = cross_validation.random_train_test_split(weights, test_percentage = 0.25, random_state = 2022 )
train_split
test_split

<5000x100 sparse matrix of type '<class 'numpy.float32'>'
	with 7527 stored elements in COOrdinate format>

Train a lightfm model and fit the test data

In [14]:
model = LightFM(
    no_components=150,
    learning_rate=0.05,
    loss='logistic',
    random_state=2023)

model.fit(
    train_split,
    epochs=10, verbose=True)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9


<lightfm.lightfm.LightFM at 0x7fb7be36e210>

Test model, and get different model metrics:<br>
AUC score<br>
Precision @ k <br>
Hit rate/recall @ k<br>
k = 10

In [15]:
train_auc = auc_score(model, test_split).mean()
precision = precision_at_k(model,test_split, train_split, k=10).mean()
hit_rate = recall_at_k(model, test_split, train_split, k = 10).mean()
train_auc
precision
hit_rate

0.4058397479912785

Find the top 10 highest rates sushis<br>
Calculate a Popularity Baseline hit rate @ 10 to compare the recommendation model to<br>
Basline model just recommends the top 10 items with the highest average ratings

In [16]:
#preparing Popularity Baseline
test_set_likes = {key: [] for key in test_split.nonzero()[0]}
for test_row, test_col in zip(*test_split.nonzero()):
    test_set_likes[test_row].append(test_col)

temp = sushi_ratings_df.copy()
temp = temp.replace(-1,np.NaN)
temp = temp.describe().drop(columns='user_id')
mean_rate = temp.transpose().reset_index().rename(columns={'index':'sushi'})
mean_rate = mean_rate.sort_values(by=['mean'],ascending=False)
top_ten = mean_rate['sushi'][:10].reset_index()
top_ten = top_ten['index'].tolist()

print("top 10 sushis:")
print(top_ten)

#calculating baseline popularity (top 10 highest avg rated sushis)
Total_hit_ = 0
Total_test_case_ = 0
for user_x in test_set_likes.keys():
    hit = 0
    for a in test_set_likes[user_x]:
        Total_test_case_ += 1
        if a in top_ten:
            hit += 1
            Total_hit_ += 1

popular_hit_rate_ = Total_hit_/ Total_test_case_
print("Baseline hitrate @ 10 is {}".format(popular_hit_rate_))



top 10 sushis:
[19, 8, 37, 2, 47, 9, 61, 0, 79, 22]
Baseline hitrate @ 10 is 0.2344891723130065


Get a list of recommendations for a specific user in our dataset. <br>
Lightfm's predict returns a list of scores of the likelihood that an item should be recommended to a user.<br>
The higher the score, the higher the recommendation.<br>
Here, we will get the list of scores, and then order them from highest to lowest (top recommended to lowest recommended)<br>
We will then compare the recommendations to the test set to see if a recommendation was a correct 

In [17]:
user_x = 100
n_users, n_items = interactions.shape

#returns a list of recommendation scores of all 100 items 
prediction_scores = list(model.predict(user_x, np.arange(n_items)))
#returns the index of the recommendation scores sorted from highest to lowest 
predictions = np.flip(list(np.argsort(prediction_scores)))

# create a dictionary where we match each item number to sushi name 
# so when we make a reccomendation instead of returning 5 we can give back item 5's sushi name
col_names = (list(df.columns))
sushi_names = dict(zip(sushi_id, col_names))

counter = 0
print("Recommendations for user {:d}:".format(user_x))
for x in range(10):
    value = predictions[counter]
    sushi = sushi_names[value]
    print("     " + sushi)

    counter+=1

#print User's likes from the test set 
print("Test set for user {:d}:".format(user_x))

#test_set_likes is a dictionary key is user value is list of likes frommthe test split
test_set_likes = {key: [] for key in test_split.nonzero()[0]}
for test_row, test_col in zip(*test_split.nonzero()):
    test_set_likes[test_row].append(test_col)

if user_x in test_set_likes.keys():
    for a in test_set_likes[user_x]:
        sushi = a
        print(sushi_names[sushi])
else:
    print("Test set was empty - not enough likes for user to split 20%")

Recommendations for user 100:
     ebi (shrimp)
     maguro (tuna)
     anago (sea eel)
     toro (fatty tuna)
     amaebi (AMA shrimp)
     ikura (salmon roe)
     ika (squid)
     hotategai (scallop)
     tako (octopus)
     tai (sea bream)
Test set for user 100:
awabi (abalone)
anago (sea eel)
tai (sea bream)


We want to test our model on differnt slices of data to make sure that it does not perform differently on differnt input.<br>
In the metaflow, we tested on many different slices.<br>
But here, I will show one example, where we test our model on males vs females, to see if it peforms equally the same on both

Start by finding all the users that are female and all the users that are male<br>
Create two ratings tables, one where you drop all females, and one where you drop all males


In [18]:
gender = list(customer_features_df.loc[:,'gender'])
        
males = []
females = []
iterator = 0
for i in range(len(gender)):
    if(gender[iterator] == 0):
        males.append(iterator)
    else:
        females.append(iterator)
    iterator+=1

sushi_ratings_no_user_males_only = sushi_ratings_no_user.drop(index= females)
sushi_ratings_no_user_females_only = sushi_ratings_no_user.drop(index= males)

Split the male and female data into training and test sets<br>
Fit the male test set and the female test set on two separate models<br>
Test the models and Calculate metrics

In [19]:
positive_rankings= sushi_ratings_no_user_males_only >= 3
interaction_list = [(positive_rankings[col][positive_rankings[col].eq(True)].index[i], positive_rankings.columns.get_loc(col)) for col in positive_rankings.columns for i in range(len(positive_rankings[col][positive_rankings[col].eq(True)].index))]
weight_list = []


# find get the scores of each True item in matrix 
# the scores will be used as weights for the lightfm model_male
for x in interaction_list:
    weight = sushi_ratings_no_user.iloc[x] / 4
    weight_list.append((weight,))

interaction_weight_list = []
for (x, y) in zip(interaction_list, weight_list):
    interaction_weight_list.append(x + y)

#creating a lightfm dataset using the list of user_id (0-4999) and list of sushi_id(0-99)
dataset = Dataset()
dataset.fit(
    set(user_id), 
    set(sushi_id)
    )

interactions, weights = dataset.build_interactions(
    interaction_weight_list)

train_split, test_split = cross_validation.random_train_test_split(weights, test_percentage = 0.25, random_state = 2022 )


# Train/fit model_male
model_male = LightFM(
    no_components=150,
    learning_rate=0.05,
    loss='warp-kos',
    random_state=2023)

model_male.fit(
    train_split,
    epochs=10, verbose=True)

test_set_likes = {key: [] for key in test_split.nonzero()[0]}
for test_row, test_col in zip(*test_split.nonzero()):
    test_set_likes[test_row].append(test_col)


temp = sushi_ratings_df.copy()
temp = temp.replace(-1,np.NaN)
temp = temp.describe().drop(columns='user_id')
mean_rate = temp.transpose().reset_index().rename(columns={'index':'sushi'})
mean_rate = mean_rate.sort_values(by=['mean'],ascending=False)
top_ten = mean_rate['sushi'][:10].reset_index()
top_ten = top_ten['index'].tolist()

train_auc = auc_score(model_male, test_split).mean()
precision = precision_at_k(model_male, test_split, train_split, k=10).mean()
hit_rate = recall_at_k(model_male, test_split, train_split, k = 10).mean()

print("Male Metrics:")
print(train_auc)
print(precision)
print(hit_rate)


Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Male Metrics:
0.6713763
0.055284124
0.2881104114508257


In [20]:
positive_rankings= sushi_ratings_no_user_females_only >= 3
interaction_list = [(positive_rankings[col][positive_rankings[col].eq(True)].index[i], positive_rankings.columns.get_loc(col)) for col in positive_rankings.columns for i in range(len(positive_rankings[col][positive_rankings[col].eq(True)].index))]
weight_list = []


# find get the scores of each True item in matrix 
# the scores will be used as weights for the lightfm model_female
for x in interaction_list:
    weight = sushi_ratings_no_user.iloc[x] / 4
    weight_list.append((weight,))

interaction_weight_list = []
for (x, y) in zip(interaction_list, weight_list):
    interaction_weight_list.append(x + y)

#creating a lightfm dataset using the list of user_id (0-4999) and list of sushi_id(0-99)
dataset = Dataset()
dataset.fit(
    set(user_id), 
    set(sushi_id)
    )

interactions, weights = dataset.build_interactions(
    interaction_weight_list)

train_split, test_split = cross_validation.random_train_test_split(weights, test_percentage = 0.25, random_state = 2022 )


# Train/fit model_female
model_female = LightFM(
    no_components=150,
    learning_rate=0.05,
    loss='warp-kos',
    random_state=2023)

model_female.fit(
    train_split,
    epochs=10, verbose=True)

test_set_likes = {key: [] for key in test_split.nonzero()[0]}
for test_row, test_col in zip(*test_split.nonzero()):
    test_set_likes[test_row].append(test_col)


temp = sushi_ratings_df.copy()
temp = temp.replace(-1,np.NaN)
temp = temp.describe().drop(columns='user_id')
mean_rate = temp.transpose().reset_index().rename(columns={'index':'sushi'})
mean_rate = mean_rate.sort_values(by=['mean'],ascending=False)
top_ten = mean_rate['sushi'][:10].reset_index()
top_ten = top_ten['index'].tolist()

train_auc = auc_score(model_female, test_split).mean()
precision = precision_at_k(model_female, test_split, train_split, k=10).mean()
hit_rate = recall_at_k(model_female, test_split, train_split, k = 10).mean()

print("Female Metrics:")
print(train_auc)
print(precision)
print(hit_rate)


Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Female Metrics:
0.678687
0.051103484
0.2795161026647049


As you can see our model performs equally on male and female users