### Sample script for user-based collaborative filtering  

#### Import libraries

In [None]:
import numpy as np
import pandas as pd

#### Parameters  

In [None]:
# Path of the input CSV file.
csv_in = 'sushi3b.5000.10.score.csv'

# A user in the DB is compared with the target user only when they share
# at least this many scored items.
min_common_items = 4

# An item is a recommendation candidate only when at least this many
# similar users have scored it.
min_eval_users = 3

# Raise the pandas display limits so that more rows and columns are shown.
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)

#### Read CSV file  

In [None]:
# Load the score table; the first line of the file supplies the column names.
# (Comma separator and "skip no rows" are the read_csv defaults.)
df = pd.read_csv(csv_in, header=0)
print(df.shape)
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
df.info()
display(df.head())

#### Replace -1 with NaN (so that those entries are excluded from the calculations)  

In [None]:
# Turn every -1 entry into NaN so that it is left out of later calculations.
df = df.replace(to_replace=-1, value=np.nan)
# Show the first rows to confirm the replacement.
display(df.head())

#### Return Series of similarity of users  
by using the Pearson correlation coefficient (-1 .. 1).  

In [None]:
def get_sim_ser_by_pearson(df_users, ser_target):
    """Return the Pearson correlation of every row of df_users with ser_target.

    Each row of ``df_users`` is correlated with ``ser_target`` (matched on
    the column labels / Series index).  Rows whose correlation is NaN are
    left out of the result.

    Returns:
        A Series indexed like the remaining rows of ``df_users`` holding
        the correlation coefficients.
    """
    row_corr = df_users.corrwith(ser_target, axis=1, method='pearson')
    has_value = row_corr.notna()
    return row_corr[has_value]

#### Prediction of scores for the target user  
(for items with no score)  
based on deviation from average  

In [None]:
def predict_scores(df_sim, ser_sim, ser_target, min_users=None):
    """Predict the target user's scores for items the target has not scored.

    For each candidate item the prediction is the target user's average
    score plus the similarity-weighted average of the other users'
    deviations from their own average score.

    Args:
        df_sim: DataFrame of scores (rows: users, columns: items).
        ser_sim: Series of similarities, indexed like the rows of df_sim.
        ser_target: Series of the target user's scores, indexed by item.
        min_users: minimum number of users in df_sim who must have scored
            an item for it to be predicted.  When None (the default) the
            module-level ``min_eval_users`` setting is used.

    Returns:
        A Series of predicted scores indexed by item, sorted in descending
        order.  Items already scored by the target user, items scored by
        fewer than ``min_users`` users, and items whose similarity weights
        sum to zero (no defined weighted average) are left out.
    """
    if min_users is None:
        # Keep the original behaviour: fall back to the notebook-wide setting.
        min_users = min_eval_users

    user_means = df_sim.mean(axis=1)
    target_mean = ser_target.mean()

    predictions = {}
    for item in df_sim.columns:
        if item in ser_target.index:
            continue
        scores = df_sim[item]
        rated = scores.notnull()
        if rated.sum() < min_users:
            continue
        weights = ser_sim[rated]
        weight_total = np.abs(weights).sum()
        if weight_total == 0:
            # All similarities are zero: the weighted average would be 0/0.
            continue
        # Subtract into a new Series; an in-place "-=" could write the
        # deviations back into the caller's df_sim.
        deviations = (scores - user_means)[rated]
        predictions[item] = (
            target_mean + (deviations * weights).sum() / weight_total
        )

    # An explicit dtype keeps the result numeric even when nothing is predicted.
    ser_ret = pd.Series(predictions, dtype=float)
    return ser_ret.sort_values(ascending=False)

#### Function for user-based collaborative filtering.  

arguments: DataFrame of scores of the users in DB, and dictionary of scores for the target user  

ex)
get_recomm_by_user_sim(df, {'maguro':1, 'ika':1, 'uni':3,
                            'awabi':4, 'hirame':4, 'aoyagi':4})  
-> returns a pandas Series of predicted scores indexed by item and sorted in descending order, such as akagai 2.9835603009918303, mirugai 2.945676429588114, ...

In [None]:
def get_recomm_by_user_sim(df, target_dic):
    """Recommend items to a target user by user-based collaborative filtering.

    Args:
        df: DataFrame of scores (rows: users, columns: items).
        target_dic: dictionary mapping item name to the target user's score.

    Returns:
        The Series produced by ``predict_scores``: predicted scores for
        items the target user has not scored, sorted in descending order.
    """
    target = pd.Series(target_dic)

    # Restrict the table to the items the target user has scored.
    scores_on_target_items = df[target.index]

    # Keep only the users who scored enough of those items.
    n_shared = scores_on_target_items.notnull().sum(axis=1)
    has_enough_shared = n_shared >= min_common_items
    comparable_users = scores_on_target_items[has_enough_shared]

    # Similarity of each remaining user to the target user.
    similarity = get_sim_ser_by_pearson(comparable_users, target)

    # Full score rows (all items) of the users that have a similarity value.
    neighbour_scores = df.loc[similarity.index]

    return predict_scores(neighbour_scores, similarity, target)

#### Do recommendation  

Number of items calculated: 46
Recommendation:
ankimo     4.176725
kohada     3.716200
mirugai    3.598203
shako      3.581244
akagai     3.539989
dtype: float64
Number of items calculated: 73
Recommendation:
toro_samon    3.993705
okura         3.607180
negi_toro     3.338903
tarabagani    3.312345
kanpachi      3.228670
dtype: float64

In [None]:
# Score dictionaries of the target users to produce recommendations for.
target_users = [
    {'maguro': 1, 'ika': 1, 'uni': 3,
     'awabi': 4, 'hirame': 4, 'aoyagi': 4},
    {'anago': 3, 'maguro': 4, 'ikura': 3,
     'hamachi': 4, 'samon': 4, 'unagi': 3,
     'suzuki': 2, 'hamo': 1, 'nasu': 1,
     'shiso_maki': 2},
]

# Run the recommendation for each target user and print the top entries.
for target_scores in target_users:
    recomm = get_recomm_by_user_sim(df, target_scores)
    print('Number of items calculated:', len(recomm))
    print('Recommendation:')
    print(recomm.head())