In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the kitchen -> style matrix from the tab-separated export, using the
# `kitchen` column as the row index so each row is addressed by its kitchen id.
kitchens = pd.read_csv('kitchen style - kitchens.tsv', sep='\t', index_col='kitchen')
kitchens

Unnamed: 0_level_0,style_1,style_2,style_3
kitchen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,0,0
2,1,0,0
3,0,1,0
4,0,1,0
5,0,0,1
6,0,0,1


In [3]:
# Load the user -> kitchen preference matrix (which users liked which kitchens)
# from the tab-separated export, using the `user` column as the row index.
users = pd.read_csv('kitchen style - users.tsv', sep='\t', index_col='user')
users

Unnamed: 0_level_0,kitchen_1,kitchen_2,kitchen_3,kitchen_4,kitchen_5,kitchen_6
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,1,0,1,0,1
2,1,1,1,1,0,0
3,0,0,0,1,1,0
4,1,1,1,1,0,1
5,0,0,0,1,1,1
6,1,1,0,0,0,0
7,1,0,1,1,1,1
8,0,0,1,1,0,0
9,0,1,0,0,0,0


In [4]:
# Translate user kitchen preferences into kitchen style preferences:
# (users x kitchens) . (kitchens x styles) -> (users x styles).
# The product is taken on the raw arrays rather than by label, because the
# column labels of `users` (kitchen_1, ...) differ from the index labels of
# `kitchens` (1, ...). It therefore relies on both being in the same kitchen order.
user_style = pd.DataFrame(
    users.to_numpy().dot(kitchens.to_numpy()),
    index=users.index,
    columns=kitchens.columns,
)
user_style

Unnamed: 0_level_0,style_1,style_2,style_3
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2,1,1
2,2,2,0
3,0,1,1
4,2,2,1
5,0,1,2
6,2,0,0
7,1,2,2
8,0,2,0
9,1,0,0


In [5]:
# Create a normalized version of the user/style matrix: every row is divided
# by its own total, so each user's style weights sum to 1.
# The result is bound to its own name so it can be reused instead of being
# recomputed; `user_style` itself is left unchanged.
# Note: a user with a zero row total (no liked kitchens) comes out as NaN.
user_style_norm = user_style.div(user_style.sum(axis=1), axis=0)
user_style_norm

Unnamed: 0_level_0,style_1,style_2,style_3
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.5,0.25,0.25
2,0.5,0.5,0.0
3,0.0,0.5,0.5
4,0.4,0.4,0.2
5,0.0,0.333333,0.666667
6,1.0,0.0,0.0
7,0.2,0.4,0.4
8,0.0,1.0,0.0
9,1.0,0.0,0.0


In [6]:
# Which kitchens are most similar to each other?
# (kitchens x users) . (users x kitchens) gives a kitchen-by-kitchen matrix of
# co-like counts: with 0/1 preferences, entry (i, j) is the number of users who
# liked both kitchen i and kitchen j, and the diagonal is each kitchen's total likes.
kitchen_similarity = users.transpose().dot(users)
kitchen_similarity

Unnamed: 0,kitchen_1,kitchen_2,kitchen_3,kitchen_4,kitchen_5,kitchen_6
kitchen_1,5,4,3,4,1,3
kitchen_2,4,5,2,3,0,2
kitchen_3,3,2,4,4,1,2
kitchen_4,4,3,4,7,3,4
kitchen_5,1,0,1,3,3,2
kitchen_6,3,2,2,4,2,4


In [7]:
# The normalized version of kitchen similarity (cosine similarity between the
# kitchen columns of `users`).
# kitchen_mag: Euclidean norm of each kitchen's column of user preferences.
kitchen_mag = np.sqrt((users * users).sum())
# Outer product of the norms: entry (i, j) is |kitchen_i| * |kitchen_j|.
# np.outer sizes itself from the data, so this no longer assumes exactly
# six kitchens the way the previous hard-coded (6, 1) / (1, 6) reshapes did.
kitchen_mag_dot = np.outer(kitchen_mag.to_numpy(), kitchen_mag.to_numpy())
# Dividing the raw co-like counts by the norm products yields values with 1.0
# on the diagonal; a kitchen nobody liked would have a zero norm and give NaN.
kitchen_similarity / kitchen_mag_dot

Unnamed: 0,kitchen_1,kitchen_2,kitchen_3,kitchen_4,kitchen_5,kitchen_6
kitchen_1,1.0,0.8,0.67082,0.676123,0.258199,0.67082
kitchen_2,0.8,1.0,0.447214,0.507093,0.0,0.447214
kitchen_3,0.67082,0.447214,1.0,0.755929,0.288675,0.5
kitchen_4,0.676123,0.507093,0.755929,1.0,0.654654,0.755929
kitchen_5,0.258199,0.0,0.288675,0.654654,1.0,0.57735
kitchen_6,0.67082,0.447214,0.5,0.755929,0.57735,1.0


In [8]:
# Style similarity (cosine similarity between the style columns of `user_style`).
# Raw dot products between style columns: (styles x users) . (users x styles).
style_similarity = user_style.T.dot(user_style)
# Euclidean norm of each style column.
style_mag = np.sqrt((user_style * user_style).sum())
# Outer product of the norms: entry (i, j) is |style_i| * |style_j|.
# np.outer sizes itself from the data, so this no longer assumes exactly
# three styles the way the previous hard-coded (3, 1) / (1, 3) reshapes did.
style_mag_dot = np.outer(style_mag.to_numpy(), style_mag.to_numpy())
# Normalize the dot products; a style with an all-zero column would give NaN.
style_similarity / style_mag_dot

Unnamed: 0,style_1,style_2,style_3
style_1,1.0,0.648886,0.426401
style_2,0.648886,1.0,0.691714
style_3,0.426401,0.691714,1.0
