In [2]:
import pandas as pd
import numpy as np
import requests
import sqlite3
from collections import defaultdict

# Path of the SQLite database file passed to sqlite3.connect() further down.
database_file = 'kitchenstyles.db'

In [3]:
# Load the kitchen/style matrix from the sample TSV and index it by the
# 'kitchen' column, leaving one column per style.
kitchens = (
    pd.read_csv('sample_data/kitchen_style_kitchens.tsv', sep='\t')
    .set_index('kitchen')
)
kitchens

Unnamed: 0_level_0,style_1,style_2,style_3
kitchen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1,0,0
2,1,0,0
3,0,1,0
4,0,1,0
5,0,0,1
6,0,0,1


In [4]:
# Open the SQLite database and pull the full image_attributes table into a
# DataFrame. The connection is kept open because a later cell queries it again.
conn = sqlite3.connect(database_file)
kitchens_from_sql = pd.read_sql_query(sql='select * from image_attributes', con=conn)
kitchens_from_sql

Unnamed: 0,id,image_id,attribute
0,1,1,style_1
1,2,2,style_1
2,3,3,style_2
3,4,4,style_2
4,5,5,style_3
5,6,6,style_3


In [5]:
# Reshape the long (image_id, attribute) table into a wide indicator matrix:
# one row per image, one column per attribute, 1 where the pair exists, 0 elsewhere.
# The helper 'values' column is added on a copy via assign(), so the frame shown
# in the previous cell is not modified and re-running cells stays idempotent.
# NOTE(review): pivot() is expected to fail if an (image_id, attribute) pair
# appears more than once - confirm the table cannot contain duplicates.
(
    kitchens_from_sql
    .assign(values=1)
    .pivot(index='image_id', columns='attribute', values='values')
    .fillna(0)  # pairs that are absent come out of pivot() as NaN
)

attribute,style_1,style_2,style_3
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.0,0.0,0.0
2,1.0,0.0,0.0
3,0.0,1.0,0.0
4,0.0,1.0,0.0
5,0.0,0.0,1.0
6,0.0,0.0,1.0


In [6]:
# Map each image id to the list of attributes recorded for it.
# Walking the two columns directly avoids iterrows(), which builds a full
# Series object for every row just to read two fields.
kitchen_dict = defaultdict(list)
for image_id, attribute in zip(kitchens_from_sql['image_id'], kitchens_from_sql['attribute']):
    kitchen_dict[image_id].append(attribute)
kitchen_dict

defaultdict(list,
            {1: ['style_1'],
             2: ['style_1'],
             3: ['style_2'],
             4: ['style_2'],
             5: ['style_3'],
             6: ['style_3']})

In [7]:
# Load the user/kitchen preference matrix (which users liked which kitchens).
# Rows are indexed by the 'user' column; the remaining columns are kitchen image ids.
users = (
    pd.read_csv('sample_data/kitchen_style_users.tsv', sep='\t')
    .set_index('user')
)
users

Unnamed: 0_level_0,1,2,3,4,5,6
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,1,0,1,0,1
2,1,1,1,1,0,0
3,0,0,0,1,1,0
4,1,1,1,1,0,1
5,0,0,0,1,1,1
6,1,1,0,0,0,0
7,1,0,1,1,1,1
8,0,0,1,1,0,0
9,0,1,0,0,0,0


In [8]:
# Fetch every feedback row stored for one user.
# The user id is a named value bound through the driver's '?' placeholder
# instead of being baked into the SQL text, so inspecting another user only
# means changing user_id.
user_id = 1
users_from_sql = pd.read_sql_query(
    'select * from user_image_feedback where user_id = ?',
    conn,
    params=(user_id,),
)
users_from_sql

Unnamed: 0,id,user_id,image_id,feedback
0,1,1,1,1
1,6,1,2,1
2,15,1,4,1
3,25,1,6,1


In [9]:
# Accumulate, per style attribute, the feedback this user gave to the images
# that carry that attribute.
user_dict = defaultdict(int)

# NOTE(review): despite its name, image_count is incremented once per
# (image, attribute) pair, not once per image - confirm which is intended.
image_count = 0
for image_id, feedback in zip(users_from_sql['image_id'], users_from_sql['feedback']):
    # .get() rather than kitchen_dict[image_id]: indexing the defaultdict would
    # insert an empty list for every image that has no attributes, silently
    # growing kitchen_dict as a side effect of this read.
    for attribute in kitchen_dict.get(image_id, ()):
        user_dict[attribute] += feedback
        image_count += 1
user_dict

defaultdict(int, {'style_1': 2, 'style_2': 1, 'style_3': 1})

In [10]:
# Turn per-kitchen preferences into per-style preferences by multiplying the
# user/kitchen matrix with the kitchen/style matrix.
# The product is positional (raw arrays, no label alignment).
# NOTE(review): this assumes the column order of `users` matches the row order
# of `kitchens` - confirm against the source files.
user_style = pd.DataFrame(
    users.to_numpy().dot(kitchens.to_numpy()),
    index=users.index,
    columns=kitchens.columns,
)
user_style

Unnamed: 0_level_0,style_1,style_2,style_3
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2,1,1
2,2,2,0
3,0,1,1
4,2,2,1
5,0,1,2
6,2,0,0
7,1,2,2
8,0,2,0
9,1,0,0


In [11]:
# Average of each style column across all users.
user_style.mean(axis=0)

style_1    1.111111
style_2    1.222222
style_3    0.777778
dtype: float64

In [12]:
# Normalized user/style matrix: divide every row by its own total so each
# user's style values sum to 1.
# A user whose total is 0 would yield NaN for that row (0 / 0).
style_totals = user_style.sum(axis='columns')
user_style.div(style_totals, axis='index')

Unnamed: 0_level_0,style_1,style_2,style_3
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.5,0.25,0.25
2,0.5,0.5,0.0
3,0.0,0.5,0.5
4,0.4,0.4,0.2
5,0.0,0.333333,0.666667
6,1.0,0.0,0.0
7,0.2,0.4,0.4
8,0.0,1.0,0.0
9,1.0,0.0,0.0


In [13]:
# Which kitchens are most similar to each other?
# Entry (i, j) is the dot product of the preference columns of kitchens i and j;
# with 0/1 preferences that is the number of users who liked both.
kitchen_similarity = users.T @ users
kitchen_similarity

Unnamed: 0,1,2,3,4,5,6
1,5,4,3,4,1,3
2,4,5,2,3,0,2
3,3,2,4,4,1,2
4,4,3,4,7,3,4
5,1,0,1,3,3,2
6,3,2,2,4,2,4


In [14]:
# Normalized kitchen similarity: divide each dot product by the product of the
# two kitchens' vector lengths (i.e. cosine similarity between columns of `users`).
kitchen_mag = np.sqrt((users * users).sum())
# np.outer builds the full matrix of pairwise length products for any number of
# kitchens, instead of reshaping to a hard-coded 6x1 / 1x6.
kitchen_mag_dot = np.outer(kitchen_mag.to_numpy(), kitchen_mag.to_numpy())
# A kitchen with no likes has length 0 and would produce NaN entries here.
kitchen_similarity / kitchen_mag_dot

Unnamed: 0,1,2,3,4,5,6
1,1.0,0.8,0.67082,0.676123,0.258199,0.67082
2,0.8,1.0,0.447214,0.507093,0.0,0.447214
3,0.67082,0.447214,1.0,0.755929,0.288675,0.5
4,0.676123,0.507093,0.755929,1.0,0.654654,0.755929
5,0.258199,0.0,0.288675,0.654654,1.0,0.57735
6,0.67082,0.447214,0.5,0.755929,0.57735,1.0


In [15]:
# Style similarity: dot products between the style columns of `user_style`,
# divided by the product of the two columns' vector lengths (cosine similarity).
style_similarity = user_style.T.dot(user_style)
style_mag = np.sqrt((user_style * user_style).sum())
# np.outer sizes itself from the data, so this works for any number of styles
# rather than only the hard-coded 3.
style_mag_dot = np.outer(style_mag.to_numpy(), style_mag.to_numpy())
style_similarity / style_mag_dot

Unnamed: 0,style_1,style_2,style_3
style_1,1.0,0.648886,0.426401
style_2,0.648886,1.0,0.691714
style_3,0.426401,0.691714,1.0
