# Building an Recommendation Engine

In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [27]:
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances
from sklearn.metrics.pairwise import cosine_distances, cosine_similarity

In [None]:
df = pd.read_csv('final_recom')

In [28]:
ratings_df = pd.melt(df, id_vars=['date_num'], var_name='archetype')

In [16]:
ratings_df

Unnamed: 0,date_num,archetype,value
0,42055,Rainbow,1
1,41873,Rainbow,1
2,41862,Rainbow,1
3,41770,Rainbow,1
4,41687,Rainbow,1
5,41622,Rainbow,1
6,41596,Rainbow,1
7,41427,Rainbow,1
8,41418,Rainbow,1
9,41357,Rainbow,1


In [19]:
df2 = ratings_df.dropna()
rainbow_users = df2[df2.archetype == 'Rainbow']
affinity_users = df2[df2.archetype == 'Affinity']

In [24]:
common_users = set(rainbow_users.date_num).intersection(affinity_users.date_num)
rainbow_ratings = df2[df2.date_num.isin(common_users) & (df2.archetype == 'Rainbow')].value
affinity_ratings = df2[df2.date_num.isin(common_users) & (df2.archetype == 'Affinity')].value

In [25]:
cosine_similarity(rainbow_ratings, affinity_ratings)

ValueError: Incompatible dimension for X and Y matrices: X.shape[1] == 30 while Y.shape[1] == 69

In [35]:
def calculate_similarity(df, item1, item2, sim_func):
    df = df.dropna()
    item1_users = df[df['Coffee'] == item1].Name.unique()
    item2_users = df[df['Coffee'] == item2].Name.unique()
    common_users = set(item1_users).intersection(item2_users)
    user_mask = (df['Name'].isin(common_users))
    item1_mask = (df['Coffee'] == item1)
    item2_mask = (df['Coffee'] == item2)
    sim_value = sim_func(df[user_mask & item1_mask].value, df[user_mask & item2_mask].value)
    return sim_value[0][0]

In [38]:
calculate_similarity(ratings_df, 'Starbucks', 'PhilzCoffee', cosine_similarity)

0.88099219370405979

In [47]:
def recommended_place(ratings_df, myshop):
    sim_items = []
    for shop in ratings_df.Coffee.unique():
        sim_value = calculate_similarity(ratings_df, myshop, shop, cosine_similarity)
        sim_items.append((shop, sim_value))
    return sim_items

In [48]:
sim_ratings = recommended_place(ratings_df, 'PhilzCoffee')
sim_ratings.sort(key=lambda x: x[1], reverse=True)
sim_ratings

[('PhilzCoffee', 1.0000000000000002),
 ('BlueBottleCoffee', 0.98664715978766304),
 ('GrandCoffee', 0.98020440073623694),
 ('CaffeTrieste', 0.94243557789476951),
 ('PeetsTea', 0.92983969648564635),
 ('FourBarrel', 0.91654696409106085),
 ('RitualCoffee', 0.91447237794405922),
 ('Starbucks', 0.88099219370405979),
 ('WorkshopCafe', 0.8710790642362547)]