In [1]:
import pandas as pd
pd.set_option('display.max_rows', None)

import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

In [2]:
df = pd.read_csv("cuisine_customer.csv")

In [3]:
df.head()

Unnamed: 0,cuisine,Numbers of order,Customer_id
0,Fast Food,10,1
1,Cakes & Bakery,2,1
2,Western,1,1
3,Non-alcoholic Drinks,1,1
4,Chinese,4,1


In [4]:
df_food = df.groupby(by=["cuisine","Customer_id"]).sum()

In [5]:
df_food.reset_index(inplace=True)

In [6]:
df_food.head()

Unnamed: 0,cuisine,Customer_id,Numbers of order
0,American,1,2
1,American,2,6
2,American,3,1
3,American,4,2
4,American,5,4


## Create pivot table
---

Because we're creating an item-based collaborative recommender (where item in this case is our cuisine), we'll set up our pivot table as follows:
1. The `cuisine` will be the index
2. The `Customer_id` will be the column
3. The `Numbers of order` will be the value

In [7]:
pivot = df_food.pivot_table('Numbers of order',['cuisine'],'Customer_id')

In [8]:
pivot.to_csv("processed_customer.csv")

In [9]:
pivot.head()

Customer_id,1,2,3,4,5,6,7,8,9,10,...,91,92,93,94,95,96,97,98,99,100
cuisine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
American,2.0,6.0,1.0,2.0,4.0,,,,,2.0,...,,,2.0,,,2.0,1.0,,,
Asian,,,,,2.0,,,,,3.0,...,,3.0,,,,,,,8.0,
Burgers,,6.0,5.0,,,,,3.0,6.0,,...,,,,1.0,,,1.0,,2.0,1.0
Cakes & Bakery,2.0,,2.0,1.0,,1.0,,1.0,2.0,,...,,,1.0,,,,,,1.0,
Chicken,,,1.0,,1.0,,,,9.0,2.0,...,,,,,,2.0,1.0,,,2.0


## Create sparse matrix
---

In a minute, we'll calculate the cosine similarity for each movie using the `pairwise_distances` function. Before that, we need to create a sparse matrix (datatype) using `scipy`'s `sparse` module like so:
```python
sparse.csr_matrix(pivot.fillna(0), metric='cosine')
```

In [10]:
sparse_pivot = sparse.csr_matrix(pivot.fillna(0))
print(sparse_pivot)

  (0, 0)	2.0
  (0, 1)	6.0
  (0, 2)	1.0
  (0, 3)	2.0
  (0, 4)	4.0
  (0, 9)	2.0
  (0, 11)	1.0
  (0, 14)	3.0
  (0, 16)	1.0
  (0, 19)	1.0
  (0, 20)	1.0
  (0, 22)	1.0
  (0, 24)	2.0
  (0, 25)	1.0
  (0, 27)	5.0
  (0, 31)	1.0
  (0, 32)	2.0
  (0, 34)	1.0
  (0, 35)	3.0
  (0, 39)	2.0
  (0, 44)	2.0
  (0, 46)	1.0
  (0, 47)	2.0
  (0, 52)	1.0
  (0, 56)	4.0
  :	:
  (36, 59)	1.0
  (36, 61)	1.0
  (36, 62)	1.0
  (36, 63)	1.0
  (36, 65)	6.0
  (36, 66)	9.0
  (36, 69)	1.0
  (36, 70)	1.0
  (36, 72)	3.0
  (36, 73)	2.0
  (36, 74)	2.0
  (36, 75)	1.0
  (36, 77)	3.0
  (36, 79)	6.0
  (36, 82)	8.0
  (36, 83)	1.0
  (36, 84)	1.0
  (36, 88)	2.0
  (36, 89)	5.0
  (36, 90)	8.0
  (36, 91)	1.0
  (36, 94)	7.0
  (36, 96)	8.0
  (36, 97)	2.0
  (36, 98)	2.0


## Calculate cosine similarity
---

`sklearn` has a built-in `pairwise_distances` function that we can use for our recommender. It will return a square matrix, comparing every cuisine with every other cuisine in the dataset.

```python
pairwise_distances(sparse_pivot, metric='cosine')
cosine_distances(sparse_pivot)                     # Identical but more concise
```

In [11]:
# Note that a distance of 1 is a similarity of 0.
dists = pairwise_distances(sparse_pivot, metric='cosine')
# dists = cosine_distances(sparse_pivot)                         # Identical but more concise

dists

array([[0.        , 0.74213424, 0.71489221, ..., 0.98603003, 0.81705207,
        0.66796383],
       [0.74213424, 0.        , 0.76396359, ..., 0.83808265, 0.86476802,
        0.70514183],
       [0.71489221, 0.76396359, 0.        , ..., 0.92245965, 0.56480586,
        0.74481986],
       ...,
       [0.98603003, 0.83808265, 0.92245965, ..., 0.        , 1.        ,
        0.98548971],
       [0.81705207, 0.86476802, 0.56480586, ..., 1.        , 0.        ,
        0.70139109],
       [0.66796383, 0.70514183, 0.74481986, ..., 0.98548971, 0.70139109,
        0.        ]])

However, note that distance is not the same as similarity. For example, a similarity of 1 is a distance of 0! 

Because of this, the similarity is defined as 1 - dist. To compute this, we can use `cosine_similarity` instead.

In [12]:
# Here, similarity is 1 - distance.
similarities = cosine_similarity(sparse_pivot)

In [13]:
# Verify they are the same

np.all(np.isclose((1.0 - dists), similarities))

True

## Create distances DataFrame
---

At this point, we essentially have a recommender. We'll load it into a `pandas` DataFrame for readability. 

You'll notice that each cuisine has a "distance" of 0 with itself (along the diagonal).

In [14]:
recommender_df = pd.DataFrame(dists, 
                              columns=pivot.index, 
                              index=pivot.index)
recommender_df.head()

cuisine,American,Asian,Burgers,Cakes & Bakery,Chicken,Chinese,Desserts,European,Fast Food,Filipino,...,Pizza,Sandwiches,Seafood,Singaporean,Sushi,Thai,Turkish,Vegetarian,Vietnamese,Western
cuisine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
American,0.0,0.742134,0.714892,0.756345,0.741948,0.690753,0.76442,0.930306,0.681501,0.930257,...,0.772348,0.977252,0.804856,0.589968,1.0,0.822893,0.899481,0.98603,0.817052,0.667964
Asian,0.742134,0.0,0.763964,0.756904,0.882379,0.66853,0.825344,0.913452,0.658766,0.789331,...,0.699541,1.0,0.838443,0.67927,1.0,0.864237,0.988112,0.838083,0.864768,0.705142
Burgers,0.714892,0.763964,0.0,0.597746,0.729629,0.663054,0.664321,0.864606,0.761978,1.0,...,0.780246,0.944444,0.914895,0.662855,1.0,0.854365,1.0,0.92246,0.564806,0.74482
Cakes & Bakery,0.756345,0.756904,0.597746,0.0,0.781212,0.755863,0.622121,0.840636,0.633479,1.0,...,0.796306,0.968791,0.880477,0.524096,1.0,0.83501,1.0,0.744449,0.686252,0.802079
Chicken,0.741948,0.882379,0.729629,0.781212,0.0,0.838543,0.720151,0.94453,0.821063,0.949992,...,0.954484,0.917923,0.863174,0.729921,1.0,0.912988,0.893328,1.0,0.930662,0.617526


## Evaluate recommender performance
---

Now comes the fun part! Let's check out a few cuisines to see if the recommender aligns with our intuition. In the cell below we'll do the following:
1. Create a search term
2. Use that to find all titles matching the search query
3. For each cuisine, we'll list off the following:
  - The ten most similar cuisines

In [15]:
recommender_df.head()

cuisine,American,Asian,Burgers,Cakes & Bakery,Chicken,Chinese,Desserts,European,Fast Food,Filipino,...,Pizza,Sandwiches,Seafood,Singaporean,Sushi,Thai,Turkish,Vegetarian,Vietnamese,Western
cuisine,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
American,0.0,0.742134,0.714892,0.756345,0.741948,0.690753,0.76442,0.930306,0.681501,0.930257,...,0.772348,0.977252,0.804856,0.589968,1.0,0.822893,0.899481,0.98603,0.817052,0.667964
Asian,0.742134,0.0,0.763964,0.756904,0.882379,0.66853,0.825344,0.913452,0.658766,0.789331,...,0.699541,1.0,0.838443,0.67927,1.0,0.864237,0.988112,0.838083,0.864768,0.705142
Burgers,0.714892,0.763964,0.0,0.597746,0.729629,0.663054,0.664321,0.864606,0.761978,1.0,...,0.780246,0.944444,0.914895,0.662855,1.0,0.854365,1.0,0.92246,0.564806,0.74482
Cakes & Bakery,0.756345,0.756904,0.597746,0.0,0.781212,0.755863,0.622121,0.840636,0.633479,1.0,...,0.796306,0.968791,0.880477,0.524096,1.0,0.83501,1.0,0.744449,0.686252,0.802079
Chicken,0.741948,0.882379,0.729629,0.781212,0.0,0.838543,0.720151,0.94453,0.821063,0.949992,...,0.954484,0.917923,0.863174,0.729921,1.0,0.912988,0.893328,1.0,0.930662,0.617526


In [16]:
# Top 10 Recommended Cuisines that people should visit if they had previously had Pizza

cuisine_recommendations = recommender_df['Pizza'].sort_values(ascending=False)
cuisine_recommendations = pd.DataFrame(data = cuisine_recommendations)
cuisine_recommendations.head(3)

Unnamed: 0_level_0,Pizza
cuisine,Unnamed: 1_level_1
French,1.0
Turkish,1.0
Melaka Portuguese,1.0
