# First Recommender Attempt

In [1]:
import pandas as pd
import numpy as np
from scipy import sparse

from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

%matplotlib inline

In [2]:
# Load the survey export. Per the preview below, each row holds a Timestamp plus
# one rating column per veggie (the values shown range from 1 to 5).
ratings = pd.read_csv('./data/veggie_ratings.csv')
ratings.head()

Unnamed: 0,Timestamp,Artichokes,Arugula,Asparagus,Bush beans (green beans),Pole beans(green beans),Beets,Bok Choy,Broccoli,Brussel Sprouts,...,Radishes,Rhubarb,Rutabaga,Shallots,Spinach,Summer Squash,Winter Squash,Swiss Chard,Tomatoes,Turnips
0,10/12/2021 17:47:21,5,5,5,4,4,1,3,5,5,...,1,2,2,4,5,1,1,3,5,2
1,10/12/2021 17:57:50,2,4,2,4,2,1,1,3,1,...,1,1,1,3,5,1,1,1,4,1
2,10/12/2021 18:17:26,3,3,5,3,3,1,1,4,3,...,1,2,1,5,5,1,1,1,4,1
3,10/12/2021 18:18:15,4,2,1,2,2,2,3,3,3,...,3,2,2,2,2,2,2,2,1,2
4,10/12/2021 18:21:26,3,2,5,5,5,1,1,5,1,...,1,1,1,1,1,1,1,1,5,1


In [3]:
# Treat each row as one user: the row index becomes the user id, and the
# Timestamp column is discarded because it is not a rating.
# Built as a non-mutating chain (no inplace=True); errors='ignore' lets this cell
# be re-run without a KeyError once Timestamp has already been removed.
ratings = (
    ratings
    .assign(user_id=ratings.index)
    .drop(columns='Timestamp', errors='ignore')
)

In [4]:
# Every column except the user_id key is a veggie. Selecting by label instead of
# by position ([0:-1]) stays correct even if user_id is not the last column, and
# fails loudly (KeyError) if user_id is missing.
veg = ratings.columns.drop('user_id')
veg

Index(['Artichokes', 'Arugula', 'Asparagus', 'Bush beans (green beans)',
       'Pole beans(green beans)', 'Beets', 'Bok Choy', 'Broccoli',
       'Brussel Sprouts', 'Cabbage', 'Carrots', 'Cauliflower', 'Celery',
       'Chives', 'Collards', 'Corn', 'Cucumbers', 'Eggplant', 'Garlic', 'Kale',
       'Leeks', 'Lettuce', 'Mustard Greens', 'Onions', 'Parsnips', 'Peas',
       'Peppers', 'Potatoes', 'Pumpkin', 'Radishes', 'Rhubarb', 'Rutabaga',
       'Shallots', 'Spinach', 'Summer Squash', 'Winter Squash', 'Swiss Chard',
       'Tomatoes', 'Turnips'],
      dtype='object')

In [5]:
# Lookup table: veggie name -> its 0-based position in `veg`.
veg_dict = dict(zip(veg, range(len(veg))))
veg_dict

{'Artichokes': 0,
 'Arugula': 1,
 'Asparagus': 2,
 'Bush beans (green beans)': 3,
 'Pole beans(green beans)': 4,
 'Beets': 5,
 'Bok Choy': 6,
 'Broccoli': 7,
 'Brussel Sprouts': 8,
 'Cabbage': 9,
 'Carrots': 10,
 'Cauliflower': 11,
 'Celery': 12,
 'Chives': 13,
 'Collards': 14,
 'Corn': 15,
 'Cucumbers': 16,
 'Eggplant': 17,
 'Garlic': 18,
 'Kale': 19,
 'Leeks': 20,
 'Lettuce': 21,
 'Mustard Greens': 22,
 'Onions': 23,
 'Parsnips': 24,
 'Peas': 25,
 'Peppers': 26,
 'Potatoes': 27,
 'Pumpkin': 28,
 'Radishes': 29,
 'Rhubarb': 30,
 'Rutabaga': 31,
 'Shallots': 32,
 'Spinach': 33,
 'Summer Squash': 34,
 'Winter Squash': 35,
 'Swiss Chard': 36,
 'Tomatoes': 37,
 'Turnips': 38}

In [6]:
# Wide -> long: one row per (user_id, veggie column) pair.
ratings = pd.melt(ratings, id_vars='user_id')

In [7]:
# Sanity check on the long frame: rows = users x veggie columns, and three
# columns (user_id, variable, value).
ratings.shape

(4875, 3)

In [8]:
# Preview the melted frame: melt names the new columns `variable` and `value`.
ratings.head()

Unnamed: 0,user_id,variable,value
0,0,Artichokes,5
1,1,Artichokes,2
2,2,Artichokes,3
3,3,Artichokes,4
4,4,Artichokes,3


In [9]:
# Attach a numeric id to every row by looking its veggie name up in veg_dict.
ratings = ratings.assign(veggie_id=ratings['variable'].map(veg_dict))

In [10]:
# Confirm the veggie_id column is present.
ratings.head()

Unnamed: 0,user_id,variable,value,veggie_id
0,0,Artichokes,5,0
1,1,Artichokes,2,0
2,2,Artichokes,3,0
3,3,Artichokes,4,0
4,4,Artichokes,3,0


In [11]:
# Replace melt's generic column names with descriptive ones.
ratings = ratings.rename(
    {'variable': 'veggie_name', 'value': 'rating'},
    axis='columns',
)

In [12]:
# Confirm the columns now read veggie_name / rating.
ratings.head()

Unnamed: 0,user_id,veggie_name,rating,veggie_id
0,0,Artichokes,5,0
1,1,Artichokes,2,0
2,2,Artichokes,3,0
3,3,Artichokes,4,0
4,4,Artichokes,3,0


In [13]:
# Item-by-user matrix: one row per veggie, one column per user, cells are ratings.
# Note that pivot_table orders the rows by veggie_name, not by veggie_id.
pivot = ratings.pivot_table(
    values='rating',
    index='veggie_name',
    columns='user_id',
)
pivot.head()

user_id,0,1,2,3,4,5,6,7,8,9,...,115,116,117,118,119,120,121,122,123,124
veggie_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Artichokes,5,2,3,4,3,4,4,4,5,2,...,1,4,2,5,3,5,5,1,4,3
Arugula,5,4,3,2,2,1,2,5,5,3,...,1,5,1,1,2,5,2,4,2,4
Asparagus,5,2,5,1,5,4,4,5,2,5,...,1,5,4,5,5,5,5,4,3,5
Beets,1,1,1,2,1,4,1,4,5,1,...,1,5,1,1,1,2,2,1,4,2
Bok Choy,3,1,1,3,1,4,4,4,1,5,...,1,4,1,5,3,5,1,1,3,3


In [14]:
# CSR copy of the veggie-by-user matrix, used as input for the pairwise metrics.
sparse_pivot = sparse.csr_matrix(pivot.to_numpy())

In [15]:
# Pairwise cosine distance between the rows of sparse_pivot. Rows are veggies, so
# this is a square veggie-by-veggie matrix with 0 on the diagonal.
# NOTE(review): the ratings are not mean-centred; with all-positive rating vectors
# every pair ends up close together (see the small values below) — consider
# centring per user before computing distances.
dists = cosine_distances(sparse_pivot)
dists

array([[0.        , 0.10090689, 0.0960003 , ..., 0.11025035, 0.13839955,
        0.12054969],
       [0.10090689, 0.        , 0.06951891, ..., 0.08271811, 0.14015046,
        0.11921901],
       [0.0960003 , 0.06951891, 0.        , ..., 0.06335072, 0.14632511,
        0.09233123],
       ...,
       [0.11025035, 0.08271811, 0.06335072, ..., 0.        , 0.14420635,
        0.09115027],
       [0.13839955, 0.14015046, 0.14632511, ..., 0.14420635, 0.        ,
        0.11312013],
       [0.12054969, 0.11921901, 0.09233123, ..., 0.09115027, 0.11312013,
        0.        ]])

In [16]:
# Cosine similarity of the same rows; per the outputs this is 1 - dists.
# NOTE(review): `similarities` is not used by any later cell visible here.
similarities = cosine_similarity(sparse_pivot)

In [17]:
# Display the similarity matrix (1.0 on the diagonal).
similarities

array([[1.        , 0.89909311, 0.9039997 , ..., 0.88974965, 0.86160045,
        0.87945031],
       [0.89909311, 1.        , 0.93048109, ..., 0.91728189, 0.85984954,
        0.88078099],
       [0.9039997 , 0.93048109, 1.        , ..., 0.93664928, 0.85367489,
        0.90766877],
       ...,
       [0.88974965, 0.91728189, 0.93664928, ..., 1.        , 0.85579365,
        0.90884973],
       [0.86160045, 0.85984954, 0.85367489, ..., 0.85579365, 1.        ,
        0.88687987],
       [0.87945031, 0.88078099, 0.90766877, ..., 0.90884973, 0.88687987,
        1.        ]])

In [18]:
# Label both axes of the distance matrix with veggie names so neighbours can be
# looked up by name; pivot.index is the row order the distances were computed in.
recommender_df = pd.DataFrame(
    data=dists,
    index=pivot.index,
    columns=pivot.index,
)
recommender_df.head()

veggie_name,Artichokes,Arugula,Asparagus,Beets,Bok Choy,Broccoli,Brussel Sprouts,Bush beans (green beans),Cabbage,Carrots,...,Radishes,Rhubarb,Rutabaga,Shallots,Spinach,Summer Squash,Swiss Chard,Tomatoes,Turnips,Winter Squash
veggie_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Artichokes,0.0,0.100907,0.096,0.155125,0.113214,0.099324,0.111638,0.121389,0.115719,0.113122,...,0.123694,0.156485,0.123201,0.097834,0.096716,0.116625,0.112679,0.11025,0.1384,0.12055
Arugula,0.100907,0.0,0.069519,0.144015,0.13404,0.079348,0.085539,0.094629,0.104827,0.091337,...,0.113072,0.126614,0.137019,0.078349,0.065999,0.107274,0.120956,0.082718,0.14015,0.119219
Asparagus,0.096,0.069519,0.0,0.162596,0.110035,0.047394,0.073614,0.059927,0.079939,0.0603,...,0.119069,0.122251,0.118955,0.083974,0.042015,0.077114,0.127524,0.063351,0.146325,0.092331
Beets,0.155125,0.144015,0.162596,0.0,0.150132,0.174875,0.148601,0.157619,0.152078,0.175084,...,0.112925,0.170194,0.13504,0.140444,0.156092,0.185146,0.144281,0.16197,0.131624,0.181067
Bok Choy,0.113214,0.13404,0.110035,0.150132,0.0,0.098938,0.118112,0.123444,0.112459,0.120789,...,0.128226,0.135218,0.0882,0.094901,0.10999,0.105838,0.104958,0.13373,0.123619,0.096847


In [19]:
# Ten veggies closest to Beets. Beets itself is removed by label rather than by
# skipping position 0 of the sorted result, which is only correct when no other
# veggie ties with it at distance 0.
recommender_df['Beets'].drop('Beets').nsmallest(10)

veggie_name
Radishes           0.112925
Turnips            0.131624
Rutabaga           0.135040
Shallots           0.140444
Arugula            0.144015
Swiss Chard        0.144281
Brussel Sprouts    0.148601
Onions             0.149240
Bok Choy           0.150132
Leeks              0.151530
Name: Beets, dtype: float64

In [20]:
def recommend_veggie(search_term, n=10):
    """Print rating stats and the nearest veggies for each name matching `search_term`.

    Reads the notebook-level `pivot` (veggie-by-user ratings) and `recommender_df`
    (veggie-by-veggie cosine distances) frames.

    Parameters
    ----------
    search_term : str
        Case-sensitive substring; every veggie whose name contains it is reported.
    n : int, default 10
        How many nearest veggies to list for each match.

    Returns
    -------
    None
        Results are printed. If nothing matches, a short notice is printed instead.
    """
    titles = pivot.filter(like=search_term, axis='index').index

    if titles.empty:
        print(f'No veggies match {search_term!r}')
        return

    for title in titles:
        title_ratings = pivot.loc[title]
        # Exclude the veggie itself by label. Skipping position 0 of the sorted
        # distances is only correct when nothing else ties with it at distance 0.
        nearest = recommender_df[title].drop(title).nsmallest(n)

        print(title)
        print('Average rating:', title_ratings.mean())
        print('Number of ratings:', title_ratings.count())
        print()
        print(f'{n} most similar veggies:')
        print(nearest)
        print('*' * 50)
        print()

In [21]:
# Example lookup; the search term is matched as a substring of the veggie names.
recommend_veggie('Broccoli')

Broccoli
Average rating: 4.224
Number of ratings: 125

10 most similar veggies:
veggie_name
Bush beans (green beans)    0.034111
Garlic                      0.038349
Potatoes                    0.038438
Spinach                     0.039004
Pole beans(green beans)     0.043265
Lettuce                     0.043504
Peppers                     0.043970
Carrots                     0.044430
Onions                      0.046367
Asparagus                   0.047394
Name: Broccoli, dtype: float64
**************************************************

