# Recommender Notebook
---

In [1]:
import pandas as pd
import numpy as np
from scipy import sparse

from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

%matplotlib inline

In [2]:
ratings = pd.read_csv('./data/veggie_ratings.csv')
ratings.head()

Unnamed: 0,Timestamp,Artichokes,Arugula,Asparagus,Bush beans (green beans),Pole beans(green beans),Beets,Bok Choy,Broccoli,Brussel Sprouts,...,Radishes,Rhubarb,Rutabaga,Shallots,Spinach,Summer Squash,Winter Squash,Swiss Chard,Tomatoes,Turnips
0,10/12/2021 17:47:21,5,5,5,4,4,1,3,5,5,...,1,2,2,4,5,1,1,3,5,2
1,10/12/2021 17:57:50,2,4,2,4,2,1,1,3,1,...,1,1,1,3,5,1,1,1,4,1
2,10/12/2021 18:17:26,3,3,5,3,3,1,1,4,3,...,1,2,1,5,5,1,1,1,4,1
3,10/12/2021 18:18:15,4,2,1,2,2,2,3,3,3,...,3,2,2,2,2,2,2,2,1,2
4,10/12/2021 18:21:26,3,2,5,5,5,1,1,5,1,...,1,1,1,1,1,1,1,1,5,1


In [3]:
ratings['user_id'] = ratings.index
ratings.drop(columns='Timestamp', inplace=True)

In [4]:
veg = ratings.columns[0:-1]
veg

Index(['Artichokes', 'Arugula', 'Asparagus', 'Bush beans (green beans)',
       'Pole beans(green beans)', 'Beets', 'Bok Choy', 'Broccoli',
       'Brussel Sprouts', 'Cabbage', 'Carrots', 'Cauliflower', 'Celery',
       'Chives', 'Collards', 'Corn', 'Cucumbers', 'Eggplant', 'Garlic', 'Kale',
       'Leeks', 'Lettuce', 'Mustard Greens', 'Onions', 'Parsnips', 'Peas',
       'Peppers', 'Potatoes', 'Pumpkin', 'Radishes', 'Rhubarb', 'Rutabaga',
       'Shallots', 'Spinach', 'Summer Squash', 'Winter Squash', 'Swiss Chard',
       'Tomatoes', 'Turnips'],
      dtype='object')

In [5]:
veg_dict = {k: v for v, k in enumerate(veg)}
veg_dict

{'Artichokes': 0,
 'Arugula': 1,
 'Asparagus': 2,
 'Bush beans (green beans)': 3,
 'Pole beans(green beans)': 4,
 'Beets': 5,
 'Bok Choy': 6,
 'Broccoli': 7,
 'Brussel Sprouts': 8,
 'Cabbage': 9,
 'Carrots': 10,
 'Cauliflower': 11,
 'Celery': 12,
 'Chives': 13,
 'Collards': 14,
 'Corn': 15,
 'Cucumbers': 16,
 'Eggplant': 17,
 'Garlic': 18,
 'Kale': 19,
 'Leeks': 20,
 'Lettuce': 21,
 'Mustard Greens': 22,
 'Onions': 23,
 'Parsnips': 24,
 'Peas': 25,
 'Peppers': 26,
 'Potatoes': 27,
 'Pumpkin': 28,
 'Radishes': 29,
 'Rhubarb': 30,
 'Rutabaga': 31,
 'Shallots': 32,
 'Spinach': 33,
 'Summer Squash': 34,
 'Winter Squash': 35,
 'Swiss Chard': 36,
 'Tomatoes': 37,
 'Turnips': 38}

In [6]:
ratings = ratings.melt(id_vars = 'user_id')

In [7]:
ratings.shape

(5226, 3)

In [8]:
ratings.head()

Unnamed: 0,user_id,variable,value
0,0,Artichokes,5
1,1,Artichokes,2
2,2,Artichokes,3
3,3,Artichokes,4
4,4,Artichokes,3


In [9]:
ratings['veggie_id'] = ratings['variable'].map(veg_dict)

In [10]:
ratings.head()

Unnamed: 0,user_id,variable,value,veggie_id
0,0,Artichokes,5,0
1,1,Artichokes,2,0
2,2,Artichokes,3,0
3,3,Artichokes,4,0
4,4,Artichokes,3,0


In [11]:
ratings = ratings.rename(columns={'variable': 'veggie_name', 'value': 'rating'})

In [12]:
ratings.head()

Unnamed: 0,user_id,veggie_name,rating,veggie_id
0,0,Artichokes,5,0
1,1,Artichokes,2,0
2,2,Artichokes,3,0
3,3,Artichokes,4,0
4,4,Artichokes,3,0


In [13]:
pivot = pd.pivot_table(ratings, index='veggie_name', columns='user_id', values='rating')
pivot.head()

user_id,0,1,2,3,4,5,6,7,8,9,...,124,125,126,127,128,129,130,131,132,133
veggie_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Artichokes,5,2,3,4,3,4,4,4,5,2,...,3,3,2,3,5,5,1,3,4,2
Arugula,5,4,3,2,2,1,2,5,5,3,...,4,3,3,4,1,3,2,3,1,2
Asparagus,5,2,5,1,5,4,4,5,2,5,...,5,5,4,5,5,3,2,4,3,5
Beets,1,1,1,2,1,4,1,4,5,1,...,2,3,1,5,1,1,2,1,2,5
Bok Choy,3,1,1,3,1,4,4,4,1,5,...,3,3,1,4,2,2,2,4,1,4


In [14]:
sparse_pivot = sparse.csr_matrix(pivot)

In [15]:
dists = cosine_distances(sparse_pivot)
dists

array([[0.        , 0.10662537, 0.09564753, ..., 0.10744898, 0.14118151,
        0.11712129],
       [0.10662537, 0.        , 0.07184178, ..., 0.08738546, 0.14670153,
        0.12323257],
       [0.09564753, 0.07184178, 0.        , ..., 0.06230751, 0.14093961,
        0.09054579],
       ...,
       [0.10744898, 0.08738546, 0.06230751, ..., 0.        , 0.13880848,
        0.0890425 ],
       [0.14118151, 0.14670153, 0.14093961, ..., 0.13880848, 0.        ,
        0.11349265],
       [0.11712129, 0.12323257, 0.09054579, ..., 0.0890425 , 0.11349265,
        0.        ]])

In [16]:
similarities = cosine_similarity(sparse_pivot)

In [17]:
similarities

array([[1.        , 0.89337463, 0.90435247, ..., 0.89255102, 0.85881849,
        0.88287871],
       [0.89337463, 1.        , 0.92815822, ..., 0.91261454, 0.85329847,
        0.87676743],
       [0.90435247, 0.92815822, 1.        , ..., 0.93769249, 0.85906039,
        0.90945421],
       ...,
       [0.89255102, 0.91261454, 0.93769249, ..., 1.        , 0.86119152,
        0.9109575 ],
       [0.85881849, 0.85329847, 0.85906039, ..., 0.86119152, 1.        ,
        0.88650735],
       [0.88287871, 0.87676743, 0.90945421, ..., 0.9109575 , 0.88650735,
        1.        ]])

In [18]:
recommender_df = pd.DataFrame(dists, columns=pivot.index, index=pivot.index)
recommender_df.head()

veggie_name,Artichokes,Arugula,Asparagus,Beets,Bok Choy,Broccoli,Brussel Sprouts,Bush beans (green beans),Cabbage,Carrots,...,Radishes,Rhubarb,Rutabaga,Shallots,Spinach,Summer Squash,Swiss Chard,Tomatoes,Turnips,Winter Squash
veggie_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Artichokes,0.0,0.106625,0.095648,0.165555,0.118225,0.097136,0.107318,0.121322,0.113323,0.113787,...,0.126521,0.169771,0.123633,0.096574,0.094731,0.113701,0.117724,0.107449,0.141182,0.117121
Arugula,0.106625,0.0,0.071842,0.147518,0.132488,0.083784,0.090161,0.095887,0.103993,0.092288,...,0.119242,0.130691,0.137964,0.08722,0.067948,0.111108,0.118808,0.087385,0.146702,0.123233
Asparagus,0.095648,0.071842,0.0,0.160414,0.107279,0.047656,0.074525,0.060137,0.079576,0.058942,...,0.11482,0.131626,0.11685,0.086356,0.040759,0.076392,0.128745,0.062308,0.14094,0.090546
Beets,0.165555,0.147518,0.160414,0.0,0.147149,0.177839,0.155912,0.159939,0.157683,0.174046,...,0.110128,0.178732,0.14254,0.150989,0.155327,0.185653,0.151678,0.161731,0.136879,0.181784
Bok Choy,0.118225,0.132488,0.107279,0.147149,0.0,0.101795,0.120795,0.122341,0.113047,0.119672,...,0.128547,0.140406,0.08995,0.10132,0.108681,0.105129,0.105006,0.130676,0.126066,0.096839


In [19]:
recommender_df.to_csv('./data/recommender_df.csv')

In [34]:
recommender_df['Broccoli'].sort_values()[1:11]

veggie_name
Bush beans (green beans)    0.034129
Potatoes                    0.036524
Garlic                      0.037901
Spinach                     0.038538
Lettuce                     0.042860
Pole beans(green beans)     0.044202
Carrots                     0.044551
Peppers                     0.045773
Onions                      0.046227
Asparagus                   0.047656
Name: Broccoli, dtype: float64

In [35]:
recommender_df['Potatoes'].sort_values()[1:11]

veggie_name
Garlic                      0.019856
Lettuce                     0.026143
Corn                        0.028650
Peppers                     0.030574
Carrots                     0.034130
Broccoli                    0.036524
Spinach                     0.038043
Onions                      0.038502
Bush beans (green beans)    0.040151
Pole beans(green beans)     0.043625
Name: Potatoes, dtype: float64

In [21]:
def recommend_veggie(search_term):
    titles = pivot.filter(like = search_term, axis='index').index
    
    for title in titles:
        print(title)
        print('Average rating:', pivot.loc[title].mean())
        print('Number of ratings:', pivot.loc[title].count())
        print()
        print('10 most similar veggies:')
        print(recommender_df[title].sort_values()[1:11])
        print('*' * 37)
        print()

In [22]:
recommend_veggie('Beets')

Beets
Average rating: 2.3582089552238807
Number of ratings: 134

10 most similar veggies:
veggie_name
Radishes       0.110128
Turnips        0.136879
Rutabaga       0.142540
Bok Choy       0.147149
Arugula        0.147518
Shallots       0.150989
Swiss Chard    0.151678
Leeks          0.152880
Peppers        0.152989
Potatoes       0.153980
Name: Beets, dtype: float64
*************************************



Read in our veg_info dataframe.

In [23]:
veg_info = pd.read_csv('./data/veg_info.csv')
veg_info.head()

Unnamed: 0,plants,sunlight,soil_condition,moisture_level,hardiness_zones,lifecycle,difficulty,height,spread,time_to_mature,flower_color,foliage_color,shape,frost_tolerance,special_att1,special_att2,special_use,link
0,Artichokes,full sunlight,well drained,high,"6, 7, 8, 9",perennial,3.0,3 to 6 feet,2 to 4 feet,85 - 100 days,violet,light green,spreading mass/upright,high,Not native to North America,none,edible landscaping,https://www.almanac.com/plant/artichokes
1,Arugula,full sunlight or partial shade,low fertility,medium,"3, 4, 5, 6, 7, 8, 9, 10, 11",annual,1.0,1 to 2 feet,0.5 to 1 foot,4 - 7 weeks,white,medium green,"cushion, mound, upright",medium,Not native to North America,none,edible flowers,https://www.almanac.com/plant/arugula
2,Asparagus,full sunlight or partial shade,well drained,low,"2, 3, 4, 5, 6, 7, 8",perennial,2.0,5 to 9 feet,2 to 2.5 feet,2 - 3 years,green,light green,upright,low,Not native to North America,"Bears ornamental fruit - small, bright red ber...",edible landscaping,https://www.almanac.com/plant/asparagus
3,Bush beans,full,well drained,medium,"3, 4, 5, 6, 7, 8, 9, 10",annual,1.0,1 to 3 feet,1 to 2 feet,50 - 60 days,white,medium green,spreading mass,low,Not native to North America but cultivated wor...,Bears ornamental fruit - some varieties have p...,edible landscaping,https://www.almanac.com/plant/beans
4,Beets,full sunlight or partial shade,low fertility,consistent,"2, 3, 4, 5, 6, 7, 8, 9, 10",annual,1.0,3 to 5 inches,0.5 to 1 foot,7 - 8 weeks,,medium green,"cushion, mound, or clump",medium,Not native to North America - native to Europe...,none,edible landscaping,https://www.almanac.com/plant/beets


Check dtypes:

1. hardiness_zones - need to be changed to a list of integers
2. flower_color - we'll have to be prepared to handle the NaNs

In [24]:
# def str_to_lst(x):
#     lst = x
#     lst = lst.split(",")
#     lst = [int(i) for i in lst]
#     return lst

In [25]:
# veg_info['hardiness_zones'] = veg_info['hardiness_zones'].apply(str_to_lst)

In [26]:
veg_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Data columns (total 18 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   plants           37 non-null     object 
 1   sunlight         37 non-null     object 
 2   soil_condition   37 non-null     object 
 3   moisture_level   37 non-null     object 
 4   hardiness_zones  37 non-null     object 
 5   lifecycle        37 non-null     object 
 6   difficulty       37 non-null     float64
 7   height           37 non-null     object 
 8   spread           37 non-null     object 
 9   time_to_mature   37 non-null     object 
 10  flower_color     17 non-null     object 
 11  foliage_color    37 non-null     object 
 12  shape            37 non-null     object 
 13  frost_tolerance  37 non-null     object 
 14  special_att1     37 non-null     object 
 15  special_att2     37 non-null     object 
 16  special_use      37 non-null     object 
 17  link             3

In [27]:
veg_lst = recommender_df[['Beets']].sort_values(by='Beets')[1:11]
veg_lst

veggie_name,Beets
veggie_name,Unnamed: 1_level_1
Radishes,0.110128
Turnips,0.136879
Rutabaga,0.14254
Bok Choy,0.147149
Arugula,0.147518
Shallots,0.150989
Swiss Chard,0.151678
Leeks,0.15288
Peppers,0.152989
Potatoes,0.15398


Create a list of recommended vegetables to parse our veg_info table for

In [28]:
veg_lst = veg_lst.index.tolist()
veg_lst

['Radishes',
 'Turnips',
 'Rutabaga',
 'Bok Choy',
 'Arugula',
 'Shallots',
 'Swiss Chard',
 'Leeks',
 'Peppers',
 'Potatoes']

Iterate through veg_info and print out the rows which match our veggies from the veg_list

In [29]:
for num in range(0, veg_info.shape[0]):
    for veg in veg_lst:
            if veg_info['plant'][num] == veg:
                print(veg_info.loc[[num]])

KeyError: 'plant'

Wrote a function to get top 10 recommended vegetables and then pull their information from our veg_info dataframe.

In [None]:
def pull_veggie(vegetable):
    '''
    This function serves to: 
    1) Pull the top 10 recommended vegetables from our recommender df
    2) Pull the data from our veg_info dataframe with specific information for those 10 recommended veggies
    Note: input needs to be a string from with a capitalized first letter
    '''
    veg_lst = recommender_df[[vegetable]].sort_values(by=vegetable)[1:11]
    veg_lst = veg_lst.index.tolist()
    
    created_df = pd.DataFrame(columns=veg_info.columns)
    
    for num in range(0, veg_info.shape[0]):
        for veg in veg_lst:
                if veg_info['plant'][num] == veg:
                    created_df = created_df.append(veg_info.loc[[num]])
                    
    return created_df

In [None]:
pull_veggie('Beets')

Additional input, skill_level = user's gardeneing skill level, 

In [None]:
def pull_veggie_v2(vegetable, skill_level):
    '''
    This function serves to: 
    1) Pull the top 10 recommended vegetables from our recommender df
    2) Pull the data from our veg_info dataframe with specific information for those 10 recommended veggies
    Note: input needs to be a string from with a capitalized first letter
    '''
    veg_lst = recommender_df[[vegetable]].sort_values(by=vegetable)[1:11]
    veg_lst = veg_lst.index.tolist()
    
    created_df = pd.DataFrame(columns=veg_info.columns)
    
    for num in range(0, veg_info.shape[0]):
        for veg in veg_lst:
                if veg_info['plant'][num] == veg and veg_info['difficulty'][num] <= skill_level:
                    created_df = created_df.append(veg_info.loc[[num]])
                    
    return created_df

In [None]:
pull_veggie_v2('Beets', 1)

Additional input, hardiness_zone = user's hardiness zone.

NOTE: I kept the hardiness_zone data as a string so I could use `in` to see if the user's specified zone was in the data

In [None]:
def pull_veggie_v3(vegetable, skill_level, hardiness_zone):
    '''
    This function serves to: 
    1) Pull the top 10 recommended vegetables from our recommender df
    2) Pull the data from our veg_info dataframe with specific information for those 10 recommended veggies
    3) Check if the difficuly level is <= the user's specificed level
    4) Check if the user's hardiness zone is in the top 10 veggie choices
    Note: input needs to be a string from with a capitalized first letter 
    '''
    veg_lst = recommender_df[[vegetable]].sort_values(by=vegetable)[1:11]
    veg_lst = veg_lst.index.tolist()
    
    created_df = pd.DataFrame(columns=veg_info.columns)
    
    for num in range(0, veg_info.shape[0]):
        for veg in veg_lst:
                if veg_info['plant'][num] == veg and veg_info['difficulty'][num] <= skill_level and (hardiness_zone in veg_info['hardiness_zones'][num]):
                    created_df = created_df.append(veg_info.loc[[num]])
                    
    return created_df

In [None]:
pull_veggie_v3('Beets', 1, '4')

In [None]:
veg_info.head(1)

In [None]:
#choose additional output data

def pull_veggie_v6(vegetable, skill_level, hardiness_zone):
    '''
    This function serves to: 
    1) Pull the top 10 recommended vegetables from our recommender df
    2) Pull the data from our veg_info dataframe with specific information for those 10 recommended veggies
    3) Check if the difficuly level is <= the user's specificed level
    4) Check if the user's hardiness zone is in the top 10 veggie choices
    5) Print some important facts about the plant
    Note: vegetable = string, skill_level = int, hardiness_zone = int as a string 
    '''
    veg_lst = recommender_df[[vegetable]].sort_values(by=vegetable)[1:11]
    veg_lst = veg_lst.index.tolist()
    
    created_df = pd.DataFrame(columns=veg_info.columns)
    
    for num in range(0, veg_info.shape[0]):
        for veg in veg_lst:
                if veg_info['plant'][num] == veg and veg_info['difficulty'][num] <= skill_level and (hardiness_zone in veg_info['hardiness_zones'][num]):
                    created_df = created_df.append(veg_info.loc[[num]])
    
    if created_df.shape[0] == 0:
        return "Unfortunately it doesn't look like we found any great matches for you, try adjusting your parameters"
    
    print("Our top picks for you are...")
    print()
    for num in range(0, created_df.shape[0]): 
        print(f"#{num+1} - {veg_lst[num]}!")
        print()
        print(f"A few important things to know before you grow {veg_lst[num]}:")
        print()
        print(f"1) Optimal sun exposure: {created_df.iloc[num][1]}")
        print()
        print(f"2) Ideal soil conditions: {created_df.iloc[num][2]}")
        print()
        print(f"3) Moisture level: {created_df.iloc[num][3]}")
        print()
        print(f"4) Expected spread: {created_df.iloc[num][7]}")
        print()
        print(f"5) Expected height: {created_df.iloc[num][8]}")
        print()
        print(f"6) Estimated time to harvest: {created_df.iloc[num][9]}")
        print()
        print("*" * 45)
        print()

In [None]:
pull_veggie_v6('Beets', 2, '4')

In [None]:
veg_info.iloc[1][4]

In [48]:
lst = ['hello', 'veggie', 'yum']
lst_2 = ['hello', 'veggie', 'yum', 'food']

In [50]:
for val in lst_2:
    if val not in lst:
        lst.append(val)
        
lst

['hello', 'veggie', 'yum', 'food']