<h4>Load Beer Data:</h4>

In [1]:
import pandas as pd
df = pd.read_csv('https://query.data.world/s/cqa9clje3ye4un611s1nw5fo3')

In [3]:
df = df[['beer_name','brewery_name','review_overall','review_aroma','review_appearance','beer_style','review_palate','review_taste']]
df.dropna(axis=0)

Unnamed: 0,beer_name,brewery_name,review_overall,review_aroma,review_appearance,beer_style,review_palate,review_taste
0,Sausa Weizen,Vecchio Birraio,1.5,2.0,2.5,Hefeweizen,1.5,1.5
1,Red Moon,Vecchio Birraio,3.0,2.5,3.0,English Strong Ale,3.0,3.0
2,Black Horse Black Beer,Vecchio Birraio,3.0,2.5,3.0,Foreign / Export Stout,3.0,3.0
3,Sausa Pils,Vecchio Birraio,3.0,3.0,3.5,German Pilsener,2.5,3.0
4,Cauldron DIPA,Caldera Brewing Company,4.0,4.5,4.0,American Double / Imperial IPA,4.0,4.5
5,Caldera Ginger Beer,Caldera Brewing Company,3.0,3.5,3.5,Herbed / Spiced Beer,3.0,3.5
6,Caldera Ginger Beer,Caldera Brewing Company,3.5,3.5,3.5,Herbed / Spiced Beer,4.0,4.0
7,Caldera Ginger Beer,Caldera Brewing Company,3.0,2.5,3.5,Herbed / Spiced Beer,2.0,3.5
8,Caldera Ginger Beer,Caldera Brewing Company,4.0,3.0,3.5,Herbed / Spiced Beer,3.5,4.0
9,Caldera Ginger Beer,Caldera Brewing Company,4.5,3.5,5.0,Herbed / Spiced Beer,4.0,4.0


<h4>Remap Beer Style and Beer Name to integers:</h4>

In [4]:
brewery_name = df['brewery_name'].values
dict_brewery = dict(zip(brewery_name,range(len(brewery_name))))
beer_style = df['beer_style'].values 
dict_style = dict(zip(beer_style,range(len(beer_style))))
beer_name = df['beer_name'].values
dict_name = dict(zip(beer_name,range(len(beer_name))))
reverse_dict_name = {v: k for k, v in dict_name.items()}

In [5]:
df2 = df.copy()
df2['brewery_name'] = df2['brewery_name'].apply(lambda i: dict_brewery[i])
df2['beer_style'] = df2['beer_style'].apply(lambda i: dict_style[i])
df2['beer_name'] = df2['beer_name'].apply(lambda i: dict_name[i])

<h4>Extract Features We Want:</h4>

In [6]:
features = df2[['beer_name','brewery_name','review_overall','review_aroma','review_appearance','beer_style','review_palate','review_taste']]
#features = features.dropna(axis=0)
features = features.groupby(['beer_name']).mean()

namez = features.copy()
namez.reset_index(level=0, inplace=True)
print(features)

           brewery_name  review_overall  review_aroma  review_appearance  \
beer_name                                                                  
0          3.000000e+00        1.500000      2.000000           2.500000   
1          3.000000e+00        3.000000      2.500000           3.000000   
2          3.000000e+00        3.000000      2.500000           3.000000   
3          3.000000e+00        3.000000      3.000000           3.500000   
4          9.090000e+02        4.000000      4.500000           4.000000   
14         9.090000e+02        3.833333      3.722222           3.666667   
16         9.090000e+02        2.500000      2.250000           2.500000   
17         9.090000e+02        4.000000      3.000000           4.000000   
62         9.090000e+02        4.166667      4.309524           3.821429   
227        9.090000e+02        3.981595      3.705521           3.849693   
235        9.090000e+02        4.055556      3.666667           3.888889   
272        9

In [8]:
#Lookup the beer we want then get the row for the features and pass this into the KD Tree
available_beers = namez['beer_name'].apply(lambda i: reverse_dict_name[i]).values
feat_vec = features[['brewery_name', 'review_overall', 'review_aroma',
       'review_appearance', 'beer_style', 'review_palate', 'review_taste']].values

<h4>Create a KDTree for fast nearest neighbors lookup:</h4>

In [175]:
from scipy.spatial import KDTree
tree = KDTree(feat_vec)

<h4>User Selects Beer from List then get the feature corresponding to this beer:</h4>

In [176]:
user_beer = 'Harboe Bear Beer Premium Strong Beer'
#user_beer = 'Drunkel Weissen'
query_beer = dict_name[user_beer]
print(query_beer) #this is the beer id
query = namez[namez['beer_name'] == query_beer].values.flatten()[1:]
bn = (int(namez[namez['beer_name'] == query_beer].values[0][0]))
print(reverse_dict_name[bn])
#query = features.iloc[query_beer].values
#print(features.iloc[query_beer])

1611
Harboe Bear Beer Premium Strong Beer


<h4>Query the KDTree with the user selected beer's feature vector:</h4>

In [184]:
distance,index = tree.query(query,k=4)
print(index)
for i in index: 
    print(i)
    print(features.iloc[i])
    print(reverse_dict_name[features.iloc[i].name])
    print()

[93 78 80 90]
93
brewery_name            1626.0
review_overall             3.0
review_aroma               3.0
review_appearance          3.5
beer_style           1577975.0
review_palate              2.5
review_taste               2.5
Name: 1611, dtype: float64
Harboe Bear Beer Premium Strong Beer

78
brewery_name            1626.0
review_overall             3.3
review_aroma               2.9
review_appearance          2.6
beer_style           1577975.0
review_palate              2.6
review_taste               2.4
Name: 1593, dtype: float64
Harboe Bjørnebryg Extra Stark (Bear Beer Extra Strong)

80
brewery_name            1626.0
review_overall             2.0
review_aroma               2.5
review_appearance          3.5
beer_style           1577975.0
review_palate              3.0
review_taste               2.5
Name: 1595, dtype: float64
Netto "24" Fødselsdags-Bryg

90
brewery_name            1626.0
review_overall             1.5
review_aroma               2.0
review_appearance         

<h4>Get User Recommended Beer Name:</h4>

In [185]:
recommended_beers = []
for i in index: 
    #beer_id = int(namez.iloc[i].beer_name)
    #recommended_beers.append(df['beer_name'].iloc[i])
    recommended_beers.append(reverse_dict_name[features.iloc[i].name])

print(recommended_beers)
#Remove user beer since it has distance 0 in the lookup    
if user_beer in recommended_beers: 
    recommended_beers.remove(user_beer)
print(recommended_beers)

['Harboe Bjørnebryg Extra Stark (Bear Beer Extra Strong)', 'Netto "24" Fødselsdags-Bryg', 'Harboe Årgangssbryg']


<h4>Beer Id, Name to csv for flask app</h4>

In [110]:
df_csv = pd.DataFrame()
df_csv['beer_name'] = namez['beer_name'].apply(lambda i: reverse_dict_name[i])
df_csv['beer_id'] = namez['beer_name']

In [112]:
df_csv.to_csv('data.csv',index=None)