In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load the beer table from CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("Resources/Clean/beer_with_aggs.csv")

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,beer_id,brewery_id,beer_name,style,types,state,country,city,availability,abv,look,smell,taste,feel,overall
0,0,798,289,St. Ides High Gravity Malt Liquor,American Malt Liquor,Brewery,TX,US,San Antonio,Year-round,8.2,2.720721,2.29955,2.209459,2.52027,2.308559
1,1,1061,383,Pilz,Bohemian Pilsener,"Brewery, Bar",TX,US,Del Valle,Year-round,4.7,3.997207,3.895251,3.994413,4.03352,4.032123
2,2,1062,383,HefeWeizen,German Hefeweizen,"Brewery, Bar",TX,US,Del Valle,Year-round,5.2,4.242775,4.272158,4.343931,4.263487,4.369942
3,3,1063,383,Oaktoberfest,German Märzen / Oktoberfest,"Brewery, Bar",TX,US,Del Valle,Fall,5.8,3.915441,3.794118,3.886029,3.849265,3.952206
4,4,1064,383,Pale Ale,American Pale Ale (APA),"Brewery, Bar",TX,US,Del Valle,Rotating,5.0,3.803571,3.875,3.910714,3.821429,3.982143


In [4]:
df.columns

Index(['Unnamed: 0', 'beer_id', 'brewery_id', 'beer_name', 'style', 'types',
       'state', 'country', 'city', 'availability', 'abv', 'look', 'smell',
       'taste', 'feel', 'overall'],
      dtype='object')

In [5]:
# Drop identifier / location columns that are not used as features.
# `beer_name` is deliberately kept: later cells read `df2.beer_name` and the
# recommender looks beers up by name.
# Any leftover "Unnamed: ..." columns (saved index artefacts) are dropped too,
# and errors="ignore" keeps the cell safe to re-run after the columns are gone.
unused_cols = [col for col in df.columns if str(col).startswith("Unnamed")]
unused_cols += ['beer_id', 'brewery_id', 'types', 'state', 'country', 'city']
df = df.drop(columns=unused_cols, errors="ignore")

In [6]:
df["style"].value_counts()

American IPA               432
American Pale Ale (APA)    141
American Imperial IPA      140
Belgian Saison             140
American Imperial Stout     74
                          ... 
Belgian Gueuze               1
Finnish Sahti                1
Russian Kvass                1
Belgian Lambic               1
British Barleywine           1
Name: style, Length: 105, dtype: int64

In [None]:
#df = df.drop('beer_id', axis=1)

In [None]:
#df = df.drop('brewery_id', axis=1)

In [None]:
#df = df.drop('beer_name', axis=1)

In [None]:
#df = df.drop('types', axis=1)

In [None]:
df.columns


In [None]:
# Work on a copy so the transformations below do not modify `df`.
df2 = df.copy()

In [None]:
df2.info()

In [None]:
df2.availability.value_counts()

In [None]:
df2["style"].value_counts()

In [None]:
# Encode each style label as its position in the alphabetically sorted list.
# A single vectorised map replaces the per-style scan-and-assign loop and
# yields an integer column directly.
# NOTE(review): get_recommended_style is later called with a style *label*
# ("New England IPA"); after this cell `style` holds integer codes, so confirm
# which representation the recommender is meant to receive.
styles = sorted(df2["style"].unique())

style_codes = {label: code for code, label in enumerate(styles)}
df2["style"] = df2["style"].map(style_codes)
styles

In [None]:
# Make sure the encoded style column has a numeric dtype.
df2["style"] = pd.to_numeric(df2["style"])

In [None]:
df2.value_counts()

In [None]:
df2["style"].value_counts()

In [None]:
df2.info()

In [None]:
df2.info()

In [None]:
# One-hot encode only the categorical `availability` column.
# A blanket pd.get_dummies(df2) would also expand any other string column that
# is present (e.g. `beer_name` or `style`), removing columns that later cells
# still read. The membership check keeps the cell safe to re-run.
if "availability" in df2.columns:
    df2 = pd.get_dummies(df2, columns=["availability"])

In [None]:
df2.head()

In [None]:
df2.columns

In [None]:
len(df2.index)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the numeric / one-hot columns only; string columns cannot be scaled.
data = df2.select_dtypes(include=["number", "bool"])
scaler = StandardScaler()
scaler.fit(data)

# Per-column means learned by the scaler.
print(scaler.mean_)

# The standardised feature matrix (zero mean, unit variance per column).
# The former `scaler.transform([[7, 7]])` call was removed: a two-value sample
# does not match the number of fitted columns and raises a ValueError.
print(scaler.transform(data))


In [None]:
# Set the lookup columns aside, then one-hot encode whatever is left.
# drop() returns a new frame rather than mutating in place, so the object that
# the scaling cell bound to `data` is not changed behind its back.
df_name = df2["beer_name"]
df_styles = df2["style"]
df2 = pd.get_dummies(df2.drop(columns=["beer_name", "style"]))

# Preview only the first rows instead of dumping the full frame.
df2.head()

In [None]:
# Build the recommender table: the encoded features plus the two lookup
# columns that were set aside earlier (appended at the end for now).
df3 = df2.assign(beer_name=df_name, style=df_styles)

In [None]:
# Move the lookup columns to the front: beer_name first, style second.
for slot, label in enumerate(("beer_name", "style")):
    moved = df3.pop(label)
    df3.insert(slot, label, moved)

In [None]:
df3.head()

In [None]:
# Destination CSV for the recommender table (relative to the working directory).
output_data_file = "beer_rec.csv"

In [None]:
# Persist df3 to disk; index=False leaves the row index out of the file.
df3.to_csv(output_data_file,index=False)

In [None]:
# to find beer if user knows the beer name
def get_recommended(df, beer_name, n_neighbors=10):
    """Return the beers closest to a named beer by cosine distance.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``beer_name`` column, a ``style`` column, and feature
        columns; every column other than those two is used as a feature.
    beer_name : str
        Exact name of the beer to look up in ``df["beer_name"]``.
    n_neighbors : int, default 10
        Number of neighbours returned per matching row.

    Returns
    -------
    pandas.DataFrame
        A copy of the neighbouring rows of ``df`` with an added ``Distance``
        column. The queried beer is part of the fitted data, so it normally
        appears in the result with distance 0. If several rows share the
        name, the neighbours of each are concatenated.

    Raises
    ------
    ValueError
        If no row of ``df`` has the requested ``beer_name``.
    """
    features = df.drop(["beer_name", "style"], axis=1)

    # Fail early with a readable message instead of an opaque estimator error.
    query = features.loc[df["beer_name"] == beer_name]
    if query.empty:
        raise ValueError(f"No beer named {beer_name!r} found in the data")

    model_knn = NearestNeighbors(metric='cosine', n_neighbors=n_neighbors)
    model_knn.fit(features)

    # Query with a DataFrame (not a bare array) so the column names match
    # those the model was fitted with.
    distances, indices = model_knn.kneighbors(query, n_neighbors=n_neighbors)

    # kneighbors returns positional indices, hence iloc; copy() so adding the
    # Distance column does not write into a slice of the caller's frame.
    result = df.iloc[indices.flatten()].copy()
    result["Distance"] = distances.flatten()

    return result

In [None]:
df_result = get_recommended(df3, "Smash And Grab")

In [None]:
df_result.head(20)

In [None]:
# to find beer if user knows nothing about beer and wants to use genre and review score
def get_recommended_style(df, style, abv=0.0, n_neighbors=10, random_state=None):
    """Recommend beers similar to a random beer of a given style.

    One beer whose ``style`` equals ``style`` and whose ``abv`` is strictly
    greater than ``abv`` is sampled, and its nearest neighbours by cosine
    distance are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``beer_name``, ``style`` and ``abv`` columns; every column
        other than ``beer_name`` and ``style`` is used as a feature.
    style : object
        Value compared for equality against ``df["style"]``.
    abv : float, default 0.0
        Exclusive lower bound on ``abv`` for the sampled seed beer.
    n_neighbors : int, default 10
        Number of neighbours to return.
    random_state : int or None, default None
        Passed to ``DataFrame.sample``; set it to make the choice of seed
        beer reproducible.

    Returns
    -------
    pandas.DataFrame
        A copy of the neighbouring rows of ``df`` with an added ``Distance``
        column.

    Raises
    ------
    ValueError
        If no beer matches the requested style and abv threshold.
    """
    features = df.drop(["beer_name", "style"], axis=1)

    # Fail early with a readable message rather than letting sample() raise
    # on an empty selection.
    candidates = features.loc[(df["style"] == style) & (df["abv"] > abv)]
    if candidates.empty:
        raise ValueError(
            f"No beer of style {style!r} with abv above {abv} found in the data"
        )
    seed_beer = candidates.sample(1, random_state=random_state)

    # explain metric hyper-parm in writeup
    model_knn = NearestNeighbors(metric='cosine', n_neighbors=n_neighbors)
    model_knn.fit(features)

    # Query with a DataFrame so the column names match the fitted ones.
    distances, indices = model_knn.kneighbors(seed_beer, n_neighbors=n_neighbors)

    # kneighbors returns positional indices, hence iloc; copy() so adding the
    # Distance column does not write into a slice of the caller's frame.
    result = df.iloc[indices.flatten()].copy()
    result["Distance"] = distances.flatten()

    return result

In [None]:
df_result2 = get_recommended_style(df3, "New England IPA", 6.0)

In [None]:
df_result2.head(20)

In [None]:
from sklearn.preprocessing import RobustScaler

# Only numeric review-score columns can be scaled; the recommendation frame
# also holds text columns (beer_name), which RobustScaler cannot process.
rating_cols = ['look', 'smell', 'taste', 'feel', 'overall']

scaler = RobustScaler()
# Keep the original row index so scaled rows can be traced back to df_result2.
df_result3 = pd.DataFrame(scaler.fit_transform(df_result2[rating_cols]),
                          columns=rating_cols,
                          index=df_result2.index)

# Plot the raw scores and the scaled scores on the same axes; the "before"
# series must come from the unscaled frame for the comparison to mean anything.
ax = df_result2.plot.scatter(x='overall', y='look', color='red',
                             marker='*', s=80, label='BEFORE SCALING')
df_result3.plot.scatter(x='overall', y='look', color='blue',
                        marker='o', s=60, label='AFTER SCALING', ax=ax)
plt.axhline(0, color='red', alpha=0.2)
plt.axvline(0, color='red', alpha=0.2);

In [None]:
from sklearn.preprocessing import scale

# `scale` is a plain function (not an estimator): it is called directly on the
# data and has no fit_transform method.
# NOTE(review): `beer_features` was not defined anywhere visible; it is taken
# here to be the feature columns of df3 (everything except the lookup
# columns). Confirm this is the intended input.
beer_features = df3.drop(columns=["beer_name", "style"])
scaled_beer_features = scale(beer_features)

# Preview the first standardised rows.
scaled_beer_features[:5]

In [None]:
df.columns