In [286]:
# Import Dependencies
import pandas as pd
import plotly.express as px
import hvplot.pandas
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


In [3]:
# Display all columns
pd.set_option('display.max_columns', None)

### Preprocessing data

In [4]:
# Load data into dataframe
df = pd.read_csv("csv_files/beer_info_from_db.csv")

In [323]:
df.sort_values("beer_id")

Unnamed: 0,beer_id,beer_name,beer_style,style_key,brewery,description,abv,ave_rating,min_ibu,max_ibu,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,avg_ibu,condensed_style
0,1,Amber,Altbier,8,Alaskan Brewing Co.,"Notes:Richly malty and long on the palate, wit...",5.3,3.65,25,50,13,32,9,47,74,33,0,33,57,8,111,37.5,Altbier
1,2,Double Bag,Altbier,8,Long Trail Brewing Co.,"Notes:This malty, full-bodied double alt is al...",7.2,3.90,25,50,12,57,18,33,55,16,0,24,35,12,84,37.5,Altbier
2,3,Long Trail Ale,Altbier,8,Long Trail Brewing Co.,Notes:Long Trail Ale is a full-bodied amber al...,5.0,3.58,25,50,14,37,6,42,43,11,0,10,54,4,62,37.5,Altbier
3,4,Doppelsticke,Altbier,8,Uerige Obergärige Hausbrauerei,Notes:,8.5,4.15,25,50,13,55,31,47,101,18,1,49,40,16,119,37.5,Altbier
4,5,Scurry,Altbier,8,Off Color Brewing,Notes:Just cause it's dark and German doesn't ...,5.3,3.67,25,50,21,69,10,63,120,14,0,19,36,15,218,37.5,Altbier
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5423,5552,Red Nose Winter Ale,Winter Warmer,17,Natty Greene's Pub & Brewing Co.,Notes:,6.8,3.59,35,50,8,44,24,19,52,21,0,26,21,96,77,42.5,Winter Warmer
5552,5553,Our Special Ale 2019 (Anchor Christmas Ale),Winter Warmer,17,Anchor Brewing Company,Notes:This is the forty-fifth annual Our Speci...,6.9,3.79,35,50,22,73,9,58,87,23,0,48,91,44,104,42.5,Winter Warmer
5553,5554,Fish Tale Winterfish,Winter Warmer,17,Fish Brewing Company / Fishbowl Brewpub,Notes:,7.5,3.76,35,50,11,36,50,70,72,59,0,81,110,18,73,42.5,Winter Warmer
5554,5555,"He'Brew Hanukkah, Chanukah: Pass The Beer",Winter Warmer,17,Shmaltz Brewing Company,Notes:Chanukah Beer pours a rich crystal clear...,8.0,3.61,35,50,6,64,30,57,78,15,1,28,57,23,129,42.5,Winter Warmer


In [326]:
df[df.beer_style == "Fruit and Field Beer"]

Unnamed: 0,beer_id,beer_name,beer_style,style_key,brewery,description,abv,ave_rating,min_ibu,max_ibu,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,avg_ibu,condensed_style
1261,1251,#9,Fruit and Field Beer,77,Magic Hat Brewing Company,Notes:A beer cloaked in secrecy. An ale whose ...,5.1,3.42,5,45,22,23,1,31,73,38,0,74,50,2,28,25.0,Fruit and Field Beer
1262,1252,Samuel Adams Cherry WheatBoston Beer Company (...,Fruit and Field Beer,77,Boston Beer Company (Samuel Adams),Notes:,5.3,3.19,5,45,4,9,8,7,40,16,0,69,12,0,25,25.0,Fruit and Field Beer
1263,1253,Raspberry Tart,Fruit and Field Beer,77,New Glarus Brewing Company,Notes:Treat yourself to a rare delight. The vo...,4.0,4.41,5,45,25,32,14,7,193,90,0,137,11,4,16,25.0,Fruit and Field Beer
1264,1254,Summer Shandy,Fruit and Field Beer,77,Jacob Leinenkugel Brewing Company,Notes:,4.2,3.2,5,45,8,23,9,6,22,55,0,51,5,1,28,25.0,Fruit and Field Beer
1265,1255,Rübæus,Fruit and Field Beer,77,Founders Brewing Company,Notes:This brew achieves its unique raspberry ...,5.7,3.91,5,45,14,37,7,12,138,50,0,88,16,1,20,25.0,Fruit and Field Beer
1266,1256,Wisconsin Belgian Red,Fruit and Field Beer,77,New Glarus Brewing Company,Notes:You hold the marriage of wine and beer. ...,4.0,4.42,5,45,22,34,8,9,81,67,0,137,16,6,32,25.0,Fruit and Field Beer
1267,1257,Purple Haze,Fruit and Field Beer,77,Abita Brewing Co.,Notes:Experience the magic of Purple Haze.® Cl...,4.2,3.32,5,45,11,30,3,11,90,49,1,77,19,1,59,25.0,Fruit and Field Beer
1268,1258,Banana Bread Beer,Fruit and Field Beer,77,Eagle Brewery,"Notes:Long ago, ale was known as 'liquid bread...",5.2,3.56,5,45,14,34,7,26,44,21,2,92,27,14,73,25.0,Fruit and Field Beer
1269,1259,Serendipity,Fruit and Field Beer,77,New Glarus Brewing Company,"Notes:Severe Drought, we shared the farmer’s h...",4.0,4.38,5,45,13,34,12,3,204,137,2,202,9,4,15,25.0,Fruit and Field Beer
1270,1260,Hell Or High Watermelon Wheat Beer,Fruit and Field Beer,77,21st Amendment Brewery,"Notes:Like Lady Liberty, we stand for independ...",4.9,3.4,5,45,31,27,9,17,66,49,5,100,37,17,63,25.0,Fruit and Field Beer


In [351]:
styles = [
    {"style": "Bock", "substyles":["Bock - Doppelbock", "Bock - Eisbock", "Bock - Maibock", "Bock - Traditional", "Bock - Weizenbock"]},
    {"style": "Brown Ales", "substyles": ["Altbier", "Brown Ale - American", "Brown Ale - Belgian Dark", "Brown Ale - English", "Mild Ale - English Dark"]},
    {"style": "Dark Ales", "substyles": ["Dubbel", "Rye Beer - Roggenbier", "Scottish Ale", "Winter Warmer"]},
    {"style": "Dark Lagers", "substyles": ["Lager - American Amber / Red", "Lager - European Dark", "Lager - Märzen", "Lager - Munich Dunkel", "Lager - Rauchbier", "Lager - Schwarzbier", "Lager - Vienna"]},
    {"style": "Hybrid Beers", "substyles": ["Bière de Champagne / Bière Brut", "Braggot", "California Common / Steam Beer", "Cream Ale"]},
    {"style": "India Pale Ales", "substyles": ["IPA - American", "IPA - Belgian", "IPA - Black / Cascadian Dark Ale", "IPA - Brut", "IPA - English", "IPA - Imperial", "IPA - New England"]},
    {"style": "Pale Ales", "substyles": ["Bitter - English", "Bitter - English Extra Special / Strong Bitter (ESB)", "Blonde Ale - Belgian", "Blonde Ale - American", "Farmhouse Ale - Bière de Garde", "Farmhouse Ale - Saison", "Kölsch", "Mild Ale - English Pale", "Pale Ale - American", "Pale Ale - Belgian", "Pale Ale - English", "Red Ale - American Amber / Red", "Red Ale - Irish"]},
    {"style": "Porters", "substyles": ["Porter - American", "Porter - Baltic", "Porter - English", "Porter - Imperial", "Porter - Robust", "Porter - Smoked"]},
    {"style": "Specialty Beers", "substyles": ["Chile Beer", "Farmhouse Ale - Sahti", "Fruit and Field Beer", "Gruit / Ancient Herbed Ale", "Happoshu", "Herb and Spice Beer", "Kvass", "Lager - Japanese Rice", "Low Alcohol Beer", "Pumpkin Beer", "Rye Beer", "Smoked Beer"]},
    {"style": "Stouts", "substyles":  ["Stout - Sweet / Milk", "Stout - Russian Imperial", "Stout - Oatmeal", "Stout - Irish Dry", "Stout - Foreign / Export", "Stout - English", "Stout - American Imperial", "Stout - American" ]},
    {"style": "Strong Ales", "substyles": ["Wheat Beer - Wheatwine", "Tripel", "Strong Ale - English", "Strong Ale - Belgian Pale", "Strong Ale - Belgian Dark" , "Strong Ale - American", "Scotch Ale / Wee Heavy", "Red Ale - Imperial", "Quadrupel (Quad)", "Old Ale", "Barleywine - English", "Barleywine - American"]},
    {"style": "Wheat Beers", "substyles": ["Wheat Beer - Witbier", "Wheat Beer - Kristallweizen", "Wheat Beer - Hefeweizen", "Wheat Beer - Dunkelweizen", "Wheat Beer - American Pale", "Wheat Beer - American Dark"]},
    {"style": "Wild/Sour ", "substyles": [ "Brett Beer", "Lambic - Faro", "Lambic - Fruit", "Lambic - Gueuze", "Lambic - Traditional", "Sour - Berliner Weisse", "Sour - Flanders Oud Bruin", "Sour - Flanders Red Ale", "Sour - Fruited Kettle Sour", "Sour - Gose", "Wild Ale"]},
    {"style": "Pale Lagers", "substyles": ["Lager - Adjunct", "Lager - American", "Lager - European / Dortmunder Export", "Lager - European Pale", "Lager - European Strong", "Lager - Festbier / Wiesnbier", "Lager - Helles", "Lager - India Pale Lager (IPL)", "Lager - India Pale Lager", "Lager - Kellerbier / Zwickelbier", "Lager - Light", "Lager - Malt Liquor", "Pilsner - Bohemian / Czech", "Pilsner - German", "Pilsner - Imperial", "Lager - Märzen / Oktoberfest"]}
]

In [352]:
def get_big_style(x):
    for item in styles:
        if x in item["substyles"]:
            return item["style"]

In [353]:
df["BA_Big_styles"] = df.beer_style.apply(get_big_style)

In [354]:
set(df.BA_Big_styles.to_list())

{'Bock',
 'Brown Ales',
 'Dark Ales',
 'Dark Lagers',
 'Hybrid Beers',
 'India Pale Ales',
 'Pale Ales',
 'Pale Lagers',
 'Porters',
 'Specialty Beers',
 'Stouts',
 'Strong Ales',
 'Wheat Beers',
 'Wild/Sour '}

In [355]:
# Calculate average ibu and add to column
df["avg_ibu"] = (df.min_ibu + df.max_ibu) /2

In [356]:
# Filter out beers with abv > 13
df = df[df.abv <= 13]
df = df[df.abv >= 3]

In [357]:
# Get list of unique styles
beer_styles = df.beer_style.to_list()
beer_styles_unique = set(beer_styles)
print(len(beer_styles))
print(len(beer_styles_unique))

5348
111


In [358]:
# Calculate number of items for each style
for item in beer_styles_unique:
    counter = 0
    for x in beer_styles:
        if item == x:
            counter += 1
    print(item, counter)

Porter - English 50
Scottish Ale 50
Lambic - Faro 16
Lager - Malt Liquor 50
Brown Ale - English 50
Stout - English 50
Wheat Beer - Hefeweizen 49
Strong Ale - American 41
Brown Ale - Belgian Dark 48
Fruit and Field Beer 48
Lambic - Gueuze 49
Porter - Imperial 49
Barleywine - American 44
Sour - Flanders Red Ale 49
Wheat Beer - Wheatwine 42
Lager - European / Dortmunder Export 50
Pale Ale - Belgian 50
Stout - American Imperial 42
Lager - Light 47
Blonde Ale - Belgian 50
Pale Ale - English 50
Strong Ale - Belgian Pale 50
Lager - European Pale 49
Cream Ale 50
Lager - European Strong 48
Sour - Flanders Oud Bruin 49
Kölsch 50
Stout - Irish Dry 50
Sour - Berliner Weisse 48
Porter - Smoked 48
Lager - Märzen / Oktoberfest 50
Wheat Beer - Dunkelweizen 49
Bitter - English 50
Stout - Russian Imperial 43
Old Ale 46
Lager - European Dark 49
Sour - Gose 49
Wheat Beer - American Pale 50
Red Ale - Irish 50
Bock - Weizenbock 50
Brett Beer 50
Herb and Spice Beer 50
Red Ale - American Amber / Red 50
Lager 

In [359]:
beer_styles_condensed = ["IPA", "Porter", "Lager", "Pale Ale", "Pilsner", "Stout", "Wheat Beer", "Bock",  
                         "Blonde Ale", "Sour", "Lambic", "Brown Ale", "Barleywine", "Strong Ale", "Farmhouse Ale",
                         "Bitter", "Red Ale"]

In [360]:
# Function to reduce styles

def reduce_styles(style):
    for count, item in enumerate (beer_styles_condensed, start=1):
#         print(item, style)
        if item in style:
            return(item)
        elif count == len(beer_styles_condensed):
            return(style)
        else:
            continue

In [361]:
df["condensed_style"] = df.beer_style.apply(reduce_styles)

In [362]:
df

Unnamed: 0,beer_id,beer_name,beer_style,style_key,brewery,description,abv,ave_rating,min_ibu,max_ibu,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,avg_ibu,condensed_style,BA_Big_styles
0,1,Amber,Altbier,8,Alaskan Brewing Co.,"Notes:Richly malty and long on the palate, wit...",5.3,3.65,25,50,13,32,9,47,74,33,0,33,57,8,111,37.5,Altbier,Brown Ales
1,2,Double Bag,Altbier,8,Long Trail Brewing Co.,"Notes:This malty, full-bodied double alt is al...",7.2,3.90,25,50,12,57,18,33,55,16,0,24,35,12,84,37.5,Altbier,Brown Ales
2,3,Long Trail Ale,Altbier,8,Long Trail Brewing Co.,Notes:Long Trail Ale is a full-bodied amber al...,5.0,3.58,25,50,14,37,6,42,43,11,0,10,54,4,62,37.5,Altbier,Brown Ales
3,4,Doppelsticke,Altbier,8,Uerige Obergärige Hausbrauerei,Notes:,8.5,4.15,25,50,13,55,31,47,101,18,1,49,40,16,119,37.5,Altbier,Brown Ales
4,5,Scurry,Altbier,8,Off Color Brewing,Notes:Just cause it's dark and German doesn't ...,5.3,3.67,25,50,21,69,10,63,120,14,0,19,36,15,218,37.5,Altbier,Brown Ales
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5551,5551,The First Snow Ale,Winter Warmer,17,RJ Rockers Brewing Company,Notes:This hearty American pale ale contains a...,6.0,3.63,35,50,15,31,23,16,54,43,0,54,14,140,58,42.5,Winter Warmer,Dark Ales
5552,5553,Our Special Ale 2019 (Anchor Christmas Ale),Winter Warmer,17,Anchor Brewing Company,Notes:This is the forty-fifth annual Our Speci...,6.9,3.79,35,50,22,73,9,58,87,23,0,48,91,44,104,42.5,Winter Warmer,Dark Ales
5553,5554,Fish Tale Winterfish,Winter Warmer,17,Fish Brewing Company / Fishbowl Brewpub,Notes:,7.5,3.76,35,50,11,36,50,70,72,59,0,81,110,18,73,42.5,Winter Warmer,Dark Ales
5554,5555,"He'Brew Hanukkah, Chanukah: Pass The Beer",Winter Warmer,17,Shmaltz Brewing Company,Notes:Chanukah Beer pours a rich crystal clear...,8.0,3.61,35,50,6,64,30,57,78,15,1,28,57,23,129,42.5,Winter Warmer,Dark Ales


In [363]:
# Count of Unique styles
# passing "set()" to a list adds only unique values from the list to the set
condensed_styles = set(df.condensed_style.to_list())
len(condensed_styles)

44

In [364]:
condensed_styles

{'Altbier',
 'Barleywine',
 'Bitter',
 'Bière de Champagne / Bière Brut',
 'Blonde Ale',
 'Bock',
 'Braggot',
 'Brett Beer',
 'Brown Ale',
 'California Common / Steam Beer',
 'Chile Beer',
 'Cream Ale',
 'Dubbel',
 'Farmhouse Ale',
 'Fruit and Field Beer',
 'Gruit / Ancient Herbed Ale',
 'Happoshu',
 'Herb and Spice Beer',
 'IPA',
 'Kvass',
 'Kölsch',
 'Lager',
 'Lambic',
 'Mild Ale - English Dark',
 'Mild Ale - English Pale',
 'Old Ale',
 'Pale Ale',
 'Pilsner',
 'Porter',
 'Pumpkin Beer',
 'Quadrupel (Quad)',
 'Red Ale',
 'Rye Beer',
 'Rye Beer - Roggenbier',
 'Scotch Ale / Wee Heavy',
 'Scottish Ale',
 'Smoked Beer',
 'Sour',
 'Stout',
 'Strong Ale',
 'Tripel',
 'Wheat Beer',
 'Wild Ale',
 'Winter Warmer'}

### K-Means

In [365]:
df.columns

Index(['beer_id', 'beer_name', 'beer_style', 'style_key', 'brewery',
       'description', 'abv', 'ave_rating', 'min_ibu', 'max_ibu', 'astringency',
       'body', 'alcohol', 'bitter', 'sweet', 'sour', 'salty', 'fruits',
       'hoppy', 'spices', 'malty', 'avg_ibu', 'condensed_style',
       'BA_Big_styles'],
      dtype='object')

In [386]:
# Drop columns unnecessary for analysis
new_df = df.drop(['beer_id', 'beer_name', 'beer_style', 'style_key', 'brewery',
       'description', 'ave_rating', "min_ibu", "max_ibu", "salty", "alcohol",
        "condensed_style", "avg_ibu", "BA_Big_styles"], axis=1)

In [387]:
new_df.head()

Unnamed: 0,abv,astringency,body,bitter,sweet,sour,fruits,hoppy,spices,malty
0,5.3,13,32,47,74,33,33,57,8,111
1,7.2,12,57,33,55,16,24,35,12,84
2,5.0,14,37,42,43,11,10,54,4,62
3,8.5,13,55,47,101,18,49,40,16,119
4,5.3,21,69,63,120,14,19,36,15,218


In [388]:
# define standard scaler
scaler = StandardScaler()
# transform data
scaled_df = scaler.fit_transform(new_df)

In [389]:
# Looking for the best K - unscaled
# inertia = []
# k = list(range(1, 15))

# for i in k:
#     km = KMeans(n_clusters=i, random_state=0)
#     km.fit(new_df)
#     inertia.append(km.inertia_)
    
# Looking for the best K - scaled
inertia = []
k = list(range(1, 15))

for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(scaled_df)
    inertia.append(km.inertia_)


In [390]:
# Define a DataFrame to plot the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)

In [391]:
# Initializing model with K = 3 
model = KMeans(n_clusters=7, random_state=5)
model

KMeans(n_clusters=7, random_state=5)

In [392]:
# Fitting model
model.fit(new_df)

KMeans(n_clusters=7, random_state=5)

In [393]:
# Get the predictions
predictions = model.predict(new_df)
print(predictions)

[5 5 5 ... 2 4 6]


In [394]:
# Add a new class column to the df
new_df["class"] = model.labels_
new_df.head()

Unnamed: 0,abv,astringency,body,bitter,sweet,sour,fruits,hoppy,spices,malty,class
0,5.3,13,32,47,74,33,33,57,8,111,5
1,7.2,12,57,33,55,16,24,35,12,84,5
2,5.0,14,37,42,43,11,10,54,4,62,5
3,8.5,13,55,47,101,18,49,40,16,119,0
4,5.3,21,69,63,120,14,19,36,15,218,4


In [396]:
style_df = df[["beer_style", "style_key", "condensed_style", "ave_rating", "BA_Big_styles"]]

In [397]:
combined_df = new_df.join(style_df)

In [398]:
combined_df.sort_values("abv", ascending=False)

Unnamed: 0,abv,astringency,body,bitter,sweet,sour,fruits,hoppy,spices,malty,class,beer_style,style_key,condensed_style,ave_rating,BA_Big_styles
4507,13.0,11,112,80,61,9,17,14,88,137,4,Stout - American Imperial,89,Stout,4.34,Stouts
4991,13.0,6,20,10,70,28,32,14,7,50,6,Strong Ale - Belgian Pale,105,Strong Ale,3.47,Strong Ales
801,13.0,8,73,69,79,3,9,7,23,174,4,Brown Ale - American,9,Brown Ale,4.29,Brown Ales
4894,13.0,3,75,31,73,8,87,4,35,105,0,Strong Ale - American,103,Strong Ale,4.53,Strong Ales
4875,13.0,4,105,77,69,8,31,10,91,176,4,Strong Ale - American,103,Strong Ale,4.57,Strong Ales
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4297,3.0,39,23,7,40,127,66,5,1,34,3,Sour - Berliner Weisse,122,Sour,3.55,Wild/Sour
4267,3.0,58,27,5,83,181,124,16,10,53,3,Sour - Berliner Weisse,122,Sour,4.18,Wild/Sour
2479,3.0,0,4,2,1,0,1,2,0,7,1,Lager - Light,62,Lager,1.50,Pale Lagers
4280,3.0,37,27,6,44,130,55,7,0,33,3,Sour - Berliner Weisse,122,Sour,3.76,Wild/Sour


In [399]:
combined_df.head(50)

Unnamed: 0,abv,astringency,body,bitter,sweet,sour,fruits,hoppy,spices,malty,class,beer_style,style_key,condensed_style,ave_rating,BA_Big_styles
0,5.3,13,32,47,74,33,33,57,8,111,5,Altbier,8,Altbier,3.65,Brown Ales
1,7.2,12,57,33,55,16,24,35,12,84,5,Altbier,8,Altbier,3.9,Brown Ales
2,5.0,14,37,42,43,11,10,54,4,62,5,Altbier,8,Altbier,3.58,Brown Ales
3,8.5,13,55,47,101,18,49,40,16,119,0,Altbier,8,Altbier,4.15,Brown Ales
4,5.3,21,69,63,120,14,19,36,15,218,4,Altbier,8,Altbier,3.67,Brown Ales
5,7.2,25,51,44,45,9,11,51,20,95,5,Altbier,8,Altbier,3.78,Brown Ales
6,6.0,22,45,46,62,25,34,60,4,103,5,Altbier,8,Altbier,4.1,Brown Ales
7,5.3,28,40,40,58,29,36,54,8,97,5,Altbier,8,Altbier,3.46,Brown Ales
8,5.0,18,49,37,73,22,21,37,4,98,5,Altbier,8,Altbier,3.6,Brown Ales
9,4.8,25,35,38,39,13,8,60,16,97,5,Altbier,8,Altbier,4.1,Brown Ales


In [400]:
combined_df.groupby(["BA_Big_styles"]).median().sort_values(["class"])

Unnamed: 0_level_0,abv,astringency,body,bitter,sweet,sour,fruits,hoppy,spices,malty,class,style_key,ave_rating
BA_Big_styles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Bock,7.5,11.0,47.0,25.0,71.0,20.0,32.0,25.0,9.0,89.0,1,4.0,3.83
Dark Ales,6.5,9.0,42.0,20.5,66.5,20.0,27.0,22.0,22.0,78.0,1,15.5,3.775
Hybrid Beers,5.8,9.0,20.0,12.0,27.0,14.0,11.0,19.0,4.0,34.0,1,29.0,3.73
India Pale Ales,7.1,18.0,38.0,65.0,46.0,51.0,67.0,86.5,6.0,40.0,2,35.0,4.01
Specialty Beers,5.7,8.0,24.0,10.0,26.0,11.0,15.0,14.0,18.0,33.0,2,80.0,3.71
Strong Ales,9.45,11.0,48.0,27.0,88.0,34.0,57.0,30.0,20.0,84.0,2,103.0,3.98
Pale Ales,5.4,19.0,36.0,37.0,48.0,38.0,41.0,58.0,9.0,62.0,3,46.0,3.74
Wild/Sour,6.0,30.0,29.0,6.0,52.0,129.0,83.0,11.0,6.0,22.0,3,122.0,4.06
Brown Ales,5.5,12.0,51.0,36.0,62.0,13.0,15.0,33.0,8.0,103.0,4,10.0,3.73
Porters,6.5,10.0,87.0,52.0,65.0,9.0,11.0,24.0,17.0,119.0,4,70.0,3.96


In [401]:
bar_df = combined_df.groupby(["class"]).mean().sort_values(["class"])
bar_df.reset_index(inplace=True)
bar_df

Unnamed: 0,class,abv,astringency,body,bitter,sweet,sour,fruits,hoppy,spices,malty,style_key,ave_rating
0,0,8.594195,12.703436,54.719711,27.920434,111.857143,37.151899,64.481013,30.598553,22.538879,97.202532,64.146474,3.882803
1,1,6.139205,6.097007,10.494324,8.433437,12.008256,8.431373,8.344685,11.384933,5.493292,17.442724,62.835913,3.535439
2,2,6.671366,22.672131,42.009563,69.64071,49.165301,49.90847,61.554645,97.392077,12.561475,67.068306,52.286885,3.901721
3,3,6.438941,33.95723,35.203666,18.014257,73.069246,137.338086,107.843177,27.91446,12.810591,32.747454,100.346232,4.085906
4,4,7.177507,13.317694,92.002681,63.907507,74.226542,13.4437,17.894102,35.178284,24.636729,137.205094,66.315013,3.961542
5,5,5.724996,17.339847,41.572642,36.325404,46.107052,16.429057,18.121495,45.433305,9.781648,81.481733,47.856415,3.579108
6,6,6.648221,15.157353,35.719118,18.794118,46.882353,39.408824,53.852941,25.814706,46.597059,50.457353,76.491176,3.739515


In [402]:
fig = px.bar(bar_df, x="class", y="ave_rating")
fig.show()

In [403]:
bar_df.columns

Index(['class', 'abv', 'astringency', 'body', 'bitter', 'sweet', 'sour',
       'fruits', 'hoppy', 'spices', 'malty', 'style_key', 'ave_rating'],
      dtype='object')

In [404]:
fig = px.bar(bar_df, x="class", y=['abv', 'astringency', 'body', 'bitter', 'sweet', 'sour',
       'fruits', 'hoppy', 'spices'])
fig.show()

In [292]:

# fig = px.scatter_3d(combined_df, x='abv', y='style_key', z='malty',
#               color='class')
# fig.show()

### KNN

In [373]:
# Drop columns unnecessary for analysis
knn_df = df.drop(['beer_id', 'beer_name', 'beer_style', 'style_key', 'brewery',
       'description', 'ave_rating', "min_ibu", "max_ibu", "salty", "alcohol",
        "avg_ibu"], axis=1)

In [374]:
knn_df

Unnamed: 0,abv,astringency,body,bitter,sweet,sour,fruits,hoppy,spices,malty,condensed_style,BA_Big_styles
0,5.3,13,32,47,74,33,33,57,8,111,Altbier,Brown Ales
1,7.2,12,57,33,55,16,24,35,12,84,Altbier,Brown Ales
2,5.0,14,37,42,43,11,10,54,4,62,Altbier,Brown Ales
3,8.5,13,55,47,101,18,49,40,16,119,Altbier,Brown Ales
4,5.3,21,69,63,120,14,19,36,15,218,Altbier,Brown Ales
...,...,...,...,...,...,...,...,...,...,...,...,...
5551,6.0,15,31,16,54,43,54,14,140,58,Winter Warmer,Dark Ales
5552,6.9,22,73,58,87,23,48,91,44,104,Winter Warmer,Dark Ales
5553,7.5,11,36,70,72,59,81,110,18,73,Winter Warmer,Dark Ales
5554,8.0,6,64,57,78,15,28,57,23,129,Winter Warmer,Dark Ales


In [375]:
knn_df.columns

Index(['abv', 'astringency', 'body', 'bitter', 'sweet', 'sour', 'fruits',
       'hoppy', 'spices', 'malty', 'condensed_style', 'BA_Big_styles'],
      dtype='object')

In [376]:
# Dependent value/value we are trying to predict = style
y = df["BA_Big_styles"]
X = df[['abv', 'astringency', 'body', 'bitter', 'sweet', 'sour', 'fruits',
       'hoppy', 'spices', 'malty']]

In [377]:
# Split into testing and training data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [378]:
# Scale data
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [384]:
# Instatiate KNN class and train model
classifier = KNeighborsClassifier(n_neighbors=7)
classifier.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=7)

In [385]:
# View results
y_pred = classifier.predict(X_test)
# print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
# output_dict = classification_report(y_test, y_pred, output_dict=True)

                 precision    recall  f1-score   support

           Bock       0.31      0.40      0.35        45
     Brown Ales       0.40      0.31      0.35        59
      Dark Ales       0.46      0.41      0.43        39
    Dark Lagers       0.32      0.31      0.31        62
   Hybrid Beers       0.19      0.15      0.17        33
India Pale Ales       0.68      0.75      0.71        81
      Pale Ales       0.50      0.55      0.53       130
    Pale Lagers       0.66      0.69      0.67       133
        Porters       0.52      0.57      0.54        58
Specialty Beers       0.45      0.40      0.43        94
         Stouts       0.63      0.62      0.62        71
    Strong Ales       0.66      0.69      0.67       112
    Wheat Beers       0.62      0.47      0.54        59
     Wild/Sour        0.85      0.83      0.84        94

       accuracy                           0.56      1070
      macro avg       0.52      0.51      0.51      1070
   weighted avg       0.56   

In [311]:
output_dict

{'Altbier': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5},
 'Barleywine': {'precision': 0.4583333333333333,
  'recall': 0.6470588235294118,
  'f1-score': 0.5365853658536585,
  'support': 17},
 'Bitter': {'precision': 0.6666666666666666,
  'recall': 0.6086956521739131,
  'f1-score': 0.6363636363636365,
  'support': 23},
 'Bière de Champagne / Bière Brut': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 5},
 'Blonde Ale': {'precision': 0.2222222222222222,
  'recall': 0.35294117647058826,
  'f1-score': 0.27272727272727276,
  'support': 17},
 'Bock': {'precision': 0.3125,
  'recall': 0.39215686274509803,
  'f1-score': 0.34782608695652173,
  'support': 51},
 'Braggot': {'precision': 0.5,
  'recall': 0.25,
  'f1-score': 0.3333333333333333,
  'support': 4},
 'Brett Beer': {'precision': 0.25,
  'recall': 0.08333333333333333,
  'f1-score': 0.125,
  'support': 12},
 'Brown Ale': {'precision': 0.2631578947368421,
  'recall': 0.17857142857142858,
  'f1-score'

In [306]:
output_dict["macro avg"]["precision"]

0.36388221679951477

In [380]:
accuracies = []
precisions = []
recalls = []

for n in range(1,31):
    classifier = KNeighborsClassifier(n_neighbors=n)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    # print(confusion_matrix(y_test, y_pred))
    # print(classification_report(y_test, y_pred))
    output_dict = classification_report(y_test, y_pred, output_dict=True)
    accuracies.append(output_dict["accuracy"])
    precisions.append(output_dict["macro avg"]["precision"])
    recalls.append(output_dict["macro avg"]["recall"])

In [381]:
fig = px.scatter(x=range(1,31), y=accuracies)
fig.show()

In [382]:
fig = px.scatter(x=range(1,31), y=precisions)
fig.show()

In [383]:
fig = px.scatter(x=range(1,31), y=recalls)
fig.show()