In [12]:
# Import Dependencies
import sqlite3
import csv
import pandas as pd

In [13]:
# Display all columns
pd.set_option('display.max_columns', None)

In [14]:
# Connect to  database
connection = sqlite3.connect('SQL/beer.sqlite')

In [23]:
# Importing CSV of data with Top 250 beers into Pandas
df = pd.read_csv("Webscraping/all_scraped_with_features_reduced.csv")

In [24]:
len(df)

3520

In [25]:
# Make columns lowercase
df.columns=df.columns.str.lower()

In [26]:
df.columns

Index(['beer_name', 'beer_style', 'brewery', 'abv', 'rating', 'astringent',
       'body', 'alcoholic', 'bitter', 'sweet', 'sour', 'salty', 'fruity',
       'hoppy', 'spices', 'malty'],
      dtype='object')

In [27]:
df.rename(columns={'astringent': 'astringency', 
                   'alcoholic': 'alcohol',
                  'fruity':'fruits'}, inplace=True)

In [28]:
# Create column summing number of reviews for each factor
df["sum_of_factors"] = df.astringency + df.body + df.alcohol + df.bitter + df.sweet + df.sour + df.salty + df.fruits + df.hoppy + df.spices + df.malty

In [29]:
# List of dictionaries for the major styles and their substyles

styles = [
    {"style": "Bock", "substyles":["Bock - Doppelbock", "Bock - Eisbock", "Bock - Maibock", "Bock - Traditional", "Bock - Weizenbock"]},
    {"style": "Brown Ales", "substyles": ["Altbier", "Brown Ale - American", "Brown Ale - Belgian Dark", "Brown Ale - English", "Mild Ale - English Dark"]},
    {"style": "Dark Ales", "substyles": ["Dubbel", "Rye Beer - Roggenbier", "Scottish Ale", "Winter Warmer"]},
    {"style": "Dark Lagers", "substyles": ["Lager - American Amber / Red", "Lager - European Dark", "Lager - Märzen", "Lager - Munich Dunkel", "Lager - Rauchbier", "Lager - Schwarzbier", "Lager - Vienna"]},
    {"style": "Hybrid Beers", "substyles": ["Bière de Champagne / Bière Brut", "Braggot", "California Common / Steam Beer", "Cream Ale"]},
    {"style": "India Pale Ales", "substyles": ["IPA - American", "IPA - Belgian", "IPA - Black / Cascadian Dark Ale", "IPA - Brut", "IPA - English", "IPA - Imperial", "IPA - New England"]},
    {"style": "Pale Ales", "substyles": ["Bitter - English", "Bitter - English Extra Special / Strong Bitter (ESB)", "Blonde Ale - Belgian", "Blonde Ale - American", "Farmhouse Ale - Bière de Garde", "Farmhouse Ale - Saison", "Kölsch", "Mild Ale - English Pale", "Pale Ale - American", "Pale Ale - Belgian", "Pale Ale - English", "Red Ale - American Amber / Red", "Red Ale - Irish"]},
    {"style": "Porters", "substyles": ["Porter - American", "Porter - Baltic", "Porter - English", "Porter - Imperial", "Porter - Robust", "Porter - Smoked"]},
    {"style": "Specialty Beers", "substyles": ["Chile Beer", "Farmhouse Ale - Sahti", "Fruit and Field Beer", "Gruit / Ancient Herbed Ale", "Happoshu", "Herb and Spice Beer", "Kvass", "Lager - Japanese Rice", "Low Alcohol Beer", "Pumpkin Beer", "Rye Beer", "Smoked Beer"]},
    {"style": "Stouts", "substyles":  ["Stout - Sweet / Milk", "Stout - Russian Imperial", "Stout - Oatmeal", "Stout - Irish Dry", "Stout - Foreign / Export", "Stout - English", "Stout - American Imperial", "Stout - American" ]},
    {"style": "Strong Ales", "substyles": ["Wheat Beer - Wheatwine", "Tripel", "Strong Ale - English", "Strong Ale - Belgian Pale", "Strong Ale - Belgian Dark" , "Strong Ale - American", "Scotch Ale / Wee Heavy", "Red Ale - Imperial", "Quadrupel (Quad)", "Old Ale", "Barleywine - English", "Barleywine - American"]},
    {"style": "Wheat Beers", "substyles": ["Wheat Beer - Witbier", "Wheat Beer - Kristallweizen", "Wheat Beer - Hefeweizen", "Wheat Beer - Dunkelweizen", "Wheat Beer - American Pale", "Wheat Beer - American Dark"]},
    {"style": "Wild/Sour ", "substyles": [ "Brett Beer", "Lambic - Faro", "Lambic - Fruit", "Lambic - Gueuze", "Lambic - Traditional", "Sour - Berliner Weisse", "Sour - Flanders Oud Bruin", "Sour - Flanders Red Ale", "Sour - Fruited Kettle Sour", "Sour - Gose", "Wild Ale"]},
    {"style": "Pale Lagers", "substyles": ["Lager - Adjunct", "Lager - American", "Lager - European / Dortmunder Export", "Lager - European Pale", "Lager - European Strong", "Lager - Festbier / Wiesnbier", "Lager - Helles", "Lager - India Pale Lager (IPL)", "Lager - India Pale Lager", "Lager - Kellerbier / Zwickelbier", "Lager - Light", "Lager - Malt Liquor", "Pilsner - Bohemian / Czech", "Pilsner - German", "Pilsner - Imperial", "Lager - Märzen / Oktoberfest"]}
]

In [30]:
# Function to return main style based on substyle 

def get_big_style(x):
    for item in styles:
        if x in item["substyles"]:
            return item["style"]

In [31]:
# Apply Function and create new column
df["BA_Big_styles"] = df.beer_style.apply(get_big_style)

In [32]:
df

Unnamed: 0,beer_name,beer_style,brewery,abv,rating,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,sum_of_factors,BA_Big_styles
0,Ayinger Celebrator,Bock - Doppelbock,Ayinger Privatbrauerei,6.7%,4.37,23,144,43,70,189,37,2,61,45,27,23,664,Bock
1,Troegenator,Bock - Doppelbock,Tröegs Brewing Company,8.2%,3.97,21,128,59,51,210,48,0,74,52,18,21,682,Bock
2,Spaten Optimator,Bock - Doppelbock,Spaten-Franziskaner-Bräu,7.6%,3.92,17,155,51,63,155,37,0,54,54,18,17,621,Bock
3,Salvator,Bock - Doppelbock,Paulaner Brauerei,7.9%,3.96,21,109,83,41,200,58,0,82,53,25,21,693,Bock
4,Korbinian,Bock - Doppelbock,Bayerische Staatsbrauerei Weihenstephan,7.4%,4.21,30,155,41,62,188,56,1,84,52,21,30,720,Bock
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3515,Cuvée De Castleton,Wild Ale,Captain Lawrence Brewing Co.,8.4%,4.39,100,67,30,8,203,432,0,230,28,17,100,1215,Wild/Sour
3516,The Wild One,Wild Ale,Bell's Brewery - Eccentric Café & General Store,6.3%,4.17,89,73,25,6,117,335,0,142,18,21,89,915,Wild/Sour
3517,Figaro,Wild Ale,Cascade Brewing / Raccoon Lodge & Brewpub,9.5%,4.21,75,59,32,25,186,390,0,260,13,17,75,1132,Wild/Sour
3518,Raspberry,Wild Ale,Upland Brewing Company,6%,4.18,101,74,20,8,252,359,4,243,10,6,101,1178,Wild/Sour


In [39]:
def percent_to_int(x):
    x = x.replace("%","")
    x = float(x)
    return
    

In [42]:
df = df[df.abv != "Score:"]

In [44]:
# Get rid of "%" and turn into float
df["abv"] = df.abv.apply(lambda x: float(x.replace("%", "")))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [45]:
df.head()

Unnamed: 0,beer_name,beer_style,brewery,abv,rating,astringency,body,alcohol,bitter,sweet,sour,salty,fruits,hoppy,spices,malty,sum_of_factors,BA_Big_styles
0,Ayinger Celebrator,Bock - Doppelbock,Ayinger Privatbrauerei,6.7,4.37,23,144,43,70,189,37,2,61,45,27,23,664,Bock
1,Troegenator,Bock - Doppelbock,Tröegs Brewing Company,8.2,3.97,21,128,59,51,210,48,0,74,52,18,21,682,Bock
2,Spaten Optimator,Bock - Doppelbock,Spaten-Franziskaner-Bräu,7.6,3.92,17,155,51,63,155,37,0,54,54,18,17,621,Bock
3,Salvator,Bock - Doppelbock,Paulaner Brauerei,7.9,3.96,21,109,83,41,200,58,0,82,53,25,21,693,Bock
4,Korbinian,Bock - Doppelbock,Bayerische Staatsbrauerei Weihenstephan,7.4,4.21,30,155,41,62,188,56,1,84,52,21,30,720,Bock


In [14]:
df.to_sql("combined_beer_data_major_style", connection)