---
## Hops Studies
---

### Step 1: Importing libraries, loading, formatting and normalizing data

In [97]:
import numpy as np
import pandas as pd

In [98]:
%run utils.py

In [99]:
# Loading data from csv
data = load_from_csv(1, 10)

In [100]:
# Formatting data
data = format_data(data)

In [101]:
recipes = data['recipes']
styles = data['styles']
hops = data['hops']

In [102]:
hops[:3]

Unnamed: 0,beer_id,beer_name,name,origin,alpha,beta,amount,use,form,time,notes
0,big-bens-brown-ale,Big Ben's Brown Ale,Centennial (US),United States,10.5,,14.0,Boil,Pellet,60.0,American aroma-type variety bred in 1974 and r...
1,big-bens-brown-ale,Big Ben's Brown Ale,Cascade (US),United States,5.75,,14.0,Boil,Pellet,30.0,Very successful and well-established American ...
2,big-bens-brown-ale,Big Ben's Brown Ale,Cascade (US),United States,5.75,,14.0,Boil,Pellet,15.0,Very successful and well-established American ...


### Normalizing quantities (except for Miscs)
Quantities ('amount' field) has to be normalized relatively to the size of the batch.

In [103]:
cols = hops.keys()
# Selecting features
cols = ['beer_id', 'name', 'alpha', 'use', 'time']
hops = pd.merge(recipes, hops, on=['beer_id', 'beer_name'], how='left')
hops = hops[hops.batch_size != 0]
hops['weight_rel'] = hops['amount'] / hops['batch_size']
cols.append('weight_rel')
hops = hops[cols]

In [104]:
hops[:5]

Unnamed: 0,beer_id,name,alpha,use,time,weight_rel
0,big-bens-brown-ale,Centennial (US),10.5,Boil,60.0,0.739682
1,big-bens-brown-ale,Cascade (US),5.75,Boil,30.0,0.739682
2,big-bens-brown-ale,Cascade (US),5.75,Boil,15.0,0.739682
3,big-bens-brown-ale,Centennial (US),10.5,Boil,5.0,0.739682
4,lemon-saison-1,Citra (US),14.0,Boil,60.0,0.591746


### Step 2: Grouping data and performing aggregations

In [105]:
# Grouping Hops data on beer_id
hops_grouped = hops.groupby('beer_id')

#### 2.1: Hops aggregations
Various aggregations on Hops. The three following cells define three different aggregations of hops on 'beer_id'.
+ 1: list hops (unique) with quantity in recipe
+ 2: list hops by use (Boil, Dry Hop, ...)
+ 3: Put essential informations for all hops in one dict (doesn't feel right...)

##### 2.1.1. Hops (unique) and quantity

In [106]:
# Play on params_lvl_1, params_lvl_2 and f to change aggregation rules
params_lvl_2_1 = ['name', 'alpha', 'amount', 'use', 'time']
params_lvl_1_1 = ['hops']
def f_1(l):
    d = {key: '' for key in params_lvl_1_1}
    # begin
    d['hops'] = []
    for el in l:
        name = el['name']
        if name not in d['hops']:
            d['hops'].append({'name': name, 'amount': el['amount']})
        else:
            d['hops']['amount'] = d['hops']['amount'] + el['amount']
    # end
    return d

##### 2.1.2. Hops per use

In [107]:
# Hops per use
params_lvl_2_2 = ['name', 'alpha', 'amount', 'use', 'time']
params_lvl_1_2 = list(hops['use'].drop_duplicates())
def f_2(l):
    d = {key: [] for key in params_lvl_1_2}
    # begin
    for el in l:
        name = el['name']
        use = el['use']
        if name not in d[use]:
            d[use].append(name)
        else:
            d[use] = [name]
    # end
    return d

##### 2.1.3. Comprehensive Hops additions

In [108]:
# Hops with time
params_lvl_2_3 = ['name', 'alpha', 'amount', 'use', 'time']
params_lvl_1_3 = list(hops['use'].drop_duplicates())
def f_3(l):
    d = {key: [] for key in params_lvl_1_3}
    # begin
    for el in l:
        name = el['name']
        use = el['use'] 
        d[use].append({'name': name, 'time': el['time'], 'amount': el['amount'], 'alpha': el['alpha']})
    # end
    return d

##### Computing one of the options above

In [109]:
f = f_2
params_lvl_1 = params_lvl_1_2
params_lvl_2 = params_lvl_2_2

In [110]:
recipes_list = []
for key in hops_grouped.groups.keys():
    hops_list_out = []
    hops_list_in = hops_grouped.groups[key]
    for hop_nb in hops_list_in:
        hop = hops.loc[hop_nb]
        hops_list_out.append(dict(hop[params_lvl_2]))
    #    
    recipe_dict = f(hops_list_out)
    recipe_dict['beer_id'] = key
    recipes_list.append(recipe_dict)
hops_steps = pd.DataFrame(recipes_list)

In [111]:
hops_steps[:5]

Unnamed: 0,nan,Boil,Dry Hop,First Wort,Mash,Whirlpool,beer_id
0,[],"[Galena (US), Cascade (US)]",[Cascade (US)],[],[],[],indica-clone
1,[],[Fuggle (UK)],[],[],[],[],nut-brown-ale-102
2,[],[Williamette (US)],[],[],[],[],dont-push-back-strawberry-blonde
3,[],[Willamette (US)],[],[],[],[],orange-door-blonde
4,[],[Hallertau (DE)],[],[],[],[],golden-magic-hefeweizen


##### 2.1.4. A list of DataFrames
Builds a list of Hops DataFrames(ordered by **time**), one for each recipe.
This is the sort of data we could use to train a **RNN**.

In [116]:
params = ['time', 'name', 'weight_rel', 'alpha']
groups = hops_grouped.groups
recipes_hops_dfs = []
for group in groups:
    l = []
    for elt in groups[group]:
        d = {x: '' for x in params}
        hop = hops.loc[elt]
        for param in params:
            d[param] = hop[param]
        if hop['use'] == 'Dry Hop':
            d['Dry Hop'] = True
        else:
            d['Dry Hop'] = False
        l.append(d)
        recipes_hops_dfs.append(pd.DataFrame(l))

In [133]:
recipes_hops_dfs[24]

Unnamed: 0,Dry Hop,alpha,name,time,weight_rel
0,False,5.8,Kent Golding (UK),20.0,0.739682
1,False,5.8,Kent Golding (UK),60.0,2.219045
2,False,5.8,Kent Golding (UK),0.0,1.479363
