In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import interp
import scipy.stats
import warnings
from datetime import datetime
from sklearn.preprocessing import QuantileTransformer
import subprocess
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.activations import relu
import tensorflow as tf
warnings.simplefilter("ignore")

In [16]:
def timestamp(): return datetime.today().strftime('%Y%m%d')

In [17]:
def quantile_scale(df,feats):
    qua = df
    scaler = QuantileTransformer(
        n_quantiles=10,
        random_state=42,
        ignore_implicit_zeros=True, #sparse matrix
    )
    # fit the scaler
    scaler.fit(qua[feats])
    # transform values
    qua[feats] = scaler.transform(qua[feats])
    return qua

In [18]:
def tiered(classes):
    trans = []
    for x in classes:
        if x==1: c=0
        if x==2: c=1
        if x>=3 and x<=4: c=2
        if x>=5 and x<=7: c=3
        if x>=8: c=4
        trans.append(c)
    return trans

In [19]:
def merge_feats(episode):
    subprocess.run(f'bash src/get_data_ready.sh {episode}', shell=True)
    merge_col = ['season','baker','episode']
    tech = pd.read_csv(f"deepbake_s12_technical_features.tsv",sep="\t")
    star = pd.read_csv(f"deepbake_judge_features_s12_e{episode}.tsv",sep="\t")
    gbbo = pd.merge(tech, star,  how='left', left_on=merge_col, right_on =merge_col).drop_duplicates()
    gbbo = gbbo[['season','baker','episode','tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']]
    gbbo.to_csv(f"deepbake_features_s12_e{episode}.tsv",sep="\t",index=False)

In [20]:
def process_episode(episode):
    merge_feats(episode)
    gbbo = pd.read_csv(f"deepbake_features_s12_e{episode}.tsv",sep="\t")
    feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']
    gbbo = gbbo.loc[gbbo['episode']==episode]
    gbbo = quantile_scale(gbbo,feats)
    return gbbo

In [21]:
def get_training_set(episode):
    tech = pd.read_csv("../../RESULTS/deepbake_features.20210926.tsv",sep='\t')
    tech = tech.loc[tech['episode']==episode]
    feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']
    qua = quantile_scale(tech,feats)
    qua['place']=tiered(qua['place'])
    X, y = np.matrix(qua[feats]), np.array(qua['place'])
    return X,y

In [22]:
def create_model( nl1=1, nl2=1,  nl3=1, 
                 nn1=1000, nn2=500, nn3 = 200, lr=0.01, decay=0., l1=0.01, l2=0.01,
                act = 'relu', dropout=0,input_shape=None,output_shape=None):    
    '''This is a model generating function so that we can search over neural net 
    parameters and architecture
    https://www.kaggle.com/arrogantlymodest/randomised-cv-search-over-keras-neural-network
    '''
    opt = tf.optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,  decay=decay)
    reg = keras.regularizers.l1_l2(l1=l1, l2=l2)
    model = Sequential()
    first=True  
    for i in range(nl1):
        if first:
            model.add(Dense(nn1, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn1, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))    
    for i in range(nl2):
        if first:
            model.add(Dense(nn2, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn2, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))    
    for i in range(nl3):
        if first:
            model.add(Dense(nn3, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn3, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))       
    model.add(Dense(output_shape, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'],)
    return model

In [31]:
def deep_bake(episode):
    feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']
    tech = pd.read_csv("../../RESULTS/deepbake_features.20210926.tsv",sep='\t')
    tech['place']=tiered(tech['place'])
    input_shape = len(feats)
    output_shape = len(set(tech['place']))

    l1 = 0.0001
    l2 = 0.0001
    lr = 0.0001
    nl1 = 1
    nl2 = 1
    nl3 = 1
    nn1 = 800
    nn2 = 800
    nn3 = 300
    dropout = 0.1
    decay = 1e-09
    act='relu'
    n_dims = len(feats)
    n_classes = len(set(tech['place']))
    BATCH,EPOCHS = 12, 25
    
    GBBO = pd.read_csv(f"deepbake_features_s12_e{episode}.tsv",sep="\t")
    S11 = pd.DataFrame()
    
    gbbo = GBBO.loc[GBBO['episode']==episode].copy()
    gbbo = quantile_scale(gbbo,feats)
    test = np.matrix(gbbo[feats])

    tech2 = tech.loc[tech['episode']==episode].copy()
    qua = quantile_scale(tech2,feats)
    qua['place']=tiered(qua['place'])

    X, y = np.matrix(qua[feats]), np.array(qua['place'])

    nn = create_model( nl1=nl1, nl2=nl2,  nl3=nl3, 
                     nn1=nn1, nn2=nn2, nn3 = nn3, 
                     lr=lr, decay=decay, l1=l1, l2=l2,
                     act = act, dropout=dropout,
                     input_shape=n_dims,
                     output_shape=n_classes)

    nn.fit(X,y,validation_split=0., batch_size=BATCH, epochs=EPOCHS,verbose=0)
    preds = np.argmax(nn.predict(test) > 0.5, axis=-1).astype("int32")
    probs = nn.predict(test)
    gbbo['preds']=preds
    # probability baker is a finalist
    top = probs[:,0]
    # probability baker is a finalist or a runner-up
    top3 = probs[:,0]+probs[:,1]
    # bottom tier (8th and below)
    bot = probs[:,-1]
    # 5th - 7th
    nextbot = probs[:,-2]
    third = probs[:,-3]

    gbbo['bottom']=np.round(bot*100,decimals=2)
    gbbo['finalist']=np.round(top*100,decimals=2) 
    gbbo['top3'] = np.round(top3*100,decimals=2)
    gbbo['fifthseventh'] = np.round(nextbot*100,decimals=2)
    gbbo['thirdforth'] = np.round(third*100,decimals=2)
    gbbo.to_csv(f"deepbake_s12.week{episode}_predictions.tsv",sep="\t",index=False)

    return gbbo

In [32]:
episode=1
process_episode(episode)

Unnamed: 0,season,baker,episode,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad
0,12,Amanda,1,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0
3,12,Chigs,1,0.909091,0.909091,0.0,0.0,0.0,0.0,0.0,0.0
6,12,Crystelle,1,0.636364,0.636364,0.0,0.0,1.0,1.0,0.0,0.0
9,12,Freya,1,0.090909,0.090909,0.0,0.0,1.0,1.0,0.0,0.0
12,12,George,1,0.181818,0.181818,0.0,0.0,0.0,0.0,1.0,1.0
15,12,Giuseppe,1,0.818182,0.818182,0.0,0.0,1.0,1.0,0.0,0.0
18,12,Jairzinho,1,0.727273,0.727273,0.0,0.0,0.0,0.0,1.0,1.0
21,12,Jürgen,1,0.454545,0.454545,1.0,1.0,1.0,1.0,0.0,0.0
24,12,Lizzie,1,0.272727,0.272727,0.0,0.0,0.0,0.0,0.0,0.0
27,12,Maggie,1,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0


In [33]:
df = deep_bake(episode)

In [34]:
df.sort_values(['finalist'],ascending=False)


Unnamed: 0,season,baker,episode,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad,preds,bottom,finalist,top3,fifthseventh,thirdforth
21,12,Jürgen,1,0.454545,0.454545,1.0,1.0,1.0,1.0,0.0,0.0,2,0.06,27.360001,48.740002,0.04,51.169998
27,12,Maggie,1,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0,0.97,24.530001,65.779999,0.64,32.610001
9,12,Freya,1,0.090909,0.090909,0.0,0.0,1.0,1.0,0.0,0.0,0,0.75,23.84,64.480003,0.48,34.290001
24,12,Lizzie,1,0.272727,0.272727,0.0,0.0,0.0,0.0,0.0,0.0,2,1.35,22.5,41.82,1.09,55.75
6,12,Crystelle,1,0.636364,0.636364,0.0,0.0,1.0,1.0,0.0,0.0,2,0.08,19.24,46.5,0.05,53.369999
30,12,Rochica,1,0.545455,0.545455,0.0,0.0,0.0,0.0,0.0,0.0,2,0.24,18.48,32.080002,0.18,67.489998
15,12,Giuseppe,1,0.818182,0.818182,0.0,0.0,1.0,1.0,0.0,0.0,2,0.03,17.17,38.25,0.02,61.700001
3,12,Chigs,1,0.909091,0.909091,0.0,0.0,0.0,0.0,0.0,0.0,2,0.02,12.93,20.700001,0.01,79.260002
12,12,George,1,0.181818,0.181818,0.0,0.0,0.0,0.0,1.0,1.0,2,0.31,11.4,37.700001,0.19,61.799999
33,12,Tom,1,0.363636,0.363636,0.0,0.0,0.0,0.0,1.0,1.0,2,0.16,9.93,33.25,0.09,66.5
