In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import vapeplot 
from scipy import interp
import scipy.stats
import warnings
warnings.simplefilter("ignore")
%matplotlib inline

In [12]:
from datetime import datetime
def timestamp():
    """Return today's local date as a compact ``YYYYMMDD`` string."""
    today = datetime.today()
    return today.strftime('%Y%m%d')

from sklearn.preprocessing import QuantileTransformer
def quantile_scale(df,feats):
    """Quantile-transform the ``feats`` columns of ``df`` and return the result.

    A ``QuantileTransformer`` (10 quantiles, ``random_state=42``) is fitted on
    ``df[feats]`` and then applied to those same columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding at least the columns named in ``feats``. It is not
        modified.
    feats : list of str
        Names of the columns to rescale.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose ``feats`` columns hold the transformed values;
        every other column is carried over unchanged.
    """
    # Work on a copy. Callers pass filtered slices of larger frames, and
    # assigning into the argument would overwrite the caller's data and
    # trigger pandas' SettingWithCopyWarning.
    qua = df.copy()
    scaler = QuantileTransformer(
        n_quantiles=10,
        random_state=42,
        # NOTE(review): the flag is meant for sparse input; a dense DataFrame
        # is passed here, so presumably it has no effect - confirm.
        ignore_implicit_zeros=True, #sparse matrix
    )
    # Fit on the selected columns and write the transformed values back.
    qua[feats] = scaler.fit_transform(qua[feats])
    return qua
def tiered(classes):
    """Map finishing places onto five ordinal tier labels.

    Mapping (place -> tier):
        1 -> 0, 2 -> 1, 3..4 -> 2, 5..7 -> 3, 8 and above -> 4

    Parameters
    ----------
    classes : iterable of numbers
        Finishing places, one per row.

    Returns
    -------
    list of int
        The tier label for each input value, in input order.

    Raises
    ------
    ValueError
        If a value falls outside every tier (for example 0, NaN, or a
        non-integer between two tiers). Such a value used to silently inherit
        the tier of the previous element, or fail with ``UnboundLocalError``
        when it came first.
    """
    trans = []
    for x in classes:
        # The tiers are mutually exclusive, so an elif chain guarantees that
        # every element gets a freshly computed label.
        if x == 1:
            c = 0
        elif x == 2:
            c = 1
        elif 3 <= x <= 4:
            c = 2
        elif 5 <= x <= 7:
            c = 3
        elif x >= 8:
            c = 4
        else:
            raise ValueError(
                "tiered() cannot map place {!r} to a tier".format(x)
            )
        trans.append(c)
    return trans

In [13]:
# Episode number used in the name of the feature file written below.
# NOTE(review): the judge-feature path below hard-codes "e8" instead of using EPI.
EPI=8
# Columns shared by both input tables; used as the join key.
merge_col = ['season','baker','index','episode','place']

tech = pd.read_csv(
    "../../DATA/s11/deepbake_s11_technical_features.20201113.tsv",
    sep="\t",
)
star = pd.read_csv(
    "../../DATA/s11/deepbake_judge_features_s11_e8.20201113.tsv",
    sep="\t",
)

# Left join keeps every technical-feature row, then only the identifier,
# target and model-feature columns are retained.
gbbo = (
    tech.merge(star, how='left', left_on=merge_col, right_on=merge_col)
    [['season','baker','episode','place','tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']]
)

# Persist the merged table so later cells can reload it from disk.
gbbo.to_csv("deepbake_features_s11_e{}.tsv".format(EPI),sep="\t",index=False)
gbbo.head()

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad
0,11,Dave,1,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,11,Dave,2,0,0.0,0,0.5,1.0,0.5,1.0,0.0,0.0
2,11,Dave,3,0,0.0,0,0.33,0.0,0.33,0.0,0.33,1.0
3,11,Dave,4,0,0.0,0,0.25,0.0,0.25,0.0,0.25,0.0
4,11,Dave,5,0,0.0,0,0.2,0.0,0.4,1.0,0.2,0.0


In [14]:
# Reload the merged season-11 feature table from disk.
gbbo = pd.read_csv("deepbake_features_s11_e{}.tsv".format(EPI),sep="\t")
# Model input columns.
feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']
max_epi = EPI
# Keep only the rows of the chosen episode. The explicit copy matters because
# quantile_scale assigns into the frame it receives, and writing into a
# filtered slice raises pandas' SettingWithCopyWarning.
gbbo = gbbo.loc[gbbo['episode']==max_epi].copy()
gbbo = quantile_scale(gbbo,feats)
gbbo.head()

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad
7,11,Dave,8,0,0.15478,1.0,0.722222,0.0,0.820261,0.0,0.0,0.0
16,11,Hermine,8,0,0.638095,0.818182,1.0,1.0,1.0,1.0,0.0,0.0
25,11,Laura,8,0,0.895558,0.636364,0.722222,0.0,0.611111,1.0,0.666667,0.0
34,11,Linda,8,0,0.549778,0.0,0.0,0.0,0.0,0.0,1.0,0.0
43,11,Loriea,8,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# Training table for the same episode number as the season-11 rows above.
tech = pd.read_csv("../../RESULTS/deepbake_features.20201016.tsv",sep='\t')
# Explicit copy: the scaled features and the tier labels are written into this
# frame below, and writing into a filtered slice raises
# pandas' SettingWithCopyWarning.
tech = tech.loc[tech['episode']==max_epi].copy()
qua = quantile_scale(tech,feats)
# Replace raw finishing places with the tier labels produced by tiered().
qua['place']=tiered(qua['place'])
# Feature matrix and label vector for model fitting.
X, y = np.matrix(qua[feats]), np.array(qua['place'])

In [16]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.activations import relu

def create_model( nl1=1, nl2=1,  nl3=1,
                 nn1=1000, nn2=500, nn3 = 200, lr=0.01, decay=0., l1=0.01, l2=0.01,
                act = 'relu', dropout=0,input_shape=None,output_shape=None):
    '''Build and compile a fully connected softmax classifier.

    The network is three consecutive groups of Dense layers: ``nl1`` layers of
    ``nn1`` units, then ``nl2`` layers of ``nn2`` units, then ``nl3`` layers of
    ``nn3`` units. Every hidden layer uses activation ``act`` and a shared
    L1/L2 kernel regulariser, and is followed by a Dropout layer when
    ``dropout`` is non-zero. A softmax Dense layer with ``output_shape`` units
    closes the stack.

    Parameterised so that neural net parameters and architecture can be
    searched over; see
    https://www.kaggle.com/arrogantlymodest/randomised-cv-search-over-keras-neural-network

    Parameters:
        nl1, nl2, nl3: number of hidden layers in each group.
        nn1, nn2, nn3: units per layer in each group.
        lr, decay: learning rate and decay passed to the Adam optimiser.
        l1, l2: kernel regularisation strengths.
        act: activation of the hidden layers.
        dropout: dropout rate; 0 disables the Dropout layers.
        input_shape: number of input features (given to the first hidden layer).
        output_shape: number of output classes.

    Returns:
        The compiled Sequential model (sparse categorical cross-entropy loss,
        accuracy metric).
    '''
    # NOTE(review): newer Keras releases call this argument `learning_rate`;
    # confirm against the installed version before upgrading.
    optimizer = keras.optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,  decay=decay)
    regularizer = keras.regularizers.l1_l2(l1=l1, l2=l2)

    model = Sequential()
    needs_input_dim = True  # only the very first layer declares the input width
    for n_layers, width in ((nl1, nn1), (nl2, nn2), (nl3, nn3)):
        for _ in range(n_layers):
            if needs_input_dim:
                layer = Dense(width, input_dim=input_shape, activation=act, kernel_regularizer=regularizer)
                needs_input_dim = False
            else:
                layer = Dense(width, activation=act, kernel_regularizer=regularizer)
            model.add(layer)
            if dropout!=0:
                model.add(Dropout(dropout))

    model.add(Dense(output_shape, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'],)
    return model
##################################
# Model input columns, and the full training table with places mapped to tiers.
feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']
tech = pd.read_csv("../../RESULTS/deepbake_features.20201016.tsv",sep='\t')
tech['place']=tiered(tech['place'])

# Network dimensions: one input per feature, one output per distinct tier label.
# Both spellings of each value are kept because both names are defined here.
input_shape = n_dims = len(feats)
output_shape = n_classes = len(set(tech['place']))

# Regularisation strengths and optimiser settings.
l1, l2 = 0.0001, 0.0001
lr = 0.0001
decay = 1e-09

# Hidden-layer counts and widths for the three layer groups of create_model.
nl1, nl2, nl3 = 1, 1, 1
nn1, nn2, nn3 = 800, 800, 300

# Hidden-layer activation and dropout rate.
act='relu'
dropout = 0.1

# Mini-batch size and number of training epochs.
BATCH,EPOCHS = 12, 25

In [17]:
# Season-11 features for every episode up to and including EPI.
GBBO = pd.read_csv("deepbake_features_s11_e{}.tsv".format(EPI),sep="\t")
GBBO = GBBO.loc[GBBO['episode']<=EPI]

# The training table is identical for every episode, so it is read once here
# instead of once per loop iteration.
tech_all = pd.read_csv("../../RESULTS/deepbake_features.20201016.tsv",sep='\t')

# One prediction frame per episode, concatenated once after the loop.
weekly_frames = []

# sorted(): iterate in ascending episode order so that `gbbo` is guaranteed to
# hold the latest episode once the loop finishes (set order is not a contract).
for e in sorted(set(GBBO['episode'])):
    # Explicit copies: both frames are written to below, and writing into a
    # filtered slice raises pandas' SettingWithCopyWarning.
    # NOTE(review): season-11 rows and training rows are each scaled by their
    # own freshly fitted transformer - confirm this is intended.
    gbbo = GBBO.loc[GBBO['episode']==e].copy()
    gbbo = quantile_scale(gbbo,feats)
    test = np.matrix(gbbo[feats])

    tech = tech_all.loc[tech_all['episode']==e].copy()
    qua = quantile_scale(tech,feats)
    qua['place']=tiered(qua['place'])

    X, y = np.matrix(qua[feats]), np.array(qua['place'])

    # A fresh, untrained network per episode.
    nn = create_model( nl1=nl1, nl2=nl2,  nl3=nl3,
                     nn1=nn1, nn2=nn2, nn3 = nn3,
                     lr=lr, decay=decay, l1=l1, l2=l2,
                     act = act, dropout=dropout,
                     input_shape=n_dims,
                     output_shape=n_classes)

    nn.fit(X,y,validation_split=0., batch_size=BATCH, epochs=EPOCHS,verbose=0)

    # A single forward pass yields the class probabilities, and the predicted
    # class is their argmax. This replaces predict_proba / predict_classes,
    # which newer Keras releases no longer provide.
    probs = nn.predict(test, verbose=0)
    preds = np.argmax(probs, axis=-1)
    gbbo['preds']=preds

    # Column meanings follow the tier labels produced by tiered(); the negative
    # indices assume the model has those five output classes.
    # class 0 (place == 1)
    top = probs[:,0]
    # classes 0 and 1 combined (place <= 2)
    top3 = probs[:,0]+probs[:,1]
    # last class: bottom tier (8th and below)
    bot = probs[:,-1]
    # second-to-last class: 5th - 7th
    nextbot = probs[:,-2]
    # third-to-last class: 3rd - 4th
    third = probs[:,-3]

    # Store the probabilities as percentages rounded to two decimals.
    gbbo['bottom']=np.round(bot*100,decimals=2)
    gbbo['finalist']=np.round(top*100,decimals=2)
    gbbo['top3'] = np.round(top3*100,decimals=2)
    gbbo['fifthseventh'] = np.round(nextbot*100,decimals=2)
    gbbo['thirdforth'] = np.round(third*100,decimals=2)

    weekly_frames.append(gbbo)
    gbbo.to_csv("deepbake_s11.week{}_preditions.txt".format(e),sep="\t",index=False)
    print(e)

# All weekly predictions stacked in episode order (an empty frame if there
# were no episodes to process).
S11 = pd.concat(weekly_frames) if weekly_frames else pd.DataFrame()

1
2
3
4
5
6
7
8


In [18]:
# Latest processed episode, highest combined probability of the two top classes first.
gbbo.sort_values(by='top3', ascending=False)

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad,preds,bottom,finalist,top3,fifthseventh,thirdforth
16,11,Hermine,8,0,0.638095,0.818182,1.0,1.0,1.0,1.0,0.0,0.0,1,1.91,19.07,90.32,3.11,4.66
7,11,Dave,8,0,0.15478,1.0,0.722222,0.0,0.820261,0.0,0.0,0.0,1,6.41,26.23,75.82,5.67,12.1
25,11,Laura,8,0,0.895558,0.636364,0.722222,0.0,0.611111,1.0,0.666667,0.0,1,9.48,9.94,59.27,12.07,19.190001
88,11,Peter,8,0,0.679226,0.909091,0.722222,0.0,0.611111,0.0,0.666667,1.0,2,7.79,12.3,29.540001,4.74,57.93
52,11,Lottie,8,0,0.857877,0.0,0.722222,0.0,0.365432,0.0,0.0,0.0,3,24.58,7.22,21.4,43.16,10.86
70,11,Marc,8,0,1.0,0.727273,0.722222,0.0,1.0,0.0,1.0,1.0,2,11.72,5.87,17.530001,11.8,58.950001
106,11,Sura,8,4,0.318339,0.0,0.0,0.0,0.611111,0.0,0.0,0.0,4,38.939999,7.01,15.25,35.360001,10.44
43,11,Loriea,8,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,46.169998,7.07,13.83,29.07,10.94
61,11,Mak,8,2,0.068063,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,46.380001,6.42,12.64,30.610001,10.37
79,11,Mark,8,0,0.365432,0.0,0.722222,0.0,0.611111,0.0,0.666667,0.0,3,39.73,1.87,4.57,50.419998,5.28


In [19]:
# Latest processed episode, highest probability of the third-to-last class first.
gbbo.sort_values(by='thirdforth', ascending=False)

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad,preds,bottom,finalist,top3,fifthseventh,thirdforth
70,11,Marc,8,0,1.0,0.727273,0.722222,0.0,1.0,0.0,1.0,1.0,2,11.72,5.87,17.530001,11.8,58.950001
88,11,Peter,8,0,0.679226,0.909091,0.722222,0.0,0.611111,0.0,0.666667,1.0,2,7.79,12.3,29.540001,4.74,57.93
25,11,Laura,8,0,0.895558,0.636364,0.722222,0.0,0.611111,1.0,0.666667,0.0,1,9.48,9.94,59.27,12.07,19.190001
7,11,Dave,8,0,0.15478,1.0,0.722222,0.0,0.820261,0.0,0.0,0.0,1,6.41,26.23,75.82,5.67,12.1
43,11,Loriea,8,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,46.169998,7.07,13.83,29.07,10.94
52,11,Lottie,8,0,0.857877,0.0,0.722222,0.0,0.365432,0.0,0.0,0.0,3,24.58,7.22,21.4,43.16,10.86
106,11,Sura,8,4,0.318339,0.0,0.0,0.0,0.611111,0.0,0.0,0.0,4,38.939999,7.01,15.25,35.360001,10.44
61,11,Mak,8,2,0.068063,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,46.380001,6.42,12.64,30.610001,10.37
79,11,Mark,8,0,0.365432,0.0,0.722222,0.0,0.611111,0.0,0.666667,0.0,3,39.73,1.87,4.57,50.419998,5.28
16,11,Hermine,8,0,0.638095,0.818182,1.0,1.0,1.0,1.0,0.0,0.0,1,1.91,19.07,90.32,3.11,4.66


In [20]:
# Latest processed episode, highest probability of class 0 first.
gbbo.sort_values(by='finalist', ascending=False)

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad,preds,bottom,finalist,top3,fifthseventh,thirdforth
7,11,Dave,8,0,0.15478,1.0,0.722222,0.0,0.820261,0.0,0.0,0.0,1,6.41,26.23,75.82,5.67,12.1
16,11,Hermine,8,0,0.638095,0.818182,1.0,1.0,1.0,1.0,0.0,0.0,1,1.91,19.07,90.32,3.11,4.66
88,11,Peter,8,0,0.679226,0.909091,0.722222,0.0,0.611111,0.0,0.666667,1.0,2,7.79,12.3,29.540001,4.74,57.93
25,11,Laura,8,0,0.895558,0.636364,0.722222,0.0,0.611111,1.0,0.666667,0.0,1,9.48,9.94,59.27,12.07,19.190001
52,11,Lottie,8,0,0.857877,0.0,0.722222,0.0,0.365432,0.0,0.0,0.0,3,24.58,7.22,21.4,43.16,10.86
43,11,Loriea,8,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,46.169998,7.07,13.83,29.07,10.94
106,11,Sura,8,4,0.318339,0.0,0.0,0.0,0.611111,0.0,0.0,0.0,4,38.939999,7.01,15.25,35.360001,10.44
61,11,Mak,8,2,0.068063,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,46.380001,6.42,12.64,30.610001,10.37
70,11,Marc,8,0,1.0,0.727273,0.722222,0.0,1.0,0.0,1.0,1.0,2,11.72,5.87,17.530001,11.8,58.950001
79,11,Mark,8,0,0.365432,0.0,0.722222,0.0,0.611111,0.0,0.666667,0.0,3,39.73,1.87,4.57,50.419998,5.28
