In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import vapeplot 
from scipy import interp
import scipy.stats
import warnings
warnings.simplefilter("ignore")
%matplotlib inline

In [2]:
from datetime import datetime
def timestamp():
    """Return today's local date as a compact ``YYYYMMDD`` string."""
    # datetime.__format__ delegates to strftime, so this is the same formatting call.
    return f"{datetime.today():%Y%m%d}"

from sklearn.preprocessing import QuantileTransformer
def quantile_scale(df,feats):
    """Quantile-transform the ``feats`` columns of ``df`` and return the result.

    A fresh ``QuantileTransformer`` (10 quantiles, ``random_state=42``) is
    fitted on ``df[feats]`` and then applied to those same columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns named in ``feats``.
    feats : list of str
        Names of the columns to rescale.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose ``feats`` columns hold the transformed values.
        The caller's frame is left untouched.
    """
    # Work on a copy: callers pass `.loc` slices, and assigning into the
    # argument itself would mutate the caller's data and trigger pandas'
    # SettingWithCopy behaviour.
    qua = df.copy()
    scaler = QuantileTransformer(
        n_quantiles=10,
        random_state=42,
        # Per the scikit-learn docs this option only affects sparse input.
        ignore_implicit_zeros=True,
    )
    # Fit on, then transform, the selected feature columns.
    scaler.fit(qua[feats])
    qua[feats] = scaler.transform(qua[feats])
    return qua
def tiered(classes):
    """Collapse finishing places into five ordinal tiers.

    Mapping (place -> tier):
        1 -> 0, 2 -> 1, 3..4 -> 2, 5..7 -> 3, 8 and above -> 4

    Parameters
    ----------
    classes : iterable of numbers
        Finishing places, one per row.

    Returns
    -------
    list of int
        The tier for each input value, in input order.

    Raises
    ------
    ValueError
        If a value matches no tier (e.g. 0, a negative number, NaN, or a
        fractional value that falls between tiers). Previously such a value
        silently inherited the tier of the preceding element, or raised
        UnboundLocalError when it came first.
    """
    trans = []
    for x in classes:
        if x == 1:
            c = 0
        elif x == 2:
            c = 1
        elif 3 <= x <= 4:
            c = 2
        elif 5 <= x <= 7:
            c = 3
        elif x >= 8:
            c = 4
        else:
            raise ValueError("cannot assign a tier to place {!r}".format(x))
        trans.append(c)
    return trans

In [3]:
# Left-join the season-11 technical features with the judge features on
# their shared key columns, keep the modelling columns, and save the result.
merge_col = ['season','baker','index','episode','place']
tech = pd.read_csv("../../DATA/s11/deepbake_s11_technical_features.20201016.tsv",sep="\t")
star = pd.read_csv("../../DATA/s11/deepbake_judge_features_s11_e4.20201016.tsv",sep="\t")
gbbo = pd.merge(tech, star,  how='left', left_on=merge_col, right_on =merge_col)
gbbo = gbbo[['season','baker','episode','place','tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']]
# The file name has no `{}` placeholder, so the former `.format(timestamp())`
# never changed it. The call is dropped; the name stays fixed because later
# cells read this exact path back.
gbbo.to_csv("deepbake_features_s11_e4.tsv",sep="\t",index=False)
gbbo.head()

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad
0,11,Dave,1,0,9.0,9,0.0,0,0.0,0,0.0,0
1,11,Dave,2,0,5.0,1,0.5,1,0.5,1,0.0,0
2,11,Dave,3,0,6.33,9,0.33,0,0.33,0,0.33,1
3,11,Dave,4,0,6.75,8,0.25,0,0.25,0,0.25,0
4,11,Hermine,1,0,5.0,5,0.0,0,1.0,1,0.0,0


In [4]:
# Reload the merged season-11 features and keep only the most recent episode.
gbbo = pd.read_csv("deepbake_features_s11_e4.tsv",sep="\t")
feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']
max_epi = gbbo['episode'].max()
# .copy() detaches the slice from the full frame before its feature columns
# are overwritten by the scaler.
gbbo = gbbo.loc[gbbo['episode']==max_epi].copy()
gbbo = quantile_scale(gbbo,feats)
gbbo.head()

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad
3,11,Dave,4,0,0.909091,0.909091,1.0,0.0,0.555556,0.0,0.611111,0.0
7,11,Hermine,4,0,0.555556,0.636364,0.0,0.0,0.555556,0.0,0.0,0.0
11,11,Laura,4,0,0.455556,0.363636,0.0,0.0,0.0,0.0,0.0,0.0
15,11,Linda,4,0,0.283951,0.272727,0.0,0.0,0.0,0.0,0.611111,0.0
19,11,Loriea,4,1,0.0,0.0,0.0,0.0,0.0,0.0,0.611111,0.0


In [5]:
# Historical features restricted to the same episode number as `max_epi`,
# scaled and converted into the training arrays X (features) and y (tier labels).
tech = pd.read_csv("../../RESULTS/deepbake_features.20201016.tsv",sep='\t')
# .copy() detaches the slice before columns are reassigned on it.
tech = tech.loc[tech['episode']==max_epi].copy()
qua = quantile_scale(tech,feats)
qua['place']=tiered(qua['place'])
# Plain ndarrays instead of np.matrix, which NumPy no longer recommends.
X, y = qua[feats].to_numpy(), np.array(qua['place'])

In [7]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.activations import relu

def create_model( nl1=1, nl2=1,  nl3=1, 
                 nn1=1000, nn2=500, nn3 = 200, lr=0.01, decay=0., l1=0.01, l2=0.01,
                act = 'relu', dropout=0,input_shape=None,output_shape=None):    
    '''Build and compile a fully connected softmax classifier.

    This is a model generating function so that we can search over neural net 
    parameters and architecture
    https://www.kaggle.com/arrogantlymodest/randomised-cv-search-over-keras-neural-network

    The network is up to three consecutive groups of Dense layers followed by
    a softmax output layer.

    Parameters
    ----------
    nl1, nl2, nl3 : int
        Number of Dense layers in the first, second and third group.
    nn1, nn2, nn3 : int
        Units per layer in the corresponding group.
    lr, decay : float
        Passed to the Adam optimizer as ``lr`` and ``decay``.
    l1, l2 : float
        L1 / L2 penalties; one ``l1_l2`` regularizer is used as the
        ``kernel_regularizer`` of every hidden layer.
    act : str
        Activation used by every hidden layer.
    dropout : float
        If non-zero, a ``Dropout(dropout)`` layer follows every hidden layer.
    input_shape : int
        Number of input features; passed as ``input_dim`` to the first
        hidden layer only.
    output_shape : int
        Number of units (classes) in the softmax output layer.

    Returns
    -------
    A compiled ``Sequential`` model using ``sparse_categorical_crossentropy``
    loss and the ``accuracy`` metric.
    '''
    opt = keras.optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,  decay=decay)
    reg = keras.regularizers.l1_l2(l1=l1, l2=l2)
    model = Sequential()
    # Only the very first hidden layer declares the input dimension.
    first = True
    # One pass per layer group replaces three copy-pasted loops; layers are
    # added in the same order as before (group 1, then 2, then 3).
    for n_layers, width in ((nl1, nn1), (nl2, nn2), (nl3, nn3)):
        for _ in range(n_layers):
            if first:
                model.add(Dense(width, input_dim=input_shape, activation=act, kernel_regularizer=reg))
                first = False
            else:
                model.add(Dense(width, activation=act, kernel_regularizer=reg))
            if dropout != 0:
                model.add(Dropout(dropout))
    model.add(Dense(output_shape, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'],)
    return model
##################################
# Feature columns fed to the network.
feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']

# Historical feature table, with `place` collapsed into tier labels by tiered().
tech = pd.read_csv("../../RESULTS/deepbake_features.20201016.tsv",sep='\t')
tech['place'] = tiered(tech['place'])

# Input width and number of output classes; both pairs of names are kept.
input_shape = n_dims = len(feats)
output_shape = n_classes = len(set(tech['place']))

# Regularisation strengths and optimiser settings.
l1, l2 = 0.0001, 0.0001
lr, decay = 0.0001, 1e-09

# Architecture: layers per group, units per layer, activation, dropout rate.
nl1, nl2, nl3 = 1, 1, 1
nn1, nn2, nn3 = 800, 800, 300
act = 'relu'
dropout = 0.1

# Training schedule.
BATCH = 12
EPOCHS = 25

In [14]:
# For every season-11 episode: train a fresh network on the historical rows
# of the same episode number, then score the current bakers and save the
# per-week predictions.
GBBO = pd.read_csv("deepbake_features_s11_e4.tsv",sep="\t")
# The historical table does not change between iterations: read it once.
tech_all = pd.read_csv("../../RESULTS/deepbake_features.20201016.tsv",sep='\t')
# Per-episode frames are collected here and concatenated once after the loop.
weekly = []
# sorted() makes the iteration order explicit, so `gbbo` is guaranteed to
# hold the latest episode once the loop has finished.
for e in sorted(set(GBBO['episode'])):
    # .copy() detaches each slice before new columns are assigned to it.
    gbbo = GBBO.loc[GBBO['episode']==e].copy()
    gbbo = quantile_scale(gbbo,feats)
    test = gbbo[feats].to_numpy()

    tech = tech_all.loc[tech_all['episode']==e].copy()
    qua = quantile_scale(tech,feats)
    qua['place']=tiered(qua['place'])
    
    X, y = qua[feats].to_numpy(), np.array(qua['place'])
    
    nn = create_model( nl1=nl1, nl2=nl2,  nl3=nl3, 
                     nn1=nn1, nn2=nn2, nn3 = nn3, 
                     lr=lr, decay=decay, l1=l1, l2=l2,
                     act = act, dropout=dropout,
                     input_shape=n_dims,
                     output_shape=n_classes)
    
    nn.fit(X,y,validation_split=0., batch_size=BATCH, epochs=EPOCHS,verbose=0)

    # One forward pass: the softmax output is the class-probability matrix and
    # its row-wise argmax is the predicted class. This replaces
    # predict_proba / predict_classes, which newer TensorFlow/Keras releases
    # no longer provide.
    probs = nn.predict(test)
    preds = probs.argmax(axis=-1)
    gbbo['preds']=preds
    # Column k of `probs` is tier k as encoded by tiered():
    #   0 -> place 1, 1 -> place 2, 2 -> places 3-4, 3 -> places 5-7, 4 -> place 8+
    # The negative indices below line up with those tiers only while the
    # model has five output classes -- TODO confirm n_classes == 5.
    # tier 0
    top = probs[:,0]
    # tiers 0 and 1 combined
    top3 = probs[:,0]+probs[:,1]
    # last tier (8th and below)
    bot = probs[:,-1]
    # second-to-last tier (5th - 7th)
    nextbot = probs[:,-2]
    # third-to-last tier (3rd - 4th)
    third = probs[:,-3]

    # Probabilities are stored as percentages rounded to two decimals.
    gbbo['bottom']=np.round(bot*100,decimals=2)
    gbbo['finalist']=np.round(top*100,decimals=2) 
    gbbo['top3'] = np.round(top3*100,decimals=2)
    gbbo['fifthseventh'] = np.round(nextbot*100,decimals=2)
    gbbo['thirdforth'] = np.round(third*100,decimals=2)
    weekly.append(gbbo)
    gbbo.to_csv("deepbake_s11.week{}_preditions.txt".format(e),sep="\t",index=False)
    print(e)

# All weekly predictions stacked into one frame (empty if there were no episodes).
S11 = pd.concat(weekly) if weekly else pd.DataFrame()

1
2
3
4


In [13]:
# Display the bakers ranked by their `top3` percentage, highest first.
# Expects `gbbo` to already carry the prediction columns added by the
# per-episode loop cell.
gbbo.sort_values(by='top3', ascending=False)

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad,preds,bottom,finalist,top3,fifthseventh,thirdforth
39,11,Peter,4,0,0.380952,0.545455,1.0,0.0,1.0,1.0,0.611111,0.0,1,6.85,12.84,66.279999,12.52,14.35
35,11,Mark,4,0,0.188889,0.454545,1.0,1.0,1.0,1.0,0.0,0.0,1,2.54,6.76,64.059998,24.620001,8.78
31,11,Marc,4,0,1.0,0.727273,1.0,0.0,1.0,0.0,0.611111,0.0,4,36.450001,14.71,28.4,13.85,21.299999
7,11,Hermine,4,0,0.555556,0.636364,0.0,0.0,0.555556,0.0,0.0,0.0,3,15.62,14.12,27.540001,30.700001,26.15
3,11,Dave,4,0,0.909091,0.909091,1.0,0.0,0.555556,0.0,0.611111,0.0,4,34.189999,8.81,19.790001,23.23,22.790001
11,11,Laura,4,0,0.455556,0.363636,0.0,0.0,0.0,0.0,0.0,0.0,3,26.17,8.39,19.059999,32.110001,22.67
47,11,Sura,4,4,0.555556,0.818182,0.0,0.0,1.0,0.0,0.611111,1.0,3,29.379999,7.28,9.69,35.700001,25.23
15,11,Linda,4,0,0.283951,0.272727,0.0,0.0,0.0,0.0,0.611111,0.0,4,62.209999,3.51,6.48,17.43,13.88
19,11,Loriea,4,1,0.0,0.0,0.0,0.0,0.0,0.0,0.611111,0.0,4,74.459999,3.75,6.2,10.32,9.01
27,11,Mak,4,2,0.058824,0.0,0.0,0.0,0.0,0.0,0.611111,0.0,4,76.610001,3.35,5.58,9.26,8.55
