In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import vapeplot 
from scipy import interp
import scipy.stats
import warnings
warnings.simplefilter("ignore")
%matplotlib inline

In [2]:
from datetime import datetime
def timestamp():
    """Return today's local date as a compact ``YYYYMMDD`` string."""
    today = datetime.today()
    return today.strftime('%Y%m%d')

from sklearn.preprocessing import QuantileTransformer
def quantile_scale(df,feats):
    """Quantile-transform the ``feats`` columns of ``df`` and return the result.

    A ``QuantileTransformer`` (10 quantiles, fixed ``random_state``) is fitted
    on ``df[feats]`` and applied to those same columns. All other columns are
    passed through untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns listed in ``feats``.
    feats : list of str
        Names of the columns to scale.

    Returns
    -------
    pandas.DataFrame
        A new frame with the ``feats`` columns replaced by their transformed
        values. The caller's ``df`` is left unmodified.
    """
    # Work on a copy: callers pass `.loc` slices, and assigning into the
    # caller's object both mutates their data and is a chained assignment
    # (the resulting pandas warning is silenced globally in this notebook).
    qua = df.copy()
    scaler = QuantileTransformer(
        n_quantiles=10,
        random_state=42,
        # NOTE(review): per scikit-learn docs this flag only affects sparse
        # input; the frames passed here are dense - confirm it is needed.
        ignore_implicit_zeros=True, #sparse matrix
    )
    # fit on, then transform, the very same columns
    qua[feats] = scaler.fit_transform(qua[feats])
    return qua
def tiered(classes):
    """Collapse final placements into five ordinal tiers.

    Mapping applied to each value ``x``:

    * ``x == 1``        -> 0
    * ``x == 2``        -> 1
    * ``3 <= x <= 4``   -> 2
    * ``5 <= x <= 7``   -> 3
    * ``x >= 8``        -> 4

    Parameters
    ----------
    classes : iterable of numbers
        Placements to convert (e.g. a pandas Series or a list).

    Returns
    -------
    list of int
        One tier per input value, in input order.

    Raises
    ------
    ValueError
        If a value matches no tier (e.g. 0, a negative number, NaN, or a
        fractional value such as 2.5). Previously such a value silently
        inherited the tier of the preceding element, or raised
        ``UnboundLocalError`` when it was the first element.
    """
    trans = []
    for x in classes:
        if x == 1:
            c = 0
        elif x == 2:
            c = 1
        elif 3 <= x <= 4:
            c = 2
        elif 5 <= x <= 7:
            c = 3
        elif x >= 8:
            c = 4
        else:
            # NaN fails every comparison above and also lands here.
            raise ValueError("tiered(): placement {!r} does not map to any tier".format(x))
        trans.append(c)
    return trans

In [3]:
# Latest season-11 episode covered by this run.
EPI = 6
# Key columns shared by the technical and judge feature tables.
merge_col = ['season', 'baker', 'index', 'episode', 'place']

# NOTE: the judge-features path below names episode 6 explicitly; it does not
# follow EPI automatically.
tech = pd.read_csv(
    "../../DATA/s11/deepbake_s11_technical_features.20201030.tsv",
    sep="\t",
)
star = pd.read_csv(
    "../../DATA/s11/deepbake_judge_features_s11_e6.20201030.tsv",
    sep="\t",
)

# Left-join judge features onto the technical features, then keep only the
# identifying columns and the eight model features.
gbbo = tech.merge(star, how='left', on=merge_col)
gbbo = gbbo.loc[:, ['season', 'baker', 'episode', 'place',
                    'tech_mean', 'tech', 'mean_star', 'star',
                    'mean_good', 'good', 'mean_bad', 'bad']]

# Persist the combined table; later cells read it back from disk.
gbbo.to_csv("deepbake_features_s11_e{}.tsv".format(EPI), sep="\t", index=False)
gbbo.head()

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad
0,11,Dave,1,0,0.0,0,0.0,0,0.0,0,0.0,0
1,11,Dave,2,0,0.0,0,0.5,1,0.5,1,0.0,0
2,11,Dave,3,0,0.0,0,0.33,0,0.33,0,0.33,1
3,11,Dave,4,0,0.0,0,0.25,0,0.25,0,0.25,0
4,11,Dave,5,0,0.0,0,0.2,0,0.4,1,0.2,0


In [4]:
# Feature columns fed to the model.
feats = ['tech_mean', 'tech', 'mean_star', 'star', 'mean_good', 'good', 'mean_bad', 'bad']
max_epi = EPI

# Reload the combined season-11 table, keep only the rows of the latest
# episode, and quantile-scale the feature columns of that subset.
gbbo = pd.read_csv("deepbake_features_s11_e{}.tsv".format(EPI), sep="\t")
is_latest = gbbo['episode'] == max_epi
gbbo = quantile_scale(gbbo.loc[is_latest], feats)
gbbo.head()

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad
5,11,Dave,6,0,0.0,0.636364,0.666667,0.0,1.0,1.0,0.0,0.0
11,11,Hermine,6,0,0.709114,0.727273,0.0,0.0,0.666667,0.0,0.0,1.0
17,11,Laura,6,0,0.895683,1.0,0.666667,0.0,0.444444,0.0,0.0,1.0
23,11,Linda,6,0,0.635701,0.0,0.0,0.0,0.0,0.0,0.777778,0.0
29,11,Loriea,6,1,0.107834,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Build the training set from the historical feature table, restricted to the
# same episode number as the season-11 rows selected above.
tech = pd.read_csv("../../RESULTS/deepbake_features.20201016.tsv",sep='\t')
tech = tech.loc[tech['episode']==max_epi]
qua = quantile_scale(tech,feats)
# Replace raw placements with the ordinal tiers produced by tiered().
qua['place']=tiered(qua['place'])
# Plain ndarrays instead of np.matrix: NumPy no longer recommends the matrix
# class, and a 2-D ndarray carries the same (n_samples, n_features) values.
X, y = qua[feats].to_numpy(), np.array(qua['place'])

In [6]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.activations import relu

def create_model( nl1=1, nl2=1,  nl3=1, 
                 nn1=1000, nn2=500, nn3 = 200, lr=0.01, decay=0., l1=0.01, l2=0.01,
                act = 'relu', dropout=0,input_shape=None,output_shape=None):    
    '''Build and compile a fully connected softmax classifier.

    This is a model generating function so that we can search over neural net
    parameters and architecture
    https://www.kaggle.com/arrogantlymodest/randomised-cv-search-over-keras-neural-network

    The network is three consecutive groups of Dense layers followed by a
    softmax output layer. Every hidden layer shares the same activation and
    the same L1/L2 kernel regularizer, and is followed by a Dropout layer
    when ``dropout`` is non-zero.

    Parameters
    ----------
    nl1, nl2, nl3 : int
        Number of Dense layers in the first, second and third group.
    nn1, nn2, nn3 : int
        Units per layer in the first, second and third group.
    lr : float
        Adam learning rate.
    decay : float
        Passed to Adam as ``decay``.
    l1, l2 : float
        L1 and L2 kernel regularization factors.
    act : str
        Activation used by every hidden layer.
    dropout : float
        Dropout rate; ``0`` adds no Dropout layers.
    input_shape : int or None
        Number of input features (passed to the first layer as ``input_dim``).
    output_shape : int
        Number of output classes (units of the softmax layer).

    Returns
    -------
    keras.models.Sequential
        Model compiled with sparse categorical cross-entropy and an
        ``accuracy`` metric.
    '''
    # `learning_rate` is the current argument name; `lr` is a deprecated alias.
    # NOTE(review): `decay` is kept as in the original call; confirm it is
    # still accepted by the installed Keras version.
    opt = keras.optimizers.Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999,  decay=decay)
    reg = keras.regularizers.l1_l2(l1=l1, l2=l2)
    model = Sequential()
    first=True
    # One pass over the (layer count, layer width) pairs replaces three
    # copy-pasted loops; only the very first layer receives `input_dim`.
    for n_layers, n_units in ((nl1, nn1), (nl2, nn2), (nl3, nn3)):
        for _ in range(n_layers):
            if first:
                model.add(Dense(n_units, input_dim=input_shape, activation=act, kernel_regularizer=reg))
                first=False
            else:
                model.add(Dense(n_units, activation=act, kernel_regularizer=reg))
            if dropout!=0:
                model.add(Dropout(dropout))
    if first:
        # No hidden layers were requested: the output layer is the first
        # layer, so it is the one that must declare the input dimension.
        model.add(Dense(output_shape, input_dim=input_shape, activation='softmax'))
    else:
        model.add(Dense(output_shape, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'],)
    return model
##################################
# ---- data-derived dimensions -------------------------------------------
feats = ['tech_mean', 'tech', 'mean_star', 'star', 'mean_good', 'good', 'mean_bad', 'bad']
tech = pd.read_csv("../../RESULTS/deepbake_features.20201016.tsv", sep='\t')
tech['place'] = tiered(tech['place'])

# Number of input features and number of distinct tiers in the full table.
# Both pairs of names are kept because later cells use either spelling.
input_shape = n_dims = len(feats)
output_shape = n_classes = len(set(tech['place']))

# ---- regularization / optimizer ----------------------------------------
l1, l2 = 0.0001, 0.0001
lr = 0.0001
decay = 1e-09

# ---- architecture: layers per group, units per layer --------------------
nl1, nl2, nl3 = 1, 1, 1
nn1, nn2, nn3 = 800, 800, 300
dropout = 0.1
act = 'relu'

# ---- training schedule --------------------------------------------------
BATCH, EPOCHS = 12, 25

In [7]:
# Season-11 features for every episode up to and including EPI.
GBBO = pd.read_csv("deepbake_features_s11_e{}.tsv".format(EPI),sep="\t")
GBBO = GBBO.loc[GBBO['episode']<=EPI]

# The historical table does not change between iterations: read it once.
tech_all = pd.read_csv("../../RESULTS/deepbake_features.20201016.tsv",sep='\t')

# NOTE: no random seed is set, so the fitted weights (and therefore the
# predicted probabilities) differ from run to run.
weekly = []
# sorted(): set iteration order is not guaranteed, and the cells below rely
# on `gbbo` holding the *last* episode once the loop has finished.
for e in sorted(set(GBBO['episode'])):
    # Season-11 rows for this episode, scaled on their own.
    gbbo = GBBO.loc[GBBO['episode']==e]
    gbbo = quantile_scale(gbbo,feats)
    test = gbbo[feats].to_numpy()

    # Historical rows for the same episode number form the training set.
    tech = tech_all.loc[tech_all['episode']==e]
    qua = quantile_scale(tech,feats)
    qua['place']=tiered(qua['place'])

    # Plain ndarrays instead of the discouraged np.matrix class.
    X, y = qua[feats].to_numpy(), np.array(qua['place'])

    # A fresh model is trained from scratch for every episode.
    nn = create_model( nl1=nl1, nl2=nl2,  nl3=nl3, 
                     nn1=nn1, nn2=nn2, nn3 = nn3, 
                     lr=lr, decay=decay, l1=l1, l2=l2,
                     act = act, dropout=dropout,
                     input_shape=n_dims,
                     output_shape=n_classes)

    nn.fit(X,y,validation_split=0., batch_size=BATCH, epochs=EPOCHS,verbose=0)

    # predict_proba / predict_classes are deprecated: predict() returns the
    # softmax probabilities and argmax over the class axis gives the label.
    probs = nn.predict(test, verbose=0)
    preds = np.argmax(probs, axis=-1)
    gbbo['preds']=preds

    # Class indices follow tiered(): 0 <- place 1, 1 <- place 2,
    # 2 <- places 3-4, 3 <- places 5-7, 4 <- place 8 and below.
    # Negative indexing picks the last three classes, so the comments below
    # assume all five tiers are present in the historical table.
    # class 0 (place 1); stored under the column name 'finalist'
    top = probs[:,0]
    # class 0 + class 1 (place 1 or 2); stored under the column name 'top3'
    top3 = probs[:,0]+probs[:,1]
    # last class: bottom tier (8th and below)
    bot = probs[:,-1]
    # second-to-last class: 5th - 7th
    nextbot = probs[:,-2]
    # third-to-last class: 3rd - 4th
    third = probs[:,-3]

    # Probabilities are stored as percentages rounded to two decimals.
    gbbo['bottom']=np.round(bot*100,decimals=2)
    gbbo['finalist']=np.round(top*100,decimals=2) 
    gbbo['top3'] = np.round(top3*100,decimals=2)
    gbbo['fifthseventh'] = np.round(nextbot*100,decimals=2)
    gbbo['thirdforth'] = np.round(third*100,decimals=2)
    weekly.append(gbbo)
    # File name (including its spelling) is kept unchanged so existing
    # consumers of these files keep working.
    gbbo.to_csv("deepbake_s11.week{}_preditions.txt".format(e),sep="\t",index=False)
    print(e)

# Concatenate once instead of growing the frame inside the loop.
S11 = pd.concat(weekly) if weekly else pd.DataFrame()

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).
Instructions for updating:
Please use `model.predict()` instead.
1
2
3
4
5
6


In [8]:
# Latest-episode predictions, highest 'top3' percentage first.
gbbo.sort_values(by='top3', ascending=False)

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad,preds,bottom,finalist,top3,fifthseventh,thirdforth
5,11,Dave,6,0,0.0,0.636364,0.666667,0.0,1.0,1.0,0.0,0.0,0,3.07,47.25,72.120003,8.28,16.540001
59,11,Peter,6,0,0.333333,0.454545,0.666667,0.0,0.666667,0.0,0.0,0.0,1,8.97,15.83,59.200001,13.45,18.389999
17,11,Laura,6,0,0.895683,1.0,0.666667,0.0,0.444444,0.0,0.0,1.0,1,4.12,12.09,51.860001,25.209999,18.799999
11,11,Hermine,6,0,0.709114,0.727273,0.0,0.0,0.666667,0.0,0.0,1.0,1,7.79,10.66,42.669998,31.290001,18.24
35,11,Lottie,6,0,0.836211,0.545455,0.666667,1.0,0.444444,1.0,0.0,0.0,2,4.65,10.42,30.139999,27.799999,37.41
47,11,Marc,6,0,1.0,0.909091,0.666667,0.0,1.0,1.0,0.777778,0.0,2,3.53,9.22,29.98,16.34,50.150002
53,11,Mark,6,0,0.453245,0.818182,0.666667,0.0,0.666667,0.0,0.777778,1.0,3,7.1,7.71,26.66,45.880001,20.360001
71,11,Sura,6,4,0.333333,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,4,41.150002,8.07,25.719999,19.280001,13.86
29,11,Loriea,6,1,0.107834,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,55.790001,5.16,11.55,22.620001,10.04
41,11,Mak,6,2,0.125786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,56.5,4.94,11.12,22.57,9.81


In [9]:
# Latest-episode predictions, highest 'fifthseventh' percentage first.
gbbo.sort_values(by='fifthseventh', ascending=False)

Unnamed: 0,season,baker,episode,place,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad,preds,bottom,finalist,top3,fifthseventh,thirdforth
53,11,Mark,6,0,0.453245,0.818182,0.666667,0.0,0.666667,0.0,0.777778,1.0,3,7.1,7.71,26.66,45.880001,20.360001
11,11,Hermine,6,0,0.709114,0.727273,0.0,0.0,0.666667,0.0,0.0,1.0,1,7.79,10.66,42.669998,31.290001,18.24
35,11,Lottie,6,0,0.836211,0.545455,0.666667,1.0,0.444444,1.0,0.0,0.0,2,4.65,10.42,30.139999,27.799999,37.41
65,11,Rowan,6,3,0.537404,0.0,0.0,0.0,0.0,0.0,0.777778,0.0,4,68.260002,0.24,0.7,27.68,3.35
23,11,Linda,6,0,0.635701,0.0,0.0,0.0,0.0,0.0,0.777778,0.0,4,70.120003,0.18,0.55,26.49,2.83
17,11,Laura,6,0,0.895683,1.0,0.666667,0.0,0.444444,0.0,0.0,1.0,1,4.12,12.09,51.860001,25.209999,18.799999
29,11,Loriea,6,1,0.107834,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,55.790001,5.16,11.55,22.620001,10.04
41,11,Mak,6,2,0.125786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,56.5,4.94,11.12,22.57,9.81
71,11,Sura,6,4,0.333333,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,4,41.150002,8.07,25.719999,19.280001,13.86
47,11,Marc,6,0,1.0,0.909091,0.666667,0.0,1.0,1.0,0.777778,0.0,2,3.53,9.22,29.98,16.34,50.150002
