In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import interp
import scipy.stats
import warnings

from datetime import datetime
from sklearn.preprocessing import QuantileTransformer
import subprocess
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.activations import relu
import tensorflow as tf
warnings.simplefilter("ignore")

In [2]:
def timestamp(): return datetime.today().strftime('%Y%m%d')

In [3]:
def quantile_scale(df,feats):
    qua = df
    scaler = QuantileTransformer(
        n_quantiles=10,
        random_state=42,
        ignore_implicit_zeros=True, #sparse matrix
    )
    # fit the scaler
    scaler.fit(qua[feats])
    # transform values
    qua[feats] = scaler.transform(qua[feats])
    return qua

In [4]:
def tiered(classes):
    trans = []
    for x in classes:
        if x==1: c=0
        if x==2: c=1
        if x>=3 and x<=4: c=2
        if x>=5 and x<=7: c=3
        if x>=8: c=4
        trans.append(c)
    return trans

In [5]:
def merge_feats(episode):
    subprocess.run(f'bash src/get_data_ready.sh {episode}', shell=True)
    merge_col = ['season','baker','episode']
    tech = pd.read_csv(f"deepbake_s13_technical_features.tsv",sep="\t")
    star = pd.read_csv(f"deepbake_judge_features_s13_e{episode}.tsv",sep="\t")
    gbbo = pd.merge(tech, star,  how='left', left_on=merge_col, right_on =merge_col).drop_duplicates()
    gbbo = gbbo[['season','baker','episode','tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']]
    gbbo.to_csv(f"deepbake_features_s13_e{episode}.tsv",sep="\t",index=False)

In [6]:
def process_episode(episode):
    merge_feats(episode)
    gbbo = pd.read_csv(f"deepbake_features_s13_e{episode}.tsv",sep="\t")
    feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']
    gbbo = gbbo.loc[gbbo['episode']==episode]
    gbbo = quantile_scale(gbbo,feats)
    return gbbo

In [7]:
def get_training_set(episode):
    tech = pd.read_csv("../../RESULTS/deepbake_features.20221002.tsv",sep='\t')
    tech = tech.loc[tech['episode']==episode]
    feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']
    qua = quantile_scale(tech,feats)
    qua['place']=tiered(qua['place'])
    X, y = np.matrix(qua[feats]), np.array(qua['place'])
    return X,y

In [8]:
def create_model( nl1=1, nl2=1,  nl3=1, 
                 nn1=1000, nn2=500, nn3 = 200, lr=0.01, decay=0., l1=0.01, l2=0.01,
                act = 'relu', dropout=0,input_shape=None,output_shape=None):    
    '''This is a model generating function so that we can search over neural net 
    parameters and architecture
    https://www.kaggle.com/arrogantlymodest/randomised-cv-search-over-keras-neural-network
    '''
    opt = tf.optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999,  decay=decay)
    reg = keras.regularizers.l1_l2(l1=l1, l2=l2)
    model = Sequential()
    first=True  
    for i in range(nl1):
        if first:
            model.add(Dense(nn1, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn1, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))    
    for i in range(nl2):
        if first:
            model.add(Dense(nn2, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn2, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))    
    for i in range(nl3):
        if first:
            model.add(Dense(nn3, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(Dense(nn3, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(Dropout(dropout))       
    model.add(Dense(output_shape, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'],)
    return model

In [9]:
def deep_bake(episode):
    feats = ['tech_mean','tech','mean_star','star','mean_good','good','mean_bad','bad']
    tech = pd.read_csv("../../RESULTS/deepbake_features.20221002.tsv",sep='\t')
    tech['place']=tiered(tech['place'])
    input_shape = len(feats)
    output_shape = len(set(tech['place']))

    l1 = 0.0001
    l2 = 0.0001
    lr = 0.0001
    nl1 = 1
    nl2 = 1
    nl3 = 1
    nn1 = 800
    nn2 = 800
    nn3 = 300
    dropout = 0.1
    decay = 1e-09
    act='relu'
    n_dims = len(feats)
    n_classes = len(set(tech['place']))
    BATCH,EPOCHS = 12, 25
    
    GBBO = pd.read_csv(f"deepbake_features_s13_e{episode}.tsv",sep="\t")
    S11 = pd.DataFrame()
    
    gbbo = GBBO.loc[GBBO['episode']==episode].copy()
    gbbo = quantile_scale(gbbo,feats)
    test = np.matrix(gbbo[feats])

    tech2 = tech.loc[tech['episode']==episode].copy()
    qua = quantile_scale(tech2,feats)
    qua['place']=tiered(qua['place'])

    X, y = np.matrix(qua[feats]), np.array(qua['place'])

    nn = create_model( nl1=nl1, nl2=nl2,  nl3=nl3, 
                     nn1=nn1, nn2=nn2, nn3 = nn3, 
                     lr=lr, decay=decay, l1=l1, l2=l2,
                     act = act, dropout=dropout,
                     input_shape=n_dims,
                     output_shape=n_classes)

    nn.fit(X,y,validation_split=0., batch_size=BATCH, epochs=EPOCHS,verbose=0)
    preds = np.argmax(nn.predict(test) > 0.5, axis=-1).astype("int32")
    probs = nn.predict(test)
    gbbo['preds']=preds
    # probability baker is a finalist
    top = probs[:,0]
    # probability baker is a finalist or a runner-up
    top3 = probs[:,0]+probs[:,1]
    # bottom tier (8th and below)
    bot = probs[:,-1]
    # 5th - 7th
    nextbot = probs[:,-2]
    third = probs[:,-3]

    gbbo['bottom']=np.round(bot*100,decimals=2)
    gbbo['finalist']=np.round(top*100,decimals=2) 
    gbbo['top3'] = np.round(top3*100,decimals=2)
    gbbo['fifthseventh'] = np.round(nextbot*100,decimals=2)
    gbbo['thirdforth'] = np.round(third*100,decimals=2)
    gbbo.to_csv(f"deepbake_s13.week{episode}_predictions.tsv",sep="\t",index=False)

    return gbbo

# deepbake

In [19]:
episode=9
process_episode(episode)

[13]
   output ---> deepbake_s13_technical_features.tsv
   output ---> deepbake_judge_features_s13_e9.tsv


Unnamed: 0,season,baker,episode,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad
8,13,Abdul,9,1.0,0.818182,0.666667,1.0,0.666667,1.0,0.0,0.0
17,13,Carole,9,0.802469,0.0,0.0,0.0,0.0,0.0,1.0,0.0
26,13,Dawn,9,0.727273,0.0,0.0,0.0,0.5,0.0,1.0,0.0
35,13,James,9,0.308133,0.0,0.0,0.0,0.5,0.0,0.222222,0.0
44,13,Janusz,9,0.542342,0.909091,0.888889,0.0,0.834625,0.0,0.555556,1.0
53,13,Kevin,9,0.477477,0.0,0.0,0.0,0.0,0.0,1.0,0.0
62,13,Maisam,9,0.066398,0.0,0.0,0.0,0.0,0.0,0.555556,0.0
71,13,Maxy,9,0.649123,0.0,0.888889,0.0,0.666667,0.0,0.222222,0.0
80,13,Rebs,9,0.199495,0.0,0.0,0.0,0.0,0.0,1.0,0.0
89,13,Sandro,9,0.341085,1.0,0.666667,0.0,0.900901,0.0,0.555556,1.0


In [20]:
df = deep_bake(episode)

2022-11-14 07:30:55.304375: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-14 07:30:55.419825: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [21]:
df.sort_values(['finalist'],ascending=False)


Unnamed: 0,season,baker,episode,tech_mean,tech,mean_star,star,mean_good,good,mean_bad,bad,preds,bottom,finalist,top3,fifthseventh,thirdforth
98,13,Syabira,9,0.902857,0.727273,1.0,0.0,1.0,1.0,0.222222,0.0,0,0.16,75.730003,83.220001,0.2,16.42
8,13,Abdul,9,1.0,0.818182,0.666667,1.0,0.666667,1.0,0.0,0.0,0,0.13,67.75,70.760002,0.19,28.91
44,13,Janusz,9,0.542342,0.909091,0.888889,0.0,0.834625,0.0,0.555556,1.0,0,0.29,44.389999,91.809998,0.28,7.62
89,13,Sandro,9,0.341085,1.0,0.666667,0.0,0.900901,0.0,0.555556,1.0,1,0.33,38.419998,90.790001,0.32,8.56
71,13,Maxy,9,0.649123,0.0,0.888889,0.0,0.666667,0.0,0.222222,0.0,2,0.41,7.86,38.900002,0.48,60.220001
35,13,James,9,0.308133,0.0,0.0,0.0,0.5,0.0,0.222222,0.0,2,0.97,3.39,27.049999,1.1,70.889999
107,13,Will,9,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,2,1.11,1.78,9.91,1.34,87.639999
62,13,Maisam,9,0.066398,0.0,0.0,0.0,0.0,0.0,0.555556,0.0,2,0.14,0.31,4.05,0.18,95.629997
26,13,Dawn,9,0.727273,0.0,0.0,0.0,0.5,0.0,1.0,0.0,2,0.01,0.12,9.12,0.02,90.849998
53,13,Kevin,9,0.477477,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2,0.01,0.03,1.38,0.01,98.610001
