In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import _pickle as pickle
import gc
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
import optuna
import os
import tensorflow as tf
from sklearn.metrics import roc_auc_score
from hyperopt import fmin, tpe, hp

# import tensorflow as tf
def save(file, name, folder=""):
    """Pickle ``file`` to ``<name>.pickle``, optionally inside ``./<folder>/``.

    Parameters
    ----------
    file : object
        Any picklable object to serialise.
    name : str
        Target path without the ``.pickle`` extension.
    folder : str, optional
        When non-empty, the file is written to ``./<folder>/<name>.pickle``.
    """
    if folder != "":
        path = './' + folder + '/' + name + '.pickle'
    else:
        path = name + '.pickle'
    # The context manager flushes and closes the handle deterministically; the
    # previous ``outfile.close`` only referenced the method without calling it.
    with open(path, 'wb') as outfile:
        pickle.dump(file, outfile, protocol=4)
    
def load(name, folder=""):
    """Load and return the object pickled at ``<name>.pickle``.

    Parameters
    ----------
    name : str
        Source path without the ``.pickle`` extension.
    folder : str, optional
        When non-empty, the file is read from ``./<folder>/<name>.pickle``.

    Returns
    -------
    object
        The unpickled object.
    """
    if folder != "":
        path = './' + folder + '/' + name + '.pickle'
    else:
        path = name + '.pickle'
    # NOTE: unpickling executes arbitrary code; only load files you produced.
    # The context manager closes the handle; the previous ``outfile.close``
    # only referenced the method without calling it.
    with open(path, 'rb') as infile:
        return pickle.load(infile)

# Show up to 150 columns / rows when displaying DataFrames.
pd.set_option('display.max_columns', 150)
pd.set_option('display.max_rows', 150)
import tensorflow as tf
from tensorflow.keras.mixed_precision import experimental as mixed_precision
gpus = tf.config.experimental.list_physical_devices('GPU')

# Cap the first GPU with a virtual device (memory_limit=1500; presumably
# megabytes per the TensorFlow API -- confirm).
# NOTE(review): `gpus[0]` raises IndexError when no GPU is visible.
tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1500)]
            )
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")


# Set the global Keras dtype policy to 'mixed_float16' for every model built
# afterwards; the float32 alternative is kept commented out below.
policy = mixed_precision.Policy('mixed_float16')
# policy = mixed_precision.Policy('float32')
mixed_precision.set_policy(policy)

def utility_metric(date, weights, resp, action):
    """Compute the day-aggregated utility score of a set of trading actions.

    For every distinct value of ``date`` the daily profit is
    ``p_i = sum(weights * resp * action)`` over the rows of that day. With
    ``sp = sum(p_i)`` and ``nt`` the number of distinct days, the score is::

        t = sp / sqrt(sum(p_i ** 2)) * sqrt(250 / nt)
        u = min(max(t, 0), 6) * sp

    Parameters
    ----------
    date, weights, resp, action : array-like
        Equal-length 1-D sequences (NumPy arrays or pandas Series); rows are
        matched by position.

    Returns
    -------
    float
        The utility ``u``. Returns ``0.0`` when there are no rows or when every
        daily profit is zero (e.g. no trade taken); the formula would otherwise
        evaluate ``0 / 0`` and yield NaN.
    """
    date = np.asarray(date).ravel()
    pnl = (np.asarray(weights) * np.asarray(resp) * np.asarray(action)).ravel()

    # One pass over the data: map each row to its day index, then sum per day,
    # instead of building a boolean mask over all rows for every single day.
    unique_dates, day_index = np.unique(date, return_inverse=True)
    nt = unique_dates.shape[0]
    if nt == 0:
        return 0.0
    p = np.bincount(day_index.ravel(), weights=pnl, minlength=nt)

    sp = np.sum(p)
    normp = np.sqrt(np.sum(np.square(p)))
    if normp == 0:
        # No profit or loss on any day: utility is zero rather than NaN.
        return 0.0
    t = (sp / normp) * np.sqrt(250 / nt)
    u = min(max(t, 0), 6) * sp
    return u
    

def build_model(parameters):
    """Build and compile the dense binary classifier described by ``parameters``.

    Keys read from ``parameters``:
      * ``norm`` -- prepend a ``Normalization`` layer when truthy.
      * ``n_blocks`` -- number of blocks.
      * ``n_dense_per_block`` -- number of ``Dense`` layers stacked per block.
      * ``dense_shape[block]`` -- width of the ``Dense`` layers of a block.
      * ``normalization[block]`` -- add ``BatchNormalization`` to the block.
      * ``dropouts[block]`` -- dropout rate applied at the end of the block.
      * ``lr`` -- Adam learning rate.

    Returns a compiled ``tf.keras.Model`` taking 131 input features and
    producing one sigmoid output.
    """
    inputs = tf.keras.Input(shape=(131,))
    if parameters['norm']:
        # NOTE(review): the layer is never `adapt`-ed in this file, so it uses
        # its default statistics -- confirm this is intended.
        x = tf.keras.layers.experimental.preprocessing.Normalization()(inputs)
    else:
        x = inputs

    for block in range(parameters['n_blocks']):
        prefix = 'block_' + str(block)
        # The Dense layers of a block are stacked without an activation in
        # between; ReLU is applied once per block, after the optional batch norm.
        for n in range(parameters['n_dense_per_block']):
            x = tf.keras.layers.Dense(
                parameters['dense_shape'][block],
                name=prefix + '_dense_' + str(n),
            )(x)
        if parameters['normalization'][block]:
            x = tf.keras.layers.BatchNormalization(name=prefix + '_batch_norm')(x)
        x = tf.keras.activations.relu(x)
        # Fix: the dropout output used to be discarded, so the tuned dropout
        # rates never affected the network. It is now part of the graph.
        x = tf.keras.layers.Dropout(
            parameters['dropouts'][block],
            name=prefix + '_dropout',
        )(x)

    x = tf.keras.layers.Dense(1, activation='sigmoid', name='classification_head')(x)
    model = tf.keras.Model(inputs, x)
    model.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(parameters['lr']),
        metrics=['accuracy', 'AUC'],
    )
    return model

def get_fold(fold_number, X_train, y_train, date_train, weights_train):
    """Split the training data into a contiguous 44-day validation fold and the rest.

    Rows whose date lies in ``[44 * fold_number, 44 * (fold_number + 1))`` form
    the validation set; every other row goes to the training set.

    Returns ``(Xt, Xv, yt, yv, Yt, Yv, datet, datev, weightst, weightsv, sw)``
    where ``Yt``/``Yv`` are the raw targets, ``yt``/``yv`` their 0/1 sign
    labels, and ``sw = |Yt * weightst| + 1`` the training sample weights.
    """
    first_day = 44 * fold_number
    last_day_excl = 44 * (fold_number + 1)
    in_val = (date_train >= first_day) & (date_train < last_day_excl)
    in_train = np.invert(in_val)

    Xt = X_train[in_train]
    Xv = X_train[in_val]
    Yt = y_train[in_train]
    Yv = y_train[in_val]
    datet = date_train[in_train]
    datev = date_train[in_val]
    weightst = weights_train[in_train]
    weightsv = weights_train[in_val]

    # Larger |target * weight| rows get more importance; +1 keeps all weights > 0.
    sw = abs(Yt * weightst) + 1
    yt = (Yt > 0) * 1
    yv = (Yv > 0) * 1
    return Xt, Xv, yt, yv, Yt, Yv, datet, datev, weightst, weightsv, sw

class Utility_Callback(tf.keras.callbacks.Callback):
    """Keras callback that logs the validation utility after every epoch.

    After each epoch the model predicts on the validation features, a decision
    threshold is searched with hyperopt (TPE, 100 evaluations, prior
    ``Normal(0.5, 0.02)``) to maximise ``utility_metric``, and the resulting
    utility is written to ``logs['val_utility']`` so that callbacks listed
    after this one can monitor it.

    Parameters
    ----------
    Xv : validation features passed to ``model.predict``.
    Yv : raw validation targets used by ``utility_metric``.
    yv : 0/1 validation labels, used as the "perfect action" benchmark.
    datev, weightsv : per-row dates and weights for ``utility_metric``.
    bs : batch size used for prediction.
    """

    def __init__(self, Xv, Yv, yv, datev, weightsv, bs):
        # Initialise the base callback state (model / params bookkeeping).
        super().__init__()
        self.Xv = Xv
        self.Yv = Yv
        self.yv = yv
        self.datev = datev
        self.weightsv = weightsv
        self.bs = bs

    def on_epoch_end(self, epoch, logs=None):
        """Search the best threshold on the validation set and log its utility."""
        ## Prediction
        pred = self.model.predict(self.Xv, batch_size=self.bs)[:, 0]
        pred = np.clip(pred, 0, 1)

        ## Optimization
        space = hp.normal('x', 0.5, 0.02)

        def negative_utility(x):
            # Same comparison as the final action below, so the optimised
            # objective matches what is actually reported.
            action = (pred >= x) * 1
            return -utility_metric(self.datev, self.weightsv, self.Yv, action)

        best = fmin(
            fn=negative_utility,  # "Loss" function to minimize
            space=space,  # Hyperparameter space
            algo=tpe.suggest,  # Tree-structured Parzen Estimator (TPE)
            max_evals=100,  # Perform 100 trials
        )
        action = (pred >= best['x']) * 1
        val_utility = utility_metric(self.datev, self.weightsv, self.Yv, action)
        # Utility obtained when acting exactly on the true 0/1 labels.
        bench_utility = utility_metric(self.datev, self.weightsv, self.Yv, self.yv)
        if logs is not None:
            logs.update({'val_utility': val_utility})
        print("Val_utility is : "+str(val_utility) + ' best possible would be : '+str(bench_utility))
        
        
def train(model, parameters, Xt, Xv, yt, yv, sw, Yv, datev, weightsv):
    """Fit ``model`` with early stopping driven by the validation utility.

    Parameters
    ----------
    model : compiled Keras model.
    parameters : dict
        Reads ``use_sample_weights`` and ``batch_size``.
    Xt, yt : training features and 0/1 labels.
    Xv, yv : validation features and 0/1 labels.
    sw : per-row training sample weights, used only when
        ``parameters['use_sample_weights']`` is truthy.
    Yv, datev, weightsv : raw validation targets, dates and weights forwarded
        to ``Utility_Callback``.

    Returns
    -------
    (model, sc)
        The trained model and the best validation AUC seen during training.
    """
    # Fix: the weights were computed but `sample_weight=None` was hard-coded in
    # `fit`, so the `use_sample_weights` hyper-parameter never had any effect.
    sample_weight = np.asarray(sw) if parameters['use_sample_weights'] else None
    epochs = 100
    utility_call = Utility_Callback(Xv, Yv, yv, datev, weightsv, bs=parameters['batch_size'])

    early = EarlyStopping(monitor='val_utility', min_delta=0.0001, patience=8, verbose=1,
                          mode='max', restore_best_weights=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_utility', factor=0.1, patience=3, verbose=1,
                                  mode='max', min_delta=0.0001, cooldown=0, min_lr=0)
    # `utility_call` must come first: it writes `val_utility` into the epoch
    # logs that the two following callbacks monitor.
    callbacks = [utility_call, early, reduce_lr]

    history = model.fit(Xt, yt, validation_data=(Xv, yv),
                        batch_size=parameters['batch_size'], epochs=epochs,
                        callbacks=callbacks, sample_weight=sample_weight, verbose=2)

    # Keras may suffix the metric name (e.g. `val_auc_1`) when several models
    # are built in one session -- presumably the case across Optuna trials --
    # so accept any `val_auc*` key and fall back to the plain name.
    auc_key = next((k for k in history.history if k.startswith('val_auc')), 'val_auc')
    sc = np.max(history.history[auc_key])
    return model, sc

def make_experiment(fold_number, n_trials=100):
    """Run an Optuna hyper-parameter study on one time-based fold.

    Loads ``splitted_dataset``, carves out fold ``fold_number`` with
    ``get_fold`` and maximises the objective returned by ``get_objective``
    for ``n_trials`` trials. Trial artefacts are written below
    ``./time_cv_ensembling/<fold_number>/``.
    """
    # Create the output directory (and its parent) up front. The previous bare
    # `except` around `os.mkdir` also hid real failures such as a missing
    # parent directory, which then surfaced only when saving the first trial.
    os.makedirs('./time_cv_ensembling/' + str(fold_number), exist_ok=True)

    print("Loading Data")
    (X_train, X_test, y_train, y_test, date_train, date_test, weights_train, weights_test) = load('splitted_dataset')

    X_train = X_train.values
    X_test = X_test.values
    y_test_cat = (y_test > 0) * 1

    print("Loading Fold")
    Xt, Xv, yt, yv, Yt, Yv, datet, datev, weightst, weightsv, sw = get_fold(fold_number, X_train, y_train, date_train, weights_train)
    # Drop the full training arrays before the study starts to lower peak memory.
    del X_train
    del y_train
    gc.collect()

    print(Xt.shape, yt.shape, Yt.shape, datet.shape, weightst.shape, sw.shape)
    print(Xv.shape, yv.shape, Yv.shape, datev.shape, weightsv.shape)
    print(X_test.shape, y_test_cat.shape, y_test.shape, date_test.shape, weights_test.shape)

    print("Launching study")
    study = optuna.create_study(direction='maximize')
    data = (Xt, Xv, yt, yv, Yt, Yv, datet, datev, weightst, weightsv, sw,
            X_test, y_test, y_test_cat, date_test, weights_test, fold_number)
    study.optimize(get_objective(data), n_trials=n_trials)
    print("Study ended")

def _search_threshold(pred, date, weights, resp, max_evals=100):
    """Return the decision threshold on ``pred`` that maximises ``utility_metric``."""
    def negative_utility(x):
        # Same `>=` comparison as the one used to build the final action.
        action = (pred >= x) * 1
        return -utility_metric(date, weights, resp, action)

    best = fmin(
        fn=negative_utility,  # "Loss" function to minimize
        space=hp.normal('x', 0.5, 0.02),  # Prior over the threshold
        algo=tpe.suggest,  # Tree-structured Parzen Estimator (TPE)
        max_evals=max_evals,  # Number of threshold candidates evaluated
    )
    return best['x']


def get_objective(data):
    """Return an Optuna objective bound to ``data``.

    ``data`` is the 17-tuple ``(Xt, Xv, yt, yv, Yt, Yv, datet, datev,
    weightst, weightsv, sw, X_test, y_test, y_test_cat, date_test,
    weights_test, fold_number)``.

    The objective samples an architecture, trains it, tunes a decision
    threshold on the validation set, evaluates on the test set, saves the
    parameters and model under
    ``./time_cv_ensembling/<fold_number>/trial_<name>/`` and returns the
    validation utility (to be maximised).
    """
    def objective(trial, data=data):
        if data is None:
            raise ValueError("Please inject some datas in the objective function")
        Xt, Xv, yt, yv, Yt, Yv, datet, datev, weightst, weightsv, sw, X_test, y_test, y_test_cat, date_test, weights_test, fold_number = data
        # Random identifier, only used to name the trial's output directory.
        name = trial.suggest_int('name', 100000, 999999)
        ## Parameters
        n_blocks = 2  # trial.suggest_int('n_block', 2, 2)
        n_dense_per_block = trial.suggest_int('n_dense_per_block', 1, 2)

        dense_shape = []
        dropouts = []
        normalization = []

        for i in range(n_blocks):
            dense_shape.append(trial.suggest_categorical('dense_block_'+str(i), [64, 128, 256, 512, 1024]))
            dropouts.append(trial.suggest_uniform('dropout_block_'+str(i), 0, 0.4))
            normalization.append(trial.suggest_categorical('norm_block_'+str(i), [True]))
        batch_size = trial.suggest_categorical("batch_size", [512, 1024, 2048])
        lr = trial.suggest_categorical("lr", [0.01, 0.001, 0.0001])
        norm = trial.suggest_categorical("norm", [True])
        use_sample_weights = trial.suggest_categorical("sample_weights", [True, False])

        parameters = {
            "name": name,
            "n_blocks": n_blocks,
            "n_dense_per_block": n_dense_per_block,
            "dense_shape": dense_shape,
            "dropouts": dropouts,
            "normalization": normalization,
            "batch_size": batch_size,
            'lr': lr,
            "use_sample_weights": use_sample_weights,
            "norm": norm,
        }

        ## Model building and training
        print('Model training, go grab a coffee')
        print(parameters)
        model = build_model(parameters)

        model, val_auc = train(model, parameters, Xt, Xv, yt, yv, sw, Yv, datev, weightsv)

        print("Model trained")
        ## Evaluation on val set
        print("Evaluation")
        parameters['val_auc'] = val_auc
        print("Val auc : " + str(val_auc))
        pred = model.predict(Xv, batch_size=parameters['batch_size'])[:, 0]
        pred = np.clip(pred, 0, 1)

        val_threshold = _search_threshold(pred, datev, weightsv, Yv)
        parameters['val_treshold'] = val_threshold
        action = (pred >= val_threshold) * 1
        val_utility = utility_metric(datev, weightsv, Yv, action)
        parameters['val_utility'] = val_utility
        print("Val_utility : " + str(val_utility))

        ## Evaluation on test set
        pred = model.predict(X_test, batch_size=parameters['batch_size'])[:, 0]
        test_auc = roc_auc_score(y_test_cat, pred)
        print("Test Auc : " + str(test_auc))
        parameters['test_auc'] = test_auc

        # NOTE(review): the threshold is re-tuned on the test set itself, so
        # `test_utility` is an optimistic upper bound, not a held-out estimate.
        test_threshold = _search_threshold(pred, date_test, weights_test, y_test)
        action = (pred >= test_threshold) * 1
        parameters['test_treshold'] = test_threshold
        test_utility = utility_metric(date_test, weights_test, y_test, action)
        parameters['test_utility'] = test_utility
        print('Test utility : '+ str(test_utility))

        ## Parameters and model savings
        print("Saving")
        trial_dir = './time_cv_ensembling/'+str(fold_number)+'/trial_'+str(name)
        # Creates missing parents and tolerates an existing directory, without
        # swallowing unrelated errors the way the previous bare `except` did.
        os.makedirs(trial_dir, exist_ok=True)

        save(parameters, trial_dir+'/parameters')
        model.save(trial_dir+'/model')

        print("Next model")
        print('\n')
        return val_utility
    return objective

In [None]:
# Run the hyper-parameter study on fold 3 with 50 Optuna trials.
make_experiment(3, n_trials = 50)

In [None]:
# Gather the saved `parameters` dict of every trial of fold 0 into one table:
# one row per trial, one column per parameter / metric key.
dico = {}
for trial_dir in os.listdir("./time_cv_ensembling/0"):
    params = load('./time_cv_ensembling/0/'+trial_dir+'/parameters')
    # The inner loops used to reuse the outer loop variable (`elt`); distinct
    # names keep the directory name and the dict keys apart.
    for key, value in params.items():
        dico.setdefault(key, []).append(value)

df = pd.DataFrame(dico)

In [None]:
# Display the 10 trials with the highest test utility.
df.sort_values(by = 'test_utility', ascending = False).head(10)

In [None]:
# Number of folds processed by the LightGBM fold loop, which iterates over
# range(start, start + n_fold).
n_fold = 250
# def get_fold(fold_number, X_train, y_train, date_train, weights_train):
#     filters = (date_train >= int(440/n_fold)*fold_number)&(date_train < int(440/n_fold)*(fold_number+1))
#     filters = np.invert(filters)
#     Xt, Xv, Yt, Yv = X_train[filters], X_train[np.invert(filters)], y_train[filters], y_train[np.invert(filters)]
#     datet, datev, weightst, weightsv = date_train[filters], date_train[np.invert(filters)], weights_train[filters], weights_train[np.invert(filters)]
#     sw = abs((Yt * weightst)) + 1
#     yt, yv = (Yt > 0)*1, (Yv > 0)*1
#     return Xt, Xv, yt, yv, Yt, Yv, datet, datev, weightst, weightsv, sw

def get_fold(fold_number, X_train, y_train, date_train, weights_train):
    """Split the data into train / validation sets by randomly drawn days.

    146 day indices are drawn from ``0..439`` with the global NumPy RNG; rows
    whose date is one of them form the validation set, all other rows the
    training set.

    ``fold_number`` is not used by the split; it is kept so the signature
    matches the callers.

    Returns ``(Xt, Xv, yt, yv, Yt, Yv, datet, datev, weightst, weightsv, sw,
    sw_eval)`` where ``yt``/``yv`` are the 0/1 sign labels of the raw targets
    ``Yt``/``Yv`` and ``sw`` / ``sw_eval`` are ``|target * weight| + 1``.
    """
    # NOTE(review): `np.random.choice` samples WITH replacement by default, so
    # fewer than 146 distinct days usually end up in validation -- confirm
    # whether `replace=False` was intended. Behaviour is left unchanged here.
    val = np.random.choice(np.arange(440), size=146)

    # Vectorised membership test; gives the same mask as testing `x in val`
    # row by row in Python, without the per-row interpreter overhead.
    in_val = np.isin(np.asarray(date_train), val)
    in_train = ~in_val

    Xt, Xv = X_train[in_train], X_train[in_val]
    Yt, Yv = y_train[in_train], y_train[in_val]
    datet, datev = date_train[in_train], date_train[in_val]
    weightst, weightsv = weights_train[in_train], weights_train[in_val]

    sw = abs(Yt * weightst) + 1
    sw_eval = abs(Yv * weightsv) + 1
    yt, yv = (Yt > 0) * 1, (Yv > 0) * 1
    return Xt, Xv, yt, yv, Yt, Yv, datet, datev, weightst, weightsv, sw, sw_eval

In [None]:
# Resume from previously saved results; only `ratio` is extended in this cell
# (pred_train / pred_test are loaded but not used here).
(pred_train, pred_test, ratio) = load("lgbm_prediction_train_test")

# Fold indices handled by this run: start .. start + n_fold - 1.
start = 250
for fold_number in tqdm(range(start, start + n_fold)):
    print("Loading Data")
    # Reloaded every iteration because X_train / y_train are deleted below.
    (X_train, X_test, y_train, y_test, date_train, date_test, weights_train, weights_test) = load('splitted_dataset')
    
    X_train = X_train.values
    X_test = X_test.values
    y_test_cat = (y_test > 0)*1
    
    # Append three calendar-style features: date modulo 7, 5 and 365.
    X_train = np.concatenate([X_train, (date_train%7)[:,None], (date_train%5)[:,None], (date_train%365)[:,None]], axis = -1)
    X_test = np.concatenate([X_test, (date_test%7)[:,None], (date_test%5)[:,None], (date_test%365)[:,None]], axis = -1)
    
    print("Loading Fold")
    Xt, Xv, yt, yv, Yt, Yv, datet, datev, weightst, weightsv, sw, sw_eval = get_fold(fold_number, X_train, y_train, date_train, weights_train)
    del X_train
    del y_train
    gc.collect()
    
    import lightgbm as lgb
    clf = lgb.LGBMClassifier(max_depth = -1, n_estimators = 20000, n_jobs = 6, early_stopping_rounds = 20,  first_metric_only = True)
    print(1)
    clf.fit(Xt, yt, eval_set =(Xv, yv), eval_metric = 'auc', sample_weight=sw,verbose = True, eval_sample_weight = [sw_eval])
    p = clf.predict(Xv)
    # th_util: utility when acting on the true labels; re_util: utility of the
    # model's predicted classes. Their ratio is stored as the fold's score.
    # NOTE(review): the division below fails / yields inf or nan if th_util is 0.
    th_util = utility_metric(datev , weightsv , Yv , yv)
    re_util = utility_metric(datev , weightsv , Yv , p)
    ratio.append(re_util/th_util)
    save(clf, './lgb_fold/lgb_'+str(fold_number))
    print("real utility is "+str(re_util)+" theoric util is "+str(th_util))
    print(re_util/th_util)
    print(np.mean(ratio))
    print(len(ratio))
    print('\n')
    
# Persist the accumulated per-fold ratios once all folds are done.
save(ratio, 'lgbm_success_ratio')

In [None]:
# Number of fold ratios collected so far.
len(ratio)

In [None]:
## Benchmark
# Single LightGBM classifier trained on the whole training split, with the test
# split as early-stopping evaluation set; the last line shows its test utility.
import lightgbm as lgb

(X_train, X_test, y_train, y_test, date_train, date_test, weights_train, weights_test) = load('splitted_dataset')
# Sample weights: |target * weight| + 1.
SAMPLE_WEIGHTS = abs((y_train * weights_train)) + 1
SAMPLE_WEIGHTS_EVAL = abs((y_test * weights_test)) + 1
# Append date modulo 7, 5 and 365 as three extra feature columns.
X_train = np.concatenate([X_train, (date_train%7)[:,None], (date_train%5)[:,None], (date_train%365)[:,None]], axis = -1)
X_test = np.concatenate([X_test, (date_test%7)[:,None], (date_test%5)[:,None], (date_test%365)[:,None]], axis = -1)

clf = lgb.LGBMClassifier(max_depth = -1, n_estimators = 20000, n_jobs = 12, silent = False, early_stopping_rounds = 20,  first_metric_only = True)
clf.fit(X_train, (y_train>0)*1, eval_set =(X_test, (y_test>0)*1), eval_metric = 'auc', sample_weight=SAMPLE_WEIGHTS, eval_sample_weight = [SAMPLE_WEIGHTS_EVAL])
p = clf.predict(X_test)
utility_metric(date_test , weights_test , y_test , p)

In [None]:
# Display the feature importances of the classifier currently bound to `clf`.
clf.feature_importances_

In [None]:
# Bare literal with no effect; presumably a score noted down by hand -- verify.
2527

In [None]:
# Reload every saved fold model, score it on the test split and collect its
# class-1 probabilities on both the train and test splits.
(X_train, X_test, y_train, y_test, date_train, date_test, weights_train, weights_test) = load('splitted_dataset')
ratio = load('lgbm_success_ratio')
# Same three date-modulo features as used when the fold models were trained.
X_train = np.concatenate([X_train, (date_train%7)[:,None], (date_train%5)[:,None], (date_train%365)[:,None]], axis = -1)
X_test = np.concatenate([X_test, (date_test%7)[:,None], (date_test%5)[:,None], (date_test%365)[:,None]], axis = -1)

pred_train = []
pred_test = []
# NOTE(review): assumes models lgb_0 .. lgb_499 exist on disk and that `ratio`
# holds at least 500 entries -- confirm.
n_fold = 500
utilities = []
for fold_number in tqdm(range(n_fold)):
    print(fold_number)
    print(ratio[fold_number])
    clf = load('./lgb_fold/lgb_'+str(fold_number))
    
    p = clf.predict(X_test)
    ut = utility_metric(date_test , weights_test , y_test , p)
    utilities.append(ut)
    print('utility is : '+str(ut))
    
    pred_train.append(clf.predict_proba(X_train)[:,1])
    pred_test.append(clf.predict_proba(X_test)[:,1])

# Validation success ratio of each fold model against its test utility.
plt.scatter(ratio, utilities)

In [None]:
# 0/1 labels: 1 where the raw target is strictly positive.
y_train1 = (y_train > 0)*1
y_test1 = (y_test > 0)*1

In [None]:
# Turn the per-model prediction lists into arrays and transpose them so that
# models lie along the last axis.
pred_train = np.array(pred_train).T
pred_test = np.array(pred_test).T

In [None]:
# Persist the stacked predictions together with the per-fold ratios.
save((pred_train, pred_test, ratio), "lgbm_prediction_train_test")

In [None]:
# Distribution of the per-fold success ratios.
plt.hist(ratio)

In [None]:
(pred_train, pred_test, ratio) = load("lgbm_prediction_train_test")

# Keep the prediction columns of the n models with the highest ratio
# (argsort is ascending, so the last n indices are the largest).
n = 500
ratio = np.array(ratio)
s = np.argsort(ratio)[-n:]

pred_train = pred_train[:, s]
pred_test = pred_test[:, s]

In [None]:
# Inspect the shape of the selected training predictions.
pred_train.shape

In [None]:
(X_train, X_test, y_train, y_test, date_train, date_test, weights_train, weights_test) = load('splitted_dataset')

# Sample weights (|target * weight| + 1) and 0/1 labels for a stacking model;
# the fitting code that follows in this cell is commented out.
SAMPLE_WEIGHTS = abs((y_train * weights_train)) + 1
y_train1 = (y_train > 0)*1
y_test1 = (y_test > 0)*1

# import lightgbm as lgb
# clf = lgb.LGBMClassifier(max_depth = -1, n_estimators = 1500, n_jobs = 12, silent = False, early_stopping_rounds = 50,  first_metric_only = True)
# clf.fit(pred_train, y_train1, eval_set =(pred_test, y_test1), eval_metric = 'auc', sample_weight=SAMPLE_WEIGHTS)
# clf.fit(pred_train, y_train1, sample_weight=SAMPLE_WEIGHTS)
# import sklearn
# clf = sklearn.linear_model.LogisticRegression(n_jobs = 6)
# clf.fit(pred_train, y_train1)

In [None]:
# Inspect the shape of the selected test predictions.
pred_test.shape

In [None]:
# pred = clf.predict(pred_test)
# NOTE(review): every stacking `clf.fit(pred_train, ...)` call in this file is
# commented out, so `clf` here is whichever model an earlier cell left bound --
# verify before relying on this cell.
pred = clf.predict_proba(pred_test)[:,1]

In [None]:
# Ensemble by averaging the model predictions along the last axis.
pred = np.mean(pred_test , axis = -1)

In [None]:
# y_train_soft = clf.predict_proba(pred_train)[:,1]
# y_test_soft = clf.predict_proba(pred_test)[:,1]

# Soft labels: mean of the model predictions along the last axis.
y_train_soft = np.mean(pred_train , axis = -1)
y_test_soft = np.mean(pred_test , axis = -1)

In [None]:
# Persist the soft labels to 'soft_labels.pickle'.
save((y_train_soft, y_test_soft), 'soft_labels')

In [None]:
# List the names exposed by the lightgbm module (interactive exploration).
dir(lgb)

In [None]:
# Reload the soft labels saved above.
(y_train_soft, y_test_soft) = load('soft_labels')

In [None]:
# Scale factor applied to the soft labels in the two histogram cells below.
fact = 10

In [None]:
# Histogram (50 bins) of the scaled training soft labels.
plt.hist(y_train_soft*fact, bins = 50)

In [None]:
# Histogram (50 bins) of the scaled test soft labels.
plt.hist(y_test_soft*fact, bins = 50)

In [None]:
# Reload the raw train / test split.
(X_train, X_test, y_train, y_test, date_train, date_test, weights_train, weights_test) = load('splitted_dataset')

In [None]:
# Preview the first rows of the training features.
X_train.head()

In [None]:
# Distillation: fit one LightGBM regressor on the raw features to reproduce the
# ensemble's soft labels.
(X_train, X_test, y_train, y_test, date_train, date_test, weights_train, weights_test) = load('splitted_dataset')
# `ratio` is loaded here but not used in the rest of this cell.
ratio = load('lgbm_success_ratio')
# Append date modulo 7, 5 and 365 as three extra feature columns.
X_train = np.concatenate([X_train, (date_train%7)[:,None], (date_train%5)[:,None], (date_train%365)[:,None]], axis = -1)
X_test = np.concatenate([X_test, (date_test%7)[:,None], (date_test%5)[:,None], (date_test%365)[:,None]], axis = -1)
SAMPLE_WEIGHTS = abs((y_train * weights_train)) + 1
(y_train_soft, y_test_soft) = load('soft_labels')

# Regression targets are the soft labels multiplied by 100, so thresholds
# applied to this model's predictions live on that scale.
y_train_soft *= 100
y_test_soft *= 100

import lightgbm as lgb
clf = lgb.LGBMRegressor(max_depth = -1, n_estimators = 20000, n_jobs = 12, silent = False, early_stopping_rounds = 50,  first_metric_only = True)
clf.fit(X_train, y_train_soft, eval_set =(X_test, y_test_soft), sample_weight=SAMPLE_WEIGHTS)

In [None]:
# Predict the first 500 test rows one at a time; `a` is overwritten on every
# iteration, so this is presumably a per-row latency check -- verify.
for i in tqdm(range(500)):
    a = clf.predict(X_test[i:i+1])

In [None]:
# Predictions of the model currently bound to `clf` on the test features.
pred = clf.predict(X_test)

In [None]:
# Histogram (50 bins) of the test predictions.
plt.hist(pred, bins = 50)

In [None]:
# Predictions against the raw test targets.
plt.scatter(pred, y_test)

In [None]:
import optuna
def objective(trial):
    """Return the negated test utility obtained with threshold ``x`` on ``pred``."""
#     x = trial.suggest_uniform('x', 0.45, 0.55)
    # Threshold sampled uniformly in [45, 55].
    x = trial.suggest_uniform('x', 45, 55)
    action = (pred>x)*1
    utility = utility_metric(date_test,weights_test, y_test, action)
    print(utility)
    # Negated because the study below is created without a direction argument
    # (Optuna minimises by default).
    return -utility

study = optuna.create_study()
study.optimize(objective, n_trials=100)

In [None]:
# Best threshold found by the study.
study.best_params['x']

In [None]:
# Test utility at the best threshold.
# NOTE(review): uses `>=` whereas the study's objective compared with `>`.
action = (pred >= study.best_params['x'])*1
# action = (pred >= 50)*1
# action = pred
utility_metric(date_test,weights_test, y_test, action)

In [None]:
# Persist the model currently bound to `clf` as 'distil_lgb_mean.pickle'.
save(clf, 'distil_lgb_mean')

In [None]:
# Bare literal with no effect; presumably a score noted down by hand -- verify.
2848

In [None]:
def gen_artificial_dataset(X, n_sample, batch_size, shuffle_ratio):
    """Augment ``X`` with resampled rows whose columns are randomly permuted.

    ``int(n_sample / batch_size)`` batches are generated. Each batch draws
    ``batch_size`` rows of ``X`` with replacement; every column of the batch
    is then, with probability ``shuffle_ratio``, permuted independently within
    the batch. The original rows and all batches are concatenated and the
    rows of the result are shuffled.

    Parameters
    ----------
    X : array-like of shape (n_rows, n_columns)
        Source data; it is not modified.
    n_sample : int
        Approximate number of artificial rows to generate.
    batch_size : int
        Number of rows per generated batch.
    shuffle_ratio : float
        Probability that a given column of a batch is permuted.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_rows + n_batch * batch_size, n_columns)``.
    """
    X = np.asarray(X)
    n_batch = int(n_sample / batch_size)
    parts = [X]
    for _ in tqdm(range(n_batch)):
        indices = np.random.randint(0, X.shape[0], size=batch_size)
        # Fancy indexing returns a copy, so shuffling it leaves X untouched.
        # (The original indexed with an undefined name, `ind`.)
        X_temp = X[indices]

        # Iterate over columns (axis 1); the original looped over the row count.
        for col in range(X_temp.shape[1]):
            r = np.random.uniform(0, 1)
            if r < shuffle_ratio:
                # `np.random.shuffle` works in place and returns None, so it is
                # applied to the column view instead of being assigned back.
                np.random.shuffle(X_temp[:, col])
        parts.append(X_temp)
    X_aug = np.concatenate(parts, axis=0)

    ## Shuffle
    # `permutation` returns the shuffled index array; `shuffle` would return
    # None and indexing with None only adds an axis.
    shuffle_id = np.random.permutation(X_aug.shape[0])
    return X_aug[shuffle_id]
    
    