#Importo librerie

In [1]:
!pip install keras-tuner



In [2]:
import pandas as pd 
import numpy as np 
import itertools

from time import time

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, r2_score, precision_recall_fscore_support
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.utils import shuffle

from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, SVMSMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

#import smogn

from keras.models import Sequential
from keras.layers import Dense, LeakyReLU, Dropout, Conv1D, Flatten, MaxPooling1D, AveragePooling1D, SimpleRNN, LSTM
from keras.regularizers import l2, l1, l1_l2
from keras.constraints import max_norm
from keras.callbacks import EarlyStopping, History
from keras import optimizers

from kerastuner import HyperModel
from kerastuner.tuners import Hyperband



#Definizione funzioni

In [3]:
# grafico matrice di confusione

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [4]:
# stampa tempo trascorso

def print_exec_time(start):
  print("Addestramento completato in %f secondi" % (time()-start))

In [5]:
# funzione per creare un sotto-df con le classi target bilanciate, in modo che anche le classi della variabile specificata nel parametro col siano egualmente rappresentate in ciascuna classe target

def undersample(df, label, col=None):
  hits = df[df[label] == 1]
  non_hits = df[df[label] == 0]

  if(col != None):

    non_hits_sampled_array = []

    for a in non_hits[col].unique():
      n = hits[hits[col] == a].id.count()
      non_hits_sampled_array.append(non_hits[non_hits[col] == a].sample(n))

    non_hits_sampled = pd.concat(non_hits_sampled_array)

  else:
    non_hits_sampled = df[df[label] == 0].sample(hits.shape[0])

  return pd.concat([hits,non_hits_sampled])

In [6]:
def over_under_balancing(X, Y, oversample_ratio, oversample_algorithm):
  '''
  oversample_algorithm --> [SMOTE, BorderlineSMOTE, SVMSMOTE, ADASYN]
  '''
  over = oversample_algorithm(sampling_strategy=oversample_ratio) # --> genero nuovi esempi nella classe in minoranza (hit) in modo da portarla al (es.) 50% della classe maggioritaria (non-hit)
  under = RandomUnderSampler(sampling_strategy=1) # --> rimuovo casualmente esempi dalla classe maggioritaria (non-hit) fino a portarla al 100% della classe minoritaria (hit)
  steps = [('o', over),('u', under)]
  pipeline = Pipeline(steps=steps)

  X, Y = pipeline.fit_resample(X, Y)
  
  return X, Y

In [7]:
# new_features_params
def insert_new_features_params(row, new_features_params):
    if(row.features == 'standard features'):
        return None
    else:
        return new_features_params

In [8]:
# selezionare il numero di componenti principali per LDA

def select_n_components(var_ratio, goal_var: float) -> int:
    # Set initial variance explained so far
    total_variance = 0.0
    
    # Set initial number of features
    n_components = 0
    
    # For the explained variance of each feature:
    for explained_variance in var_ratio:
        
        # Add the explained variance to the total
        total_variance += explained_variance
        
        # Add one to the number of components
        n_components += 1
        
        # If we reach our goal level of explained variance
        if total_variance >= goal_var:
            # End the loop
            break
            
    # Return the number of components
    return n_components

#Importo Dataset

In [9]:
!pip install -U -q PyDrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [10]:
drive.CreateFile({'id':'1-0o81KniM9hNtC8zqBaYyQWAmGdTYSS5'}).GetContentFile('dataset_final_4.0.csv')
df = pd.read_csv("dataset_final_4.0.csv").drop('Unnamed: 0',axis=1)

In [11]:
#

#DL

In [13]:
category = 'deep learning'
problem = 'classification'
oversample_algorithm = SMOTE
class_balancing = oversample_algorithm.__name__ + ' + random undersampling'

earlyStopping = EarlyStopping(monitor='loss', min_delta=0.001, patience=10, restore_best_weights=True)

result_df_array = []

x = 'all' # --> parametro da variare per considerare le new_features calcolate sui diversi intorni di anni

# # # FEATURES SELECT # # #

std_features_list = ['valence','acousticness','danceability','duration_ms','energy','instrumentalness','liveness','loudness','speechiness','tempo','explicit','key_0','key_1','key_2','key_3','key_4','key_5','key_6','key_7','key_8','key_9','key_10','key_11','mode']

if(x == 'all'):
  new_features = ['valence_new','acousticness_new','danceability_new','duration_ms_new','energy_new','instrumentalness_new','liveness_new','loudness_new','speechiness_new','tempo_new','explicit_new','key_new','mode_new']
  new_features_list = []

  for i in range(4):
    for feat in new_features:
      new_features_list.append(feat + '_' + str(i))
else:
  new_features_list = ['valence_new_'+str(x),'acousticness_new_'+str(x),'danceability_new_'+str(x),'duration_ms_new_'+str(x),'energy_new_'+str(x),'instrumentalness_new_'+str(x),'liveness_new_'+str(x),'loudness_new_'+str(x),'speechiness_new_'+str(x),'tempo_new_'+str(x),'explicit_new_'+str(x),'key_new_'+str(x),'mode_new_'+str(x)]

extra_features = ['month','year','cos(month)','sin(month)','season_1','season_2','season_3','season_4','past_pop_n_hit','past_pop_n_weeks']
targets_list = ['hit', 'weeks_enc']

features_select = {'standard features': std_features_list+extra_features, 'standard + new features': std_features_list+new_features_list+extra_features, 'new features': new_features_list+extra_features}

for current_features in ['standard + new features', 'new features']: # features_select.keys()

    if(current_features != 'standard features'):
      new_features_params = x
    else:
      new_features_params = None

    # # # YEAR RANGE SELECT # # #

    year_range_select = [(1960,2020), (1960,1969), (1970,1979), (1980,1989), (1990,1999), (2000,2009), (2010,2020)]

    for year_range in year_range_select:

      year_start = year_range[0]
      year_end = year_range[1]

      # seleziono sotto_df
      mask_1 = df.year_YYYY >= year_start
      mask_2 = df.year_YYYY <= year_end
      sub_df = df[mask_1]
      sub_df = sub_df[mask_2]

      # regolo dimensione test set in base a numero di anni considerato
      if((year_end - year_start) > 10):
          test_size = 0.2
          val_size = int(0.05 * sub_df.shape[0])
      else:
          test_size = 0.3
          val_size = int(0.08 * sub_df.shape[0])
      
      # creo validation set
      sub_df = shuffle(sub_df)
      val_set = sub_df.iloc[:val_size].copy()
      sub_df = sub_df.iloc[val_size:].copy()

      # seleziono features correnti
      features = features_select[current_features]

      # creo array numpy
      X_val = val_set[features].drop(['month'], axis=1).values
      Y_val = val_set['hit'].values
      
      X = sub_df[features].drop(['month'], axis=1).values
      Y = sub_df['hit'].values
      
      # creo set addestramento e test
      X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size)
      
      # bilancio train set
      X_train, Y_train = over_under_balancing(X_train, Y_train, oversample_ratio=0.6, oversample_algorithm=oversample_algorithm)

      # bilancio test set
      X_test, Y_test = RandomUnderSampler(sampling_strategy=1).fit_resample(X_test, Y_test)

      # bilancio val set
      X_val, Y_val = RandomUnderSampler(sampling_strategy=1).fit_resample(X_val, Y_val)


      # ------------------------------------------- #

      # MODELLI

      # --- model_0 --- #

      model_0 = Sequential()
      # hidden layer
      model_0.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
      model_0.add(Dropout(0.75))
      # output layer
      model_0.add(Dense(1,activation='sigmoid'))

      # --- model_1 --- #

      model_1 = Sequential()
      # hidden layer
      model_1.add(Dense(1024, input_dim=X_train.shape[1], activation='relu'))
      model_1.add(Dropout(0.75))
      # hidden layer
      model_1.add(Dense(254, activation='relu'))
      model_1.add(Dropout(0.65))
      # hidden layer
      model_1.add(Dense(128, activation='relu'))
      model_1.add(Dropout(0.65))
      # hidden layer
      model_1.add(Dense(64, activation='relu'))
      model_1.add(Dropout(0.6))
      # output layer
      model_1.add(Dense(1,activation='sigmoid'))

      models = [model_0, model_1]

      for i, model in enumerate(models):

        adam = optimizers.Adam()  # learning_rate=learning_rates[i]

        model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
        print('\n')
        model.summary()

        time_0 = time()
        model.fit(X_train,Y_train, epochs=100, validation_data=(X_val, Y_val), batch_size=254, callbacks=[earlyStopping])
        print_exec_time(time_0)

        # METRICHE

        # --- metriche test set --- #

        # loss e accuracy
        loss, accuracy = model.evaluate(X_test,Y_test)

        # eseguo predizioni su test set
        Y_pred = (model.predict(X_test) > 0.5).astype("int32")

        # matrice di confusione
        cm = confusion_matrix(Y_test,Y_pred)

        # precision, recall, fscore
        precision, recall, fscore, support = precision_recall_fscore_support(Y_test, Y_pred)

        # --- metriche test set --- #

        # loss e accuracy
        loss_train, accuracy_train = model.evaluate(X_train, Y_train)

        # eseguo predizioni su test set
        Y_pred_train = (model.predict(X_train) > 0.5).astype("int32")

        # matrice di confusione
        cm_train = confusion_matrix(Y_train,Y_pred_train)

        # precision, recall, fscore
        precision_train, recall_train, fscore_train, support = precision_recall_fscore_support(Y_train, Y_pred_train)

        # imposto a None le metriche della regressione
        MSE = None
        r2 = None
        MSE_train = None
        r2_train = None

        if(i == 1):
          parameters = '1 hidden layer (64)'
        else:
          parameters = '4 hidden layers (1024, 254, 128, 64)'

        tot_time = time() - time_0

        result_df = pd.DataFrame(
                [['MLP',
                parameters,
                class_balancing,
                problem,
                accuracy,
                loss,
                cm,
                precision[0],
                precision[1],
                recall[0],
                recall[1],
                fscore[0],
                fscore[1],
                accuracy_train,
                loss_train,
                cm_train,
                precision_train[0],
                precision_train[1],
                recall_train[0],
                recall_train[1],
                fscore_train[0],
                fscore_train[1],
                MSE,
                r2,
                MSE_train,
                r2_train,
                tot_time]],
          columns=['algorithm',
                  'parameters',
                  'class_balancing',
                  'problem',
                  'test_accuracy',
                  'test_log_loss',
                  'test_confusion_matrix',
                  'test_precision_0',
                  'test_precision_1', 
                  'test_recall_0',
                  'test_recall_1',
                  'test_fscore_0',
                  'test_fscore_1',
                  'train_accuracy',
                  'train_log_loss',
                  'train_confusion_matrix',
                  'train_precision_0',
                  'train_precision_1',
                  'train_recall_0',
                  'train_recall_1',
                  'train_fscore_0',
                  'train_fscore_1',
                  'test_MSE',
                  'test_r2',
                  'train_MSE',
                  'train_r2',
                  'tot_time'])
        

        # inserisco campi mancanti

        # year_range
        year_range_array = [year_range for i in range(result_df.shape[0])]
        result_df.insert(0, 'year_range', year_range_array)

        # features
        features_array = [current_features for i in range(result_df.shape[0])]
        result_df.insert(0, 'features', features_array)

        # new_features_params
        new_features_params_array = [new_features_params for i in range(result_df.shape[0])]
        result_df.insert(0, 'new_features_params', new_features_params_array)

        # category
        category_array = ['deep learning' for i in range(result_df.shape[0])]
        result_df.insert(0, 'category', category_array)
        
        result_df_array.append(result_df)

df_tot = pd.concat(result_df_array)





Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                5504      
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total params: 5,569
Trainable params: 5,569
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
E





Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 64)                5504      
_________________________________________________________________
dropout_5 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 65        
Total params: 5,569
Trainable params: 5,569
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100





Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_14 (Dense)             (None, 64)                5504      
_________________________________________________________________
dropout_10 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 65        
Total params: 5,569
Trainable params: 5,569
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100





Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             (None, 64)                5504      
_________________________________________________________________
dropout_15 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 65        
Total params: 5,569
Trainable params: 5,569
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100





Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_28 (Dense)             (None, 64)                5504      
_________________________________________________________________
dropout_20 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_29 (Dense)             (None, 1)                 65        
Total params: 5,569
Trainable params: 5,569
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100





Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_35 (Dense)             (None, 64)                5504      
_________________________________________________________________
dropout_25 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_36 (Dense)             (None, 1)                 65        
Total params: 5,569
Trainable params: 5,569
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/10





Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_42 (Dense)             (None, 64)                5504      
_________________________________________________________________
dropout_30 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_43 (Dense)             (None, 1)                 65        
Total params: 5,569
Trainable params: 5,569
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/10





Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_49 (Dense)             (None, 64)                3968      
_________________________________________________________________
dropout_35 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_50 (Dense)             (None, 1)                 65        
Total params: 4,033
Trainable params: 4,033
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/10





Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_56 (Dense)             (None, 64)                3968      
_________________________________________________________________
dropout_40 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_57 (Dense)             (None, 1)                 65        
Total params: 4,033
Trainable params: 4,033
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/10





Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_63 (Dense)             (None, 64)                3968      
_________________________________________________________________
dropout_45 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_64 (Dense)             (None, 1)                 65        
Total params: 4,033
Trainable params: 4,033
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/10





Model: "sequential_20"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_70 (Dense)             (None, 64)                3968      
_________________________________________________________________
dropout_50 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_71 (Dense)             (None, 1)                 65        
Total params: 4,033
Trainable params: 4,033
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/10





Model: "sequential_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_77 (Dense)             (None, 64)                3968      
_________________________________________________________________
dropout_55 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_78 (Dense)             (None, 1)                 65        
Total params: 4,033
Trainable params: 4,033
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/10





Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_84 (Dense)             (None, 64)                3968      
_________________________________________________________________
dropout_60 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_85 (Dense)             (None, 1)                 65        
Total params: 4,033
Trainable params: 4,033
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/10





Model: "sequential_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_91 (Dense)             (None, 64)                3968      
_________________________________________________________________
dropout_65 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_92 (Dense)             (None, 1)                 65        
Total params: 4,033
Trainable params: 4,033
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/10

In [14]:
df_tot.sort_values('test_accuracy', ascending=False).head(50)

Unnamed: 0,category,new_features_params,features,year_range,algorithm,parameters,class_balancing,problem,test_accuracy,test_log_loss,test_confusion_matrix,test_precision_0,test_precision_1,test_recall_0,test_recall_1,test_fscore_0,test_fscore_1,train_accuracy,train_log_loss,train_confusion_matrix,train_precision_0,train_precision_1,train_recall_0,train_recall_1,train_fscore_0,train_fscore_1,test_MSE,test_r2,train_MSE,train_r2,tot_time
0,deep learning,all,standard + new features,"(1980, 1989)",MLP,"4 hidden layers (1024, 254, 128, 64)",SMOTE + random undersampling,classification,0.728659,0.536881,"[[617, 203], [242, 578]]",0.718277,0.740077,0.752439,0.704878,0.734961,0.722049,0.775697,0.487963,"[[4629, 1753], [1110, 5272]]",0.806587,0.750463,0.725321,0.826073,0.763798,0.786455,,,,,17.77834
0,deep learning,all,standard + new features,"(2010, 2020)",MLP,"4 hidden layers (1024, 254, 128, 64)",SMOTE + random undersampling,classification,0.726757,0.55239,"[[1315, 535], [476, 1374]]",0.734227,0.719749,0.710811,0.742703,0.722329,0.731045,0.752564,0.524641,"[[4741, 1986], [1343, 5384]]",0.779257,0.730529,0.704772,0.800357,0.740145,0.76385,,,,,19.439748
0,deep learning,all,standard + new features,"(2010, 2020)",MLP,1 hidden layer (64),SMOTE + random undersampling,classification,0.722973,0.564761,"[[1444, 406], [619, 1231]]",0.699952,0.751985,0.780541,0.665405,0.738053,0.706051,0.758213,0.511923,"[[5239, 1488], [1765, 4962]]",0.748001,0.769302,0.778802,0.737624,0.763091,0.75313,,,,,106.680723
0,deep learning,all,new features,"(2000, 2009)",MLP,"4 hidden layers (1024, 254, 128, 64)",SMOTE + random undersampling,classification,0.718093,0.568484,"[[1111, 315], [489, 937]]",0.694375,0.748403,0.779102,0.657083,0.734303,0.699776,0.733104,0.562003,"[[4642, 1306], [1869, 4079]]",0.712947,0.757474,0.78043,0.685777,0.745164,0.719845,,,,,17.634412
0,deep learning,all,new features,"(2010, 2020)",MLP,"4 hidden layers (1024, 254, 128, 64)",SMOTE + random undersampling,classification,0.716209,0.570536,"[[1254, 529], [483, 1300]]",0.721934,0.710771,0.703309,0.729108,0.7125,0.719823,0.72423,0.566458,"[[4656, 2067], [1641, 5082]]",0.7394,0.710869,0.692548,0.755913,0.715207,0.732699,,,,,18.54327
0,deep learning,all,new features,"(2010, 2020)",MLP,1 hidden layer (64),SMOTE + random undersampling,classification,0.715087,0.581805,"[[1343, 440], [576, 1207]]",0.699844,0.732848,0.753225,0.676949,0.725554,0.70379,0.719768,0.580211,"[[4982, 1741], [2027, 4696]]",0.7108,0.729532,0.741038,0.698498,0.725604,0.713678,,,,,66.506095
0,deep learning,all,new features,"(2000, 2009)",MLP,1 hidden layer (64),SMOTE + random undersampling,classification,0.712833,0.570107,"[[1192, 234], [585, 841]]",0.670793,0.782326,0.835905,0.589762,0.744302,0.672531,0.723436,0.561316,"[[4963, 985], [2305, 3643]]",0.682856,0.787165,0.834398,0.612475,0.751059,0.688918,,,,,88.314725
0,deep learning,all,standard + new features,"(1980, 1989)",MLP,1 hidden layer (64),SMOTE + random undersampling,classification,0.710976,0.569046,"[[622, 198], [276, 544]]",0.69265,0.733154,0.758537,0.663415,0.724098,0.696543,0.800768,0.442684,"[[4801, 1581], [962, 5420]]",0.833073,0.774175,0.752272,0.849264,0.790613,0.809983,,,,,118.225825
0,deep learning,all,standard + new features,"(2000, 2009)",MLP,1 hidden layer (64),SMOTE + random undersampling,classification,0.709162,0.579077,"[[1081, 327], [492, 916]]",0.687222,0.736927,0.767756,0.650568,0.72526,0.69106,0.776715,0.501938,"[[4642, 1319], [1343, 4618]]",0.775606,0.777834,0.778728,0.774702,0.777164,0.776265,,,,,116.418715
0,deep learning,all,new features,"(1980, 1989)",MLP,"4 hidden layers (1024, 254, 128, 64)",SMOTE + random undersampling,classification,0.706576,0.554586,"[[561, 245], [228, 578]]",0.711027,0.702309,0.69603,0.717122,0.703448,0.709638,0.735858,0.53674,"[[4403, 1961], [1401, 4963]]",0.758615,0.716782,0.69186,0.779855,0.723702,0.74699,,,,,16.455284


# Export

In [15]:
from google.colab import  drive

# mounts the google drive to Colab Notebook
drive.mount('/content/drive',force_remount=True)

df_tot.to_csv('/content/drive/My Drive/Colab Notebooks/datasets/results_DL_4.1_'+str(x)+'.csv')

Mounted at /content/drive


#bak

In [None]:
'''
# MODELLI

# --- model_0 --- #

model_0 = Sequential()

# hidden layer 1
model_0.add(Dense(88, input_dim=X_train.shape[1], activation='relu'))
# hidden layer 2
model_0.add(Dense(16, activation='relu'))
model_0.add(Dropout(0.1))
# hidden layer 3
model_0.add(Dense(104, activation='relu'))
# hidden layer 4
model_0.add(Dense(104, activation='relu'))
model_0.add(Dropout(0.1))
# hidden layer 5
model_0.add(Dense(16, activation='relu'))
model_0.add(Dropout(0.8))
# output layer
model_0.add(Dense(1,activation='sigmoid'))


# --- model_1 --- #

model_1 = Sequential()

# hidden layer 1
model_1.add(Dense(104, input_dim=X_train.shape[1], activation='relu'))
# hidden layer 2
model_1.add(Dense(88, activation='relu'))
model_1.add(Dropout(0.4))
# hidden layer 3
model_1.add(Dense(16, activation='relu'))
model_1.add(Dropout(0.7))
# hidden layer 4
model_1.add(Dense(128, activation='relu'))
model_1.add(Dropout(0.2))
# hidden layer 5
model_1.add(Dense(64, activation='relu'))
model_1.add(Dropout(0.1))
# output layer
model_1.add(Dense(1,activation='sigmoid'))


# --- model_2 --- #

model_2 = Sequential()

# hidden layer 1
model_2.add(Dense(88, input_dim=X_train.shape[1], activation='relu'))
model_2.add(Dropout(0.3))
# hidden layer 2
model_2.add(Dense(112, activation='relu'))
model_2.add(Dropout(0.6))
# hidden layer 3
model_2.add(Dense(16, activation='relu'))
model_2.add(Dropout(0.6))
# hidden layer 4
model_2.add(Dense(88, activation='relu'))
model_2.add(Dropout(0.2))
# hidden layer 5
model_2.add(Dense(120, activation='relu'))
model_2.add(Dropout(0.6))
# output layer
model_2.add(Dense(1,activation='sigmoid'))


# --- model_3 --- #

model_3 = Sequential()

# hidden layer 1
model_3.add(Dense(72, input_dim=X_train.shape[1], activation='relu'))
model_3.add(Dropout(0.8))
# hidden layer 2
model_3.add(Dense(24, activation='relu'))
# hidden layer 3
model_3.add(Dense(128, activation='relu'))
model_3.add(Dropout(0.4))
# hidden layer 4
model_3.add(Dense(72, activation='relu'))
model_3.add(Dropout(0.7))
# hidden layer 4
model_3.add(Dense(88, activation='relu'))
model_3.add(Dropout(0.3))
# output layer
model_3.add(Dense(1,activation='sigmoid'))


# --- model_4 --- #

model_4 = Sequential()

# hidden layer 1
model_4.add(Dense(472, input_dim=X_train.shape[1], activation='relu'))
model_4.add(Dropout(0.5))
# hidden layer 2
model_4.add(Dense(328, activation='relu'))
model_4.add(Dropout(0.5))
# output layer
model_4.add(Dense(1,activation='sigmoid'))


# --- model_5 --- #

model_5 = Sequential()

# hidden layer 1
model_5.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
# hidden layer 2
model_5.add(Dense(104, activation='relu'))
# hidden layer 3
model_5.add(Dense(72, activation='relu'))
model_5.add(Dropout(0.1))
# output layer
model_5.add(Dense(1,activation='sigmoid'))


# --- model_6 --- #

model_6 = Sequential()

# hidden layer 1
model_6.add(Dense(392, input_dim=X_train.shape[1], activation='relu'))
model_6.add(Dropout(0.4))
# hidden layer 2
model_6.add(Dense(424, activation='relu'))
model_6.add(Dropout(0.3))
# hidden layer 3
model_6.add(Dense(72, activation='relu'))
model_6.add(Dropout(0.1))
# output layer
model_6.add(Dense(1,activation='sigmoid'))



models = [model_0, model_1, model_2, model_3, model_4, model_5, model_6] 
learning_rates = [0.00157, 0.00011, 0.00032, 0.00066, 0.00042, 0.00199, 0.00135]
'''