In [None]:
import os,re, pickle, random
import numpy as np
import pandas as pd
from itertools import chain
from bs4 import BeautifulSoup
from xgboost import XGBRegressor
from sklearn.svm import SVR
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import keras
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.losses import MeanAbsoluteError

import matplotlib
from matplotlib import pyplot as plt
from sklearn.metrics import mean_absolute_error, roc_curve, auc
matplotlib.rc('xtick', labelsize=16) 
matplotlib.rc('ytick', labelsize=16) 

In [None]:
# Go to that directory where log files are saved
cd 32x32/ 

In [None]:
def getInfo(file, ip_spks):

    """
    Args:
    file : Log files 
    ip_spks : Number of input neurons or pre-synaptic neurons

    return :
    df : Dataframe
    
    """

    with open(file) as f:
        soup = BeautifulSoup(f.read(), "html.parser")

    df = pd.DataFrame()

    # Making column names
    cols = ["Time(s)"]
    for i in range(ip_spks):
      cols.append("spk_ip_" + str(i))
      cols.append("#Spikes_" + str(i))
      cols.append("ISI_" + str(i))

    for i in range(ip_spks):
      cols += ["spk_out_" + str(i), "#Spikes_out_"+str(i), "ISI_out_"+str(i)]

    time = [int(i.split(":")[1]) for i in re.findall(r"Time : \d+", soup.text)]

    df["Time(s)"] = time

    # NeuronID info for each spike generating neuron
    n_id_info = re.findall("NeuronID: (.+)", soup.text)

    spikes_info = [list(map(int,re.findall(r'(\d+)', i))) for i in n_id_info]
    
    n_id_info_v1 = []

    for idx in range(0, len(spikes_info), ip_spks):
      var = list(chain.from_iterable((x[0],len(x)-1, x[1:]) for x in spikes_info[idx: idx + ip_spks]))
      n_id_info_v1.append(var)
  
    df[cols[1:-3 *ip_spks]] = n_id_info_v1

    # Output column info
    out = re.findall("Output Neuron at column (.+)", soup.text)

    spikes_info_out = [list(map(int, re.findall(r'(\d+)', i))) for i in out]
    out_info = []

    for idx in range(0, len(spikes_info_out), ip_spks):
      var1 = list(chain.from_iterable((x[0],len(x)-1, x[1:]) for x in spikes_info_out[idx: idx + ip_spks]))
      out_info.append(var1)

    # Checking #instances
    assert len(out_info) == len(time) == len(n_id_info_v1)

    df[cols[-3*ip_spks:]] = out_info


    return df

In [None]:
# To compute average ISI (Inter-spiking interval)
def getAvgIsi(x):

  if len(x) == 0:
    return 0

  elif len(x) == 1:
    return x[0]%1000

  else:
    x = [i%1000 for i in x]
    if x[0] == 0:
      x[0] = 1
    return sum([(x[i+1])- (x[i]) for i in range(len(x)-1)])/(len(x)-1)

In [None]:
# To compute COV (coefficient of variation) or Variance of spikes
def cov_or_variance(x, type_metric = "cov"):

  if len(x) == 0:
    return 0

  elif len(x) == 1:
    return x[0]%1000

  else:
    x = [i%1000 for i in x]
    if x[0] == 0:
      x[0] = 1
    imd = np.array([x[i]%1000 for i in range(len(x))])
    mu = np.mean(imd)
    
    if type_metric == "cov": 
      # coefficient of variation
      cov = np.sqrt(np.sum(np.square(imd - mu))/ (len(imd) - 1))/mu
      return cov
    else:
      # Variance of spikes
      var_spk = np.sum(np.square(imd - mu))/ (len(imd) - 1)
      return var_spk

In [None]:
# # To make a directory to save all log files in .xlsx for each pre-synaptic (input) and post-synaptic (output) neuron.
os.mkdir("files_32_raw")

In [None]:
def SaveRawFile(dataframe, fol):
  """
  Args :
  dataframe: Dataframe
  fol : File name of post-synaptic neuron ID
  
  """

  columns = dataframe.columns
  f = pd.DataFrame()

  idx = 0
  for e in columns:
    
    if e.startswith("ISI_"):

      if e.startswith("ISI_out_"):
          f[e] =  dataframe[e]
          f['Avg_ISI_out_'+e.split("ISI_out_")[-1]] =  dataframe[e].apply(lambda x: getAvgIsi(x))
          f['Cov_spk_out_'+e.split("ISI_out_")[-1]] =  dataframe[e].apply(lambda x: cov_or_variance(x, type_metric = "cov"))
          f['Var_spk_out_'+e.split("ISI_out_")[-1]] =  dataframe[e].apply(lambda x: cov_or_variance(x, type_metric = "var"))
        
      else:
        f[e] =  dataframe[e]
        f['Avg_ISI_'+str(idx)] =  dataframe['ISI_' + str(idx)].apply(lambda x: getAvgIsi(x))
        f['Cov_spk_ip_'+str(idx)] =  dataframe['ISI_' + str(idx)].apply(lambda x: cov_or_variance(x, type_metric = "cov"))
        f['Var_spk_ip_'+str(idx)] =  dataframe['ISI_' + str(idx)].apply(lambda x: cov_or_variance(x, type_metric = "var"))
        idx += 1

    else:
      f[e] = dataframe[e]

  f.to_excel("files_32_raw/" + fol[:-4]  + ".xlsx", index = False)
  

In [None]:
fol_name = os.listdir("Output_32x32_precise_1k_logs/")
assert len(fol_name) == 32
print(fol_name)

['log_weight0.txt', 'log_weight11.txt', 'log_weight1.txt', 'log_weight13.txt', 'log_weight15.txt', 'log_weight14.txt', 'log_weight10.txt', 'log_weight12.txt', 'log_weight17.txt', 'log_weight16.txt', 'log_weight23.txt', 'log_weight24.txt', 'log_weight20.txt', 'log_weight19.txt', 'log_weight21.txt', 'log_weight25.txt', 'log_weight18.txt', 'log_weight2.txt', 'log_weight22.txt', 'log_weight29.txt', 'log_weight3.txt', 'log_weight31.txt', 'log_weight30.txt', 'log_weight5.txt', 'log_weight6.txt', 'log_weight26.txt', 'log_weight27.txt', 'log_weight28.txt', 'log_weight4.txt', 'log_weight9.txt', 'log_weight7.txt', 'log_weight8.txt']


# Saving log files in the desirable and readable format

In [None]:
for fol in fol_name:
  
  # Saving raw data in excel file
  cp = fol.split("log_weight")[-1][:-4] # Crosspoints
  print(f"Column name : {fol} having {int(cp)+1} crosspoint/s.")
  df = getInfo("Output_32x32_precise_1k_logs"+ '/' + fol, 32)
  SaveRawFile(df, fol)


Column name : log_weight0.txt having 1 crosspoint/s.


  return asarray(a).ndim
  self[col] = igetitem(value, i)
  f['Avg_ISI_'+str(idx)] =  dataframe['ISI_' + str(idx)].apply(lambda x: getAvgIsi(x))
  f['Cov_spk_ip_'+str(idx)] =  dataframe['ISI_' + str(idx)].apply(lambda x: cov_or_variance(x, type_metric = "cov"))
  f['Var_spk_ip_'+str(idx)] =  dataframe['ISI_' + str(idx)].apply(lambda x: cov_or_variance(x, type_metric = "var"))
  f[e] = dataframe[e]
  f[e] =  dataframe[e]
  f[e] =  dataframe[e]
  f['Avg_ISI_out_'+e.split("ISI_out_")[-1]] =  dataframe[e].apply(lambda x: getAvgIsi(x))
  f['Cov_spk_out_'+e.split("ISI_out_")[-1]] =  dataframe[e].apply(lambda x: cov_or_variance(x, type_metric = "cov"))
  f['Var_spk_out_'+e.split("ISI_out_")[-1]] =  dataframe[e].apply(lambda x: cov_or_variance(x, type_metric = "var"))


Column name : log_weight1.txt having 2 crosspoint/s.
Column name : log_weight10.txt having 11 crosspoint/s.
Column name : log_weight11.txt having 12 crosspoint/s.
Column name : log_weight12.txt having 13 crosspoint/s.
Column name : log_weight13.txt having 14 crosspoint/s.
Column name : log_weight14.txt having 15 crosspoint/s.
Column name : log_weight15.txt having 16 crosspoint/s.
Column name : log_weight16.txt having 17 crosspoint/s.
Column name : log_weight17.txt having 18 crosspoint/s.
Column name : log_weight18.txt having 19 crosspoint/s.
Column name : log_weight19.txt having 20 crosspoint/s.
Column name : log_weight2.txt having 3 crosspoint/s.
Column name : log_weight20.txt having 21 crosspoint/s.
Column name : log_weight21.txt having 22 crosspoint/s.
Column name : log_weight22.txt having 23 crosspoint/s.
Column name : log_weight23.txt having 24 crosspoint/s.
Column name : log_weight24.txt having 25 crosspoint/s.
Column name : log_weight25.txt having 26 crosspoint/s.
Column name : 

In [None]:
# Sort the name of file of post-synaptic neuron ID

def atof(text):
    try:
        retval = float(text)
    except ValueError:
        retval = text
    return retval

def natural_keys(text):

    return [ atof(c) for c in re.split(r'[+-]?([0-9]+(?:[.][0-9]*)?|[.][0-9]+)', text) ]

In [None]:
# # To create a directory to save all features (Spike rate, Avg. ISI/COV/Variance) and their ground truth (Observed ISI) 
os.mkdir("StackedFiles_avg_ISI")
# os.mkdir("StackedFiles_var_spk")
# os.mkdir("StackedFiles_cov_spk")

In [None]:
"""
Extracting features (Spike rate, Avg. ISI/COV/Variance) and their ground truth (Observed ISI) and 
save in .npy format.

"""

def StackedFile(path, nG, variance_feat = False):
    """
    Args: 
    path : File name path for post-synaptic neuron ID
    nG : Number of post-synaptic neurons
    variance_feat : Coefficient of variation/Variance/Avg ISI as features to be included or not

    """

    files = [i for i in os.listdir(path) if i.endswith(".xlsx")]
    files.sort(key=natural_keys)  

    """
    Predicting only average ISI_out, not #Spikes_out
    """
    
    for i,f in enumerate(files):

      # Number of crosspoints (numCp)
      numCp = int(f.split("log_weight")[-1][:-5]) + 1
      print(f"File Name : {f} | Number of crosspoints : {numCp}")

      file = pd.read_excel(path + f)

      cols_drop = []

      for id in range(nG):
        cols_drop.append("spk_ip_" + str(id))
        cols_drop.append("spk_out_" + str(id))
        cols_drop.append("ISI_" + str(id))
        cols_drop.append("ISI_out_" + str(id))
        cols_drop.append("#Spikes_out_" + str(id))

        if variance_feat == "var":
          cols_drop.append("Cov_spk_ip_" + str(id))
          cols_drop.append("Cov_spk_out_" + str(id))
          cols_drop.append("Avg_ISI_" + str(id))
          cols_drop.append("Avg_ISI_out_" + str(id))

        elif not variance_feat: 
          cols_drop.append("Cov_spk_ip_" + str(id))
          cols_drop.append("Cov_spk_out_" + str(id))
          cols_drop.append("Var_spk_ip_" + str(id))
          cols_drop.append("Var_spk_out_" + str(id))
        else:
          cols_drop.append("Avg_ISI_" + str(id))
          cols_drop.append("Avg_ISI_out_" + str(id))
          cols_drop.append("Var_spk_ip_" + str(id))
          cols_drop.append("Var_spk_out_" + str(id))
          

      cols_drop.extend(['Time(s)'])

      file.drop(cols_drop, axis=1, inplace=True)

      file_v1 = file.reset_index(drop=True)

      if variance_feat == "var":
          savePath = "StackedFiles_var_spk"
          y_cols = ["Var_spk_out_" + str(idx) for idx in range(nG)]
      elif not variance_feat: 
          savePath = "StackedFiles_avg_ISI"
          y_cols = ["Avg_ISI_out_" + str(idx) for idx in range(nG)]
      else:
          savePath = "StackedFiles_cov_spk"
          y_cols = ["Cov_spk_out_" + str(idx) for idx in range(nG)]
          

      X_train  = file_v1.drop(y_cols, axis=1).values
      y_train =  file_v1[y_cols].values
      
      X_train_v1 = np.concatenate([X_train[:, :numCp*2], np.zeros((X_train.shape[0], X_train.shape[1] - (numCp*2)))], axis =1)
      
      if i == 0:
        X_tr, y_tr =  X_train_v1, y_train

      else:
        X_tr1, y_tr1 =  X_train_v1, y_train

        X_tr = np.concatenate([X_tr, X_tr1])
        y_tr = np.concatenate([y_tr, y_tr1])
   
      
    with open(savePath + "/X_train_stacked.npy", 'wb') as f:
      np.save(f, X_tr)

    for col in range(nG):
      with open(savePath +"/y_train_stacked_" + str(col) + ".npy", 'wb') as f:
        np.save(f, y_tr[:,col])


In [None]:
# Only considering #Spikes & Avg ISI as features
StackedFile(os.getcwd() + '/' + "files_32_raw/", 32, variance_feat = False)

File Name : log_weight0.xlsx | Number of crosspoints : 1
File Name : log_weight1.xlsx | Number of crosspoints : 2
File Name : log_weight2.xlsx | Number of crosspoints : 3
File Name : log_weight3.xlsx | Number of crosspoints : 4
File Name : log_weight4.xlsx | Number of crosspoints : 5
File Name : log_weight5.xlsx | Number of crosspoints : 6
File Name : log_weight6.xlsx | Number of crosspoints : 7
File Name : log_weight7.xlsx | Number of crosspoints : 8
File Name : log_weight8.xlsx | Number of crosspoints : 9
File Name : log_weight9.xlsx | Number of crosspoints : 10
File Name : log_weight10.xlsx | Number of crosspoints : 11
File Name : log_weight11.xlsx | Number of crosspoints : 12
File Name : log_weight12.xlsx | Number of crosspoints : 13
File Name : log_weight13.xlsx | Number of crosspoints : 14
File Name : log_weight14.xlsx | Number of crosspoints : 15
File Name : log_weight15.xlsx | Number of crosspoints : 16
File Name : log_weight16.xlsx | Number of crosspoints : 17
File Name : log_

In [None]:
# Only considering #Spikes & COV as features
# StackedFile(os.getcwd() + '/' + "files_32_raw/", 32, variance_feat = True)

File Name : log_weight0.xlsx | Number of crosspoints : 1
File Name : log_weight1.xlsx | Number of crosspoints : 2
File Name : log_weight2.xlsx | Number of crosspoints : 3
File Name : log_weight3.xlsx | Number of crosspoints : 4
File Name : log_weight4.xlsx | Number of crosspoints : 5
File Name : log_weight5.xlsx | Number of crosspoints : 6
File Name : log_weight6.xlsx | Number of crosspoints : 7
File Name : log_weight7.xlsx | Number of crosspoints : 8
File Name : log_weight8.xlsx | Number of crosspoints : 9
File Name : log_weight9.xlsx | Number of crosspoints : 10
File Name : log_weight10.xlsx | Number of crosspoints : 11
File Name : log_weight11.xlsx | Number of crosspoints : 12
File Name : log_weight12.xlsx | Number of crosspoints : 13
File Name : log_weight13.xlsx | Number of crosspoints : 14
File Name : log_weight14.xlsx | Number of crosspoints : 15
File Name : log_weight15.xlsx | Number of crosspoints : 16
File Name : log_weight16.xlsx | Number of crosspoints : 17
File Name : log_

In [None]:
# Only considering #Spikes & Variance of spikes as features
# StackedFile(os.getcwd() + '/' + "files_32_raw/", 32, variance_feat = "var")

File Name : log_weight0.xlsx | Number of crosspoints : 1
File Name : log_weight1.xlsx | Number of crosspoints : 2
File Name : log_weight2.xlsx | Number of crosspoints : 3
File Name : log_weight3.xlsx | Number of crosspoints : 4
File Name : log_weight4.xlsx | Number of crosspoints : 5
File Name : log_weight5.xlsx | Number of crosspoints : 6
File Name : log_weight6.xlsx | Number of crosspoints : 7
File Name : log_weight7.xlsx | Number of crosspoints : 8
File Name : log_weight8.xlsx | Number of crosspoints : 9
File Name : log_weight9.xlsx | Number of crosspoints : 10
File Name : log_weight10.xlsx | Number of crosspoints : 11
File Name : log_weight11.xlsx | Number of crosspoints : 12
File Name : log_weight12.xlsx | Number of crosspoints : 13
File Name : log_weight13.xlsx | Number of crosspoints : 14
File Name : log_weight14.xlsx | Number of crosspoints : 15
File Name : log_weight15.xlsx | Number of crosspoints : 16
File Name : log_weight16.xlsx | Number of crosspoints : 17
File Name : log_

In [None]:
def getBinary(y_true, y_pred, margin):
  
  """
  It will binarize the continuous values which are y_true and y_pred with
  the help of margin and threshold to plot ROC (Receiver operating characteristic).

  Args:
  y_true : Observed avg. ISI (continuous values)
  y_pred : Predicted avg. ISI (continuous values)
  margin : The margin is the obtained optimal MAE or a user can take loose margin as per his choice

  return:
  y_t : Binarized y_true
  y_p : Binarized y_pred

  The reason to opt for ROC curve because it offers an elegant way to
  plot true fault detection rate versus false fault detection rate.
 
  """

  thresh = pd.Series(y_true).median()   # We have taken median as a threshold because it is not impacted by the outliers
  y_t = []
  y_p = []

  for t,p in zip(y_true, y_pred):
    if np.abs(t - p) <= margin:
      if t> thresh:
        y_t.append(1)
        y_p.append(1)
      else:
        y_t.append(0)
        y_p.append(0)

    else:
      if (t > thresh) and (p > thresh):  # ex: t=60, p=80 but thresh = 55 --> t,p -> 1, therefore one of them has to be opposite to another because abs(t-p)>15.
        y_t.append(1)
        y_p.append(0)

      elif (t > thresh) and (p <= thresh):  # ex: t=60, p=40 but thresh = 55 --> t->1,  p-> 0, which satisfies the condition, one has to be opposite of another.
        y_t.append(1)
        y_p.append(0)

      elif (t <= thresh) and (p > thresh):  # ex: t=30, p=57 but thresh = 55 --> t->0,  p-> 1,
        y_t.append(0)
        y_p.append(1)

      elif (t <= thresh) and (p <= thresh): # ex: t=25, p=45 but thresh = 55 --> t->0, p-> 1, therefore one of them has to be opposite to another.
        y_t.append(0)
        y_p.append(1)
      

  y_t = np.array(y_t)
  y_p = np.array(y_p)

  return y_t, y_p

In [None]:
def plotROC(y_true, y_pred, reg, margin, color):

  """
  To plot ROC

  Args: 
  y_true : Observed avg. ISI
  y_pred : Predicted avg. ISI
  reg :  Regressor Name
  margin : The margin is the obtained optimal MAE or a user can take loose margin as per his choice
  color : Color

  return:
  y_t : Binarized y_true
  y_p : Binarized y_pred

  """

  y_t, y_p = getBinary(y_true, y_pred, margin)
  
  fpr, tpr, _ = roc_curve(y_t, y_p )
  roc_auc = auc(fpr, tpr)

  print()
  print("False positive rate : ", fpr[1])
  print("True positive rate : ", tpr[1])
  print("ROC Area under curve : ", roc_auc)

  plt.figure(figsize = (6,6))
  lw = 2
  plt.plot(fpr, tpr, color= color, lw=lw, label=f"ROC curve (area = %0.2f) for {reg}." % roc_auc,linewidth=4)
  plt.plot([0, 1], [0, 1], color="navy", lw=lw, linestyle="--")
  plt.xlim([0.0, 1.0])
  plt.ylim([0.0, 1.05])
  plt.xlabel("False Positive Rate", fontsize=15)
  plt.ylabel("True Positive Rate", fontsize=15)
  plt.title("Receiver operating characteristic curve")
  plt.legend(loc="lower right")
  plt.show()

  return y_t, y_p

In [None]:
# # To create a directory for saving scaler and model for each column
os.mkdir('Scaler_avg_ISI')
os.mkdir('Models_avg_ISI')
# os.mkdir('Scaler_cov_spk')
# os.mkdir('Models_cov_spk')
# os.mkdir('Scaler_var_spk')
# os.mkdir('Models_var_spk')

In [None]:
def DeepNN(type, ip_shape = None):

  """
  Args:
  type : ANN or CNN
  ip_shape : Input shape

  return:
  ANN or CNN model

  """

  
  hidden_units = 256
  n_filters = 16
  learning_rate = 0.001 

  # Initialize the weights with He normalization
  he_init = tf.keras.initializers.HeNormal()

  if type == "CNN":
    # Below hyperparameters of the model are selected by grid search.
    model = Sequential([
        Conv2D(filters= n_filters, kernel_size=(3, 3), kernel_initializer=he_init, activation='relu', input_shape=ip_shape),BatchNormalization(),
        MaxPooling2D((2, 2)),
        Conv2D(filters= n_filters, kernel_size=(1, 1), kernel_initializer=he_init, activation= 'relu'),BatchNormalization(),
        MaxPooling2D((1, 1)),
        Flatten(),
        Dense(hidden_units, kernel_initializer=he_init, activation='relu'),
        Dropout(0.3),
        Dense(1, kernel_initializer=he_init, activation='linear')
        ])
    
    # Loss function
    msle =  MeanSquaredError() 
    model.compile(loss=msle, optimizer=Adam(learning_rate=learning_rate), metrics=[MeanAbsoluteError()])

    return model


  elif type == "ANN":
    # Below hyperparameters of the model are selected by grid search.
    model = Sequential([
      Dense(hidden_units, kernel_initializer=he_init, activation='relu'),
      Dropout(0.2),
      Dense(hidden_units, kernel_initializer=he_init, activation='relu'),
      Dropout(0.4),
      Dense(1, kernel_initializer=he_init, activation='linear')])
    
    # Loss function
    msle =  MeanSquaredError() 
    model.compile(loss=msle, optimizer=Adam(learning_rate=learning_rate), metrics=[MeanAbsoluteError()])

    return model


In [None]:
def training(nG, ip_shape, scaling_mode, variance_feat =False):

  """
  Args:
  nG : NUmber of post-synaptic neurons
  scaling_mode : Type of scaling technique
  ip_shape : Dimension of an example (Mainly for CNN)
  variance_feat : Coefficient of variation/Variance/Avg ISI as features included or not

  return :
  MAE of each model that is trained for each post-synaptic neuron column

  """

  MAEs = [[], [], [], [], []]
  row, col, chn = ip_shape

  if variance_feat == "var":
    with open("StackedFiles_var_spk/X_train_stacked.npy", 'rb') as f:
      X = np.load(f)
  elif not variance_feat:
    with open("StackedFiles_avg_ISI/X_train_stacked.npy", 'rb') as f:
      X = np.load(f)
  else:
    with open("StackedFiles_cov_spk/X_train_stacked.npy", 'rb') as f:
      X = np.load(f)


  for id in range(nG):

    print("#"*25 + " " + "Column" + str(id) +" " + "#"*25)

    if variance_feat == "var":
      with open("StackedFiles_var_spk/y_train_stacked_" + str(id) + ".npy", 'rb') as f:
        y = np.load(f)
        y = y/10000  # (for better fit)

    elif not variance_feat:
      with open("StackedFiles_avg_ISI/y_train_stacked_" + str(id) + ".npy", 'rb') as f:
        y = np.load(f)
        y = y/10  # (for better fit)

    else:
      with open("StackedFiles_cov_spk/y_train_stacked_" + str(id) + ".npy", 'rb') as f:
        y = np.load(f)

    print()

    # Splitting the dataset into training, validation, and test set in (4:0.5:0.5) ratio
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.20, random_state=42) 
    X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size=0.50, random_state=42)

    print("X_train : ", X_train.shape)
    print("y_train : ", y_train.shape)
    print("X_val : ", X_val.shape)
    print("y_val : ", y_val.shape)
    print("X_test : ", X_test.shape)
    print("y_test : ", y_test.shape)
    print()


    # Standard Scaling
    if scaling_mode == 'std':
      print("Standard Scaling",'\n')
      scaler = StandardScaler()
      x_train_scaled = scaler.fit_transform(X_train)
      x_test_scaled = scaler.transform(X_test)
      x_val_scaled = scaler.transform(X_val)

      if variance_feat == "var":
        if id == 0:
          with open(os.getcwd() + "/Scaler_var_spk/" + "std_Column_" + str(id) + ".pkl" , 'wb') as file:  
            pickle.dump(scaler, file)

      elif not variance_feat:
        # Saving scaler for each column
        if id == 0:
          with open(os.getcwd() + "/Scaler_avg_ISI/" + "std_Column_" + str(id) + ".pkl" , 'wb') as file:  
            pickle.dump(scaler, file)
      else:
        if id == 0:
          with open(os.getcwd() + "/Scaler_cov_spk/" + "std_Column_" + str(id) + ".pkl" , 'wb') as file:  
            pickle.dump(scaler, file)


    # Min-max Scaling
    elif scaling_mode == "minmax":
      print("Minmax Scaling \n")
      scaler = MinMaxScaler()
      x_train_scaled = scaler.fit_transform(X_train)
      x_test_scaled = scaler.transform(X_test)
      x_val_scaled = scaler.transform(X_val)

      if variance_feat == "var":
        if id == 0:
          with open(os.getcwd() + "/Scaler_var_spk/" + "minmax_Column_" + str(id) + ".pkl" , 'wb') as file:  
            pickle.dump(scaler, file)
      elif not variance_feat:
        if id == 0:
          # Saving scaler for each column
          with open(os.getcwd() + "/Scaler_avg_ISI/" + "minmax_Column_" + str(id) + ".pkl" , 'wb') as file:  
            pickle.dump(scaler, file)
      else:
        if id == 0:
          with open(os.getcwd() + "/Scaler_cov_spk/" + "minmax_Column_" + str(id) + ".pkl" , 'wb') as file:  
            pickle.dump(scaler, file)

    # Training model
    model_name = ["XGBRegressor", "LGBMRegressor", "CatBoostRegressor", "ANN", "CNN"]

    # Below hyperparameters of the models are selected by grid search.
    if variance_feat == "var":
      reg1 = XGBRegressor(n_estimators = 150, max_depth = 4, learning_rate=0.2, booster = 'gbtree', tree_method = 'auto',random_state=42) 
      reg2 = LGBMRegressor(n_estimators = 200, boosting_type = 'dart',max_depth = 5, num_leaves = 25, learning_rate=0.4, random_state =42) 
      reg3 = CatBoostRegressor(learning_rate = 0.05,boosting_type = 'Plain',loss_function = "RMSE",verbose = False) 
    else:
      reg1 = XGBRegressor(n_estimators = 100, max_depth = 5, learning_rate=0.2, booster = 'gbtree', tree_method = 'auto',random_state=42)
      reg2 = LGBMRegressor(n_estimators = 100, boosting_type = 'dart',max_depth = 6, num_leaves = 36, learning_rate=0.4, random_state =42)
      reg3 = CatBoostRegressor(learning_rate = 0.06,boosting_type = 'Plain',loss_function = "RMSE",verbose = False)
    
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', verbose=1, patience=5, mode= 'min')
    ann = DeepNN('ANN')
    cnn = DeepNN('CNN', (row, col, chn))


    for ix,reg in enumerate([reg1, reg2, reg3, ann, cnn]): 

      if len(model_name) -1 == ix:
        reg.fit(x_train_scaled.reshape((x_train_scaled.shape[0],row, col, chn)), y_train, epochs=151, batch_size=128, callbacks=[early_stopping], 
                                                    validation_data=(x_val_scaled.reshape((x_val_scaled.shape[0],row, col, chn)), y_val), verbose=0)
        
      elif len(model_name)-2 == ix:
        reg.fit(x_train_scaled, y_train, epochs=151, batch_size=128, callbacks=[early_stopping], validation_data=(x_val_scaled, y_val), verbose=0)

      else:
        reg.fit(x_train_scaled, y_train)

      if len(model_name) -1 == ix:
        tr_mae = np.round(mean_absolute_error(y_train, reg.predict(x_train_scaled.reshape((x_train_scaled.shape[0],row, col, chn)))), 5)
        val_mae = np.round(mean_absolute_error(y_val, reg.predict(x_val_scaled.reshape((x_val_scaled.shape[0],row, col, chn)))), 5)
        test_mae = np.round(mean_absolute_error(y_test, reg.predict(x_test_scaled.reshape((x_test_scaled.shape[0],row, col, chn)))), 5)

      else:
        tr_mae = np.round(mean_absolute_error(y_train, reg.predict(x_train_scaled)), 5)
        val_mae = np.round(mean_absolute_error(y_val, reg.predict(x_val_scaled)), 5)
        test_mae = np.round(mean_absolute_error(y_test, reg.predict(x_test_scaled)), 5)

      reg_name = model_name[ix]

      print(f"MAE of {reg_name} on training set is {tr_mae}.")
      print(f"MAE of {reg_name} on validation set is {val_mae}.")
      print(f"MAE of {reg_name} on test set is {test_mae}.")
      print()
      print("*"*71)
      print()

      if reg_name == "XGBRegressor":
        MAEs[0].append((tr_mae, val_mae, test_mae))

      elif reg_name == "LGBMRegressor":
        MAEs[1].append((tr_mae, val_mae, test_mae))

      elif reg_name == "CatBoostRegressor":
        MAEs[2].append((tr_mae, val_mae, test_mae))

      elif reg_name == "ANN":
        MAEs[3].append((tr_mae, val_mae, test_mae))

      elif reg_name == "CNN":
        MAEs[4].append((tr_mae, val_mae, test_mae))


      if variance_feat == "var":
        if reg_name in ["XGBRegressor", "LGBMRegressor", "CatBoostRegressor"]:
          # Saving regressor model for each column
          with open(os.getcwd() + "/Models_var_spk/" + reg_name + "_column_" + str(id) + ".pkl" , 'wb') as file:  
            pickle.dump(reg, file)
        else:
          reg.save(os.getcwd()+ "/Models_var_spk/" + reg_name + "_column_" + str(id) +".h5")

      elif not variance_feat:

        if reg_name in ["XGBRegressor", "LGBMRegressor", "CatBoostRegressor"]:
          # Saving regressor model for each column
          with open(os.getcwd() + "/Models_avg_ISI/" + reg_name + "_column_" + str(id) + ".pkl" , 'wb') as file:  
            pickle.dump(reg, file)
        else:
          reg.save(os.getcwd()+ "/Models_avg_ISI/" + reg_name + "_column_" + str(id) +".h5")

      else:
        if reg_name in ["XGBRegressor", "LGBMRegressor", "CatBoostRegressor"]:
          # Saving regressor model for each column
          with open(os.getcwd() + "/Models_cov_spk/" + reg_name + "_column_" + str(id) + ".pkl" , 'wb') as file:  
            pickle.dump(reg, file)
        else:
          reg.save(os.getcwd()+ "/Models_cov_spk/" + reg_name + "_column_" + str(id) +".h5")


  print()
  print("Results :\n")
  for ix1,result in enumerate(MAEs):
    print("*"*25 + " " + model_name[ix1] + " " + "*"*25)
    for col_res in result:
      tr_mae, val_mae, test_mae = col_res
      print(model_name[ix1] + " : | Training MAE : " + str(tr_mae) + " | Validation MAE : " + str(val_mae) + " | Test MAE : " + str(test_mae))

    print()
    print()

    
  return MAEs
    

In [None]:
seed_value= 42
os.environ['PYTHONHASHSEED']=str(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# Considering #Spikes, Avg. ISI as features
MAEs_ISI = training(nG = 32, ip_shape = (8,8,1), scaling_mode = "minmax", variance_feat = False)

######################### Column0 #########################

X_train :  (256000, 64)
y_train :  (256000,)
X_val :  (32000, 64)
y_val :  (32000,)
X_test :  (32000, 64)
y_test :  (32000,)

Minmax Scaling 

MAE of XGBRegressor on training set is 0.36229.
MAE of XGBRegressor on validation set is 0.37359.
MAE of XGBRegressor on test set is 0.36503.

***********************************************************************

MAE of LGBMRegressor on training set is 0.36656.
MAE of LGBMRegressor on validation set is 0.37689.
MAE of LGBMRegressor on test set is 0.36883.

***********************************************************************

MAE of CatBoostRegressor on training set is 0.36037.
MAE of CatBoostRegressor on validation set is 0.36932.
MAE of CatBoostRegressor on test set is 0.35941.

***********************************************************************

Epoch 10: early stopping
MAE of ANN on training set is 0.40308.
MAE of ANN on validation set is 0.40798.
MAE of ANN on test set is

In [None]:
# # Saving MAEs_ISI to numpy array
# with open('MAEs_ISI.npy', 'wb') as f:
#     np.save(f, np.array(MAEs_ISI))

In [1]:
# Loading MAEs_ISI to numpy array
with open('MAEs_ISI.npy', 'rb') as f:
    MAEs_ISI = np.load(f)

MAEs_ISI.shape

(5, 32, 3)

In [None]:
# seed_value= 42
# os.environ['PYTHONHASHSEED']=str(seed_value)
# random.seed(seed_value)
# np.random.seed(seed_value)
# tf.random.set_seed(seed_value)

# # Considering #Spikes, COV as features
# MAEs_COV = training(nG = 32, ip_shape = (8,8,1), scaling_mode = "minmax", variance_feat = True) 

In [None]:
# # Saving MAEs_COV to numpy array
# with open('MAEs_COV.npy', 'wb') as f:
#     np.save(f, np.array(MAEs_COV))

# # Loading MAEs_COV to numpy array
# with open('MAEs_COV.npy', 'rb') as f:
#     MAEs_COV = np.load(f)

In [None]:
# seed_value= 42
# os.environ['PYTHONHASHSEED']=str(seed_value)
# random.seed(seed_value)
# np.random.seed(seed_value)
# tf.random.set_seed(seed_value)

# # Considering #Spikes, Variance as features
# MAEs_VAR = training(nG = 32, ip_shape = (8,8,1), scaling_mode = "std", variance_feat = "var")

In [None]:
# # Saving MAEs_VAR to numpy array
# with open('MAEs_VAR.npy', 'wb') as f:
#     np.save(f, np.array(MAEs_VAR))

In [None]:
# # Loading MAEs_VAR to numpy array
# with open('MAEs_VAR.npy', 'rb') as f:
#     MAEs_VAR = np.load(f)

In [None]:
def getMu_Std(maes):
  """
  Args:
  maes :  Mean absolute error for all columns for each model

  return :
  mu, std : Mean and Standard deviation of performance of each model

  """

  # Fetching MAE of test set across each column for each model
  mae_te = [[j[-1] for j in i] for i in maes]

  # Mean of MAE of test set
  mu = np.mean(mae_te, axis=1)

  # Standard deviation of MAE of test set
  std = np.sqrt(np.var(mae_te, axis=1))

  return mu, std

In [None]:
# Considering only #Spikes, Avg ISI as features
# ["XGBRegressor", "LGBMRegressor", "CatBoostRegressor", "ANN", "CNN"]
mu_ISI, std_ISI = getMu_Std(MAEs_ISI)

In [None]:
# Considering only #Spikes, COV as features
# ["XGBRegressor", "LGBMRegressor", "CatBoostRegressor", "ANN", "CNN"]
# mu_COV, std_COV = getMu_Std(MAEs_COV)

In [None]:
# Considering only #Spikes, Variance as features
# ["XGBRegressor", "LGBMRegressor", "CatBoostRegressor", "ANN", "CNN"]
# mu_VAR, std_VAR = getMu_Std(MAEs_VAR)