In [None]:
# Mount Google Drive inside the Colab VM; the data folders used below are reached through /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Preprocessing for unseen_nominal_logs plus unseen_logs

In [None]:
cd drive/MyDrive/SNN_data

In [None]:
 # Go to the directory where the log files are saved

In [None]:
 cd Column32_seq/ 

In [None]:
# Print the current working directory to confirm the directory changes above took effect.
!pwd

In [None]:
import os,re, pickle, random
import numpy as np
import pandas as pd
from itertools import chain
from bs4 import BeautifulSoup

In [None]:
def getInfo(file, ip_spks):
    """
    Parse one log file into a DataFrame holding one row per logged instance.

    The file content is passed through BeautifulSoup ("html.parser") and only the
    resulting text is searched. Every row is assembled from:

    * "Time : <int>"          -> "Time(s)"
    * "Column number: <int>"  -> "Column Number"
    * `ip_spks` consecutive "NeuronID: ..." lines. All integers on such a line
      are collected: the first goes to "spk_ip_<k>", the number of remaining
      integers to "#Spikes_<k>" and the remaining integers (a list) to "ISI_<k>".
    * one "Output Neuron at column ..." line. Its first integer is skipped, the
      number of remaining integers goes to "#Spikes_out" and the remaining
      integers (a list) to "ISI_out".

    Args:
    file : Path of the log file
    ip_spks : Number of input neurons or pre-synaptic neurons

    return :
    df : Dataframe with the columns described above; the list-valued columns
         have object dtype

    Raises:
    ValueError : if `ip_spks` is not positive, if the numbers of time / column /
                 neuron / output entries do not describe the same number of
                 instances, or if a "NeuronID" line carries no integer at all
    """

    if ip_spks <= 0:
        raise ValueError("ip_spks must be a positive integer, got %r" % (ip_spks,))

    with open(file) as f:
        soup = BeautifulSoup(f.read(), "html.parser")
    text = soup.text

    # Making column names
    cols = ["Time(s)", "Column Number"]
    for i in range(ip_spks):
        cols += ["spk_ip_" + str(i), "#Spikes_" + str(i), "ISI_" + str(i)]
    cols += ["#Spikes_out", "ISI_out"]

    # Raw strings everywhere: "\d" inside a normal string is an invalid escape sequence.
    time = [int(t) for t in re.findall(r"Time : (\d+)", text)]
    col_num = [int(c) for c in re.findall(r"Column number: (\d+)", text)]

    # One list of integers per "NeuronID" line and per "Output Neuron" line
    spikes_info = [
        [int(d) for d in re.findall(r"\d+", line)]
        for line in re.findall(r"NeuronID: (.+)", text)
    ]
    spikes_info_out = [
        [int(d) for d in re.findall(r"\d+", line)]
        for line in re.findall(r"Output Neuron at column (.+)", text)
    ]

    # Checking #instances before building anything, so a malformed log is
    # reported with the actual counts instead of a pandas length error.
    n_rows = len(time)
    if not (len(col_num) == n_rows
            and len(spikes_info_out) == n_rows
            and len(spikes_info) == n_rows * ip_spks):
        raise ValueError(
            "Inconsistent log %r: %d time entries, %d column entries, "
            "%d output entries, %d neuron entries (expected %d per instance)"
            % (file, n_rows, len(col_num), len(spikes_info_out),
               len(spikes_info), ip_spks)
        )

    rows = []
    for r in range(n_rows):
        row = [time[r], col_num[r]]

        for x in spikes_info[r * ip_spks:(r + 1) * ip_spks]:
            if not x:
                raise ValueError("NeuronID entry without any integer in %r" % (file,))
            # first integer, count of the remaining ones, the remaining ones
            row += [x[0], len(x) - 1, x[1:]]

        out = spikes_info_out[r]
        row += [len(out) - 1, out[1:]]
        rows.append(row)

    # Build the frame in one go from complete rows. Assigning the ragged nested
    # lists to many columns at once makes NumPy try to create a rectangular
    # array from them, which warns on old NumPy and fails on recent versions.
    df = pd.DataFrame(rows, columns=cols)

    return df

In [None]:
# To compute average ISI (Inter-spiking interval)
def getAvgIsi(x):
  """
  Average difference between consecutive values of `x`, each taken modulo 1000.

  Args:
  x : Sequence of numbers (the notebook applies it to the integer lists stored
      in the "ISI_*" columns)

  return :
  0.0 for an empty sequence, `x[0] % 1000` for a single value, otherwise the
  mean of `(x[k+1] % 1000) - (x[k] % 1000)` over consecutive pairs.

  NOTE(review): the modulo presumably makes the values relative to a
  1000-unit window - confirm. A pair that straddles a multiple of 1000
  contributes a negative difference.
  """

  n = len(x)

  # No values at all: return a plain zero explicitly. Dividing an empty sum
  # by len(x) - 1 == -1 would only give -0.0 by accident.
  if n == 0:
    return 0.0

  if n == 1:
    return x[0] % 1000

  reduced = [v % 1000 for v in x]
  return sum(reduced[k + 1] - reduced[k] for k in range(n - 1)) / (n - 1)

In [None]:
def SaveRawFile(dataframe, i, fol, dir_name):
  """
  Copy `dataframe`, add an average column next to every "ISI_*" column and
  write the result to "<dir_name>/<fol>/Column<i>.xlsx" (without the index).

  For each column whose name starts with "ISI_" (including "ISI_out") a column
  named "Avg_" + <that name> is inserted right after it, holding getAvgIsi()
  applied to every cell. All other columns are copied unchanged, in their
  original order.

  Args :
  dataframe: Dataframe
  i : Post-synaptic neuron ID (used in the output file name)
  fol : Sub-folder of `dir_name` the file is written to
  dir_name : Directory name where formatted logs will be saved

  """

  out_cols = {}

  for e in dataframe.columns:
    out_cols[e] = dataframe[e]

    if e.startswith("ISI_"):
      # Derive the average from the very column that was just copied, so the
      # pairing does not depend on the ISI columns appearing in 0, 1, 2, ... order.
      out_cols["Avg_" + e] = dataframe[e].apply(getAvgIsi)

  # Build the frame once; inserting columns one at a time fragments it.
  f = pd.DataFrame(out_cols)

  f.to_excel(os.path.join(dir_name, fol, "Column" + str(i) + ".xlsx"), index = False)
  

In [None]:
# Every entry of the nominal-log root is used as one column folder; os.listdir returns them in arbitrary order.
fol_name = os.listdir("Unseen_nominal_logs/")
# Sanity check: exactly 32 entries are expected, so any stray or missing entry stops the notebook here.
assert len(fol_name) == 32
print(fol_name)

['Column_17', 'Column_26', 'Column_29', 'Column_16', 'Column_27', 'Column_2', 'Column_19', 'Column_9', 'Column_1', 'Column_30', 'Column_10', 'Column_7', 'Column_24', 'Column_12', 'Column_28', 'Column_22', 'Column_25', 'Column_31', 'Column_20', 'Column_6', 'Column_8', 'Column_23', 'Column_5', 'Column_11', 'Column_3', 'Column_4', 'Column_15', 'Column_0', 'Column_21', 'Column_14', 'Column_18', 'Column_13']


In [None]:
# Create the directory that receives the formatted unseen nominal log files.
# exist_ok=True keeps the cell re-runnable: no FileExistsError when the directory is already there.
os.makedirs("files_32_nominal_raw", exist_ok=True)

# Saving log files in the desired, readable format

In [None]:
for var in [("files_32_nominal_raw", "Unseen_nominal_logs/")]:

  # (directory receiving the formatted files, directory holding the raw logs)
  dir_name, unseen_log = var
  for fol in fol_name:
      logs_list = [i for i in os.listdir(unseen_log + fol) if i.endswith(".txt")]

      # exist_ok=True: re-running the cell must not fail on folders created by an earlier run
      os.makedirs(dir_name + "/" + fol, exist_ok=True)

      # Saving raw data in excel file
      for file_name in logs_list:
        print(f"Folder Name : {fol} | Column name : {file_name}.")
        # File names look like "log_weight<N>.txt": drop the extension and the prefix to get N
        col_num = int(file_name.split(".txt")[0][len("log_weight"):])
        df = getInfo(unseen_log + fol + "/" + file_name, 32)
        SaveRawFile(df, col_num, fol, dir_name)

Folder Name : Column_17 | Column name : log_weight17.txt.


  return asarray(a).ndim


Folder Name : Column_26 | Column name : log_weight26.txt.
Folder Name : Column_29 | Column name : log_weight29.txt.
Folder Name : Column_16 | Column name : log_weight16.txt.
Folder Name : Column_27 | Column name : log_weight27.txt.
Folder Name : Column_2 | Column name : log_weight2.txt.
Folder Name : Column_19 | Column name : log_weight19.txt.
Folder Name : Column_9 | Column name : log_weight9.txt.
Folder Name : Column_1 | Column name : log_weight1.txt.
Folder Name : Column_30 | Column name : log_weight30.txt.
Folder Name : Column_10 | Column name : log_weight10.txt.
Folder Name : Column_7 | Column name : log_weight7.txt.
Folder Name : Column_24 | Column name : log_weight24.txt.
Folder Name : Column_12 | Column name : log_weight12.txt.
Folder Name : Column_28 | Column name : log_weight28.txt.
Folder Name : Column_22 | Column name : log_weight22.txt.
Folder Name : Column_25 | Column name : log_weight25.txt.
Folder Name : Column_31 | Column name : log_weight31.txt.
Folder Name : Column_2

In [None]:
# Helpers for sorting the post-synaptic neuron ID file names in natural (numeric-aware) order

def atof(text):
    """Return float(text), or `text` itself when float() raises ValueError."""
    try:
        return float(text)
    except ValueError:
        return text

def natural_keys(text):
    """
    Sort key that splits `text` around its numeric parts.

    The text is split on every number (an optional sign in front of the number
    is discarded); each piece is then passed through atof(), so numeric pieces
    become floats and the rest stay strings.
    """
    pieces = re.split(r'[+-]?([0-9]+(?:[.][0-9]*)?|[.][0-9]+)', text)
    return list(map(atof, pieces))

In [None]:
# Create the directory that receives the stacked features (spike rate, avg. ISI) and their ground truth (observed ISI).
# exist_ok=True keeps the cell re-runnable: no FileExistsError when the directory is already there.
os.makedirs("StackedFiles_nominal", exist_ok=True)

In [None]:
"""
Extracting features (Spike rate, Avg. ISI) and their ground truth (Observed ISI) and then
apply breakout distribution and then data augmentation.

"""

def StackedFile(path, nG, fol, dirName, numCp):
    """
    Stack the features and targets of every ".xlsx" file in `path` and save
    them as two ".npy" files.

    Files are processed in natural (numeric-aware) file-name order. For each file:

    * the columns "spk_ip_<k>" and "ISI_<k>" (k in range(nG)) are dropped,
    * the features are all remaining columns except 'Time(s)', 'Column Number',
      '#Spikes_out', 'Avg_ISI_out' and 'ISI_out',
    * only the first 2 * numCp feature columns keep their values; the
      remaining feature columns are replaced by zeros (the width is unchanged),
    * the target is the 'ISI_out' column.

    The per-file arrays are concatenated along the first axis and written to
    "<dirName>/X_unseen_stacked_<fol>.npy" and "<dirName>/y_unseen_stacked_<fol>.npy".

    Args:
    path : Directory containing the ".xlsx" files of one post-synaptic neuron folder
    nG : Number of "spk_ip_<k>" / "ISI_<k>" column pairs to drop
    fol : Label inserted into the output file names
    dirName : Directory name where stacked files will be saved
    numCp : Number of mapped crosspoints

    Raises:
    FileNotFoundError : if `path` contains no ".xlsx" file
    ValueError : if 2 * numCp exceeds the number of feature columns

    """
    files = [i for i in os.listdir(path) if i.endswith(".xlsx")]
    files.sort(key=natural_keys)

    if not files:
        # Without this guard the save step below would fail on unbound arrays.
        raise FileNotFoundError("No .xlsx file found in " + str(path))

    # Loop-invariant column lists, built once
    cols_drop = []
    for k in range(nG):
        cols_drop.append("spk_ip_" + str(k))
        cols_drop.append("ISI_" + str(k))

    non_feature_cols = ['Time(s)', 'Column Number', '#Spikes_out', 'Avg_ISI_out', "ISI_out"]
    n_keep = numCp * 2

    X_parts, y_parts = [], []

    for f in files:

        file = pd.read_excel(path + '/' + f)

        # NOTE(review): the target is the "ISI_out" column as read back from
        # Excel, not "Avg_ISI_out", although an earlier note in this function
        # said only the average ISI_out is predicted - confirm which is intended.
        y_train = file['ISI_out'].values

        X_train = file.drop(columns=cols_drop + non_feature_cols).values

        n_rows, n_feat = X_train.shape
        if n_keep > n_feat:
            raise ValueError(
                "numCp=%r needs %d feature columns but %r only has %d"
                % (numCp, n_keep, f, n_feat)
            )

        # Keep the first 2*numCp features, zero out the rest
        X_train_v1 = np.concatenate(
            [X_train[:, :n_keep], np.zeros((n_rows, n_feat - n_keep))], axis=1
        )

        X_parts.append(X_train_v1)
        y_parts.append(y_train)

    # Single concatenation instead of re-copying the growing arrays on every file
    X_tr = np.concatenate(X_parts)
    y_tr = np.concatenate(y_parts)

    with open(dirName + "/X_unseen_stacked_" + fol + ".npy", 'wb') as f:
      np.save(f, X_tr)

    with open(dirName+"/y_unseen_stacked_" + fol + ".npy", 'wb') as f:
      np.save(f, y_tr)


In [None]:
for var1 in [("files_32_nominal_raw", "StackedFiles_nominal")]:

  # (directory with the formatted logs, directory receiving the stacked arrays)
  dr, dirName = var1

  # Saving the features and their ground truth for each column of crossbar array
  for fol in fol_name:
    print(f"{'#' * 25} {fol} {'#' * 25}")
    StackedFile(
        path="/".join([os.getcwd(), dr, fol, ""]),
        nG=32,
        fol=fol,
        dirName=dirName,
        numCp=12,
    )


######################### Column_17 #########################
######################### Column_26 #########################
######################### Column_29 #########################
######################### Column_16 #########################
######################### Column_27 #########################
######################### Column_2 #########################
######################### Column_19 #########################
######################### Column_9 #########################
######################### Column_1 #########################
######################### Column_30 #########################
######################### Column_10 #########################
######################### Column_7 #########################
######################### Column_24 #########################
######################### Column_12 #########################
######################### Column_28 #########################
######################### Column_22 #########################
############