### Connect Drive

In [92]:
# Mount Google Drive inside the Colab runtime; the data paths used by the
# loading cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive') 

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Package Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import math
import pandas as pd
import tensorflow as tf
from scipy.signal import savgol_filter
from collections import Counter

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from scipy.spatial import distance
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances, cosine_distances

### GPU Device

In [None]:
# Device name handed to tf.device() by the data loading cells.
# NOTE(review): per the TensorFlow docs this is an empty string when no GPU is visible - confirm
# the notebook is meant to run in that case.
gpu = tf.test.gpu_device_name()
print(gpu)




### Reshape for Frame by Frame Plots

In [None]:
def reshape_data(df, rows, cols):
  """Turn a (samples x pixels) frame into a stack of 2D frames.

  Each row of `df` is unfolded column-major (Fortran order) into a
  `rows` x `cols` image, giving an array of shape (n_samples, rows, cols).
  """
  flat = df.values  # NumPy view of the frame, one row per time sample
  frames = np.reshape(flat, (-1, rows, cols), order='F')
  return frames

### Pre-Processing Helper Functions

In [None]:
def filter_by_vref(X, v_thresh=70):
    '''
    Flags pixels that show a sharp upward step at the very start of the recording
    Parameters
    ---------
    X : ndarray
        Input 2D array (T x NM). T = time samples, NM = total number of pixels
    v_thresh : int, optional
        Step size d(i)=X(i+1)-X(i), in mV, that must be exceeded. Default is 70
    Returns
    -------
    ndarray
        1D array of bool with dimension (NM). True for a pixel when at least one step between
        consecutive samples within the first 10 samples (i.e. at most 9 differences) is > v_thresh
    '''
    head = X[:10, :]  # only the first 10 time samples are inspected
    steps = np.diff(head, axis=0)  # d(i) = X(i+1) - X(i) for every pixel
    exceeded = steps > v_thresh
    return exceeded.any(axis=0)

In [None]:
def filter_by_vrange(X, v_range=(100, 900)):
    '''
    Flags pixels whose signal stays strictly inside a voltage window
    Parameters
    ---------
    X : ndarray
        Input 2D array (T x NM). T = time samples, NM = total number of pixels
    v_range : (int, int), optional
        (minimum, maximum) allowable voltage in mV; both limits are exclusive. Default is (100, 900)
    Returns
    -------
    ndarray
        1D array of bool with dimension (NM). True for a pixel when every sample lies in v_range
    '''
    under_ceiling = (X < v_range[1]).all(axis=0)  # never reaches the upper limit
    over_floor = (X > v_range[0]).all(axis=0)  # never drops to the lower limit
    return under_ceiling & over_floor


In [None]:
def filter_by_derivative(X, vthresh=5):
    """ Flags pixels whose sample-to-sample change never reaches vthresh in magnitude
    Parameters
    ----------
    X : ndarray
        input 2D array of shape TxNM
    vthresh : int
        largest allowed |X(i+1)-X(i)| (exclusive). Default is 5
    Returns
    -------
    ndarray
        1D array of bool with dimension (NM). True for a pixel when every absolute
        difference between consecutive samples is strictly below vthresh
    """
    step_sizes = np.abs(np.diff(X, axis=0))  # magnitude of each consecutive change, per pixel
    is_small = step_sizes < vthresh
    return is_small.all(axis=0)

In [None]:
def filter_active_pixels(df, v_thresh_ref=50, v_range=(100, 900), v_thresh_deriv=5): #v_thresh_ref changed from 70 to 50
  """Zero out every pixel column that fails any of the three activity checks.

  A pixel is kept when it passes filter_by_vref, filter_by_vrange and
  filter_by_derivative; all other columns are overwritten with 0.

  Parameters
  ----------
  df : DataFrame
      one row per time sample, one column per pixel
  v_thresh_ref : int, optional
      threshold forwarded to filter_by_vref. Default is 50
  v_range : (int, int), optional
      window forwarded to filter_by_vrange. Default is (100, 900)
  v_thresh_deriv : int, optional
      threshold forwarded to filter_by_derivative. Default is 5

  Returns
  -------
  DataFrame
      the same object that was passed in; it is modified in place
  """
  values = df.values  # evaluated once and shared by the three checks
  active = filter_by_vref(values, v_thresh_ref) & filter_by_vrange(values, v_range) & filter_by_derivative(values, v_thresh_deriv)

  # one boolean-mask assignment instead of a Python loop with a .loc write per column
  df.loc[:, ~active] = 0

  return df

In [None]:
def filter_active_pixels_drop(df, v_thresh_ref=50, v_range=(100, 900), v_thresh_deriv=5): #v_thresh_ref changed from 70 to 50
  """Return only the pixel columns that pass all three activity checks.

  Same selection rule as filter_active_pixels, but failing columns are
  removed from the result instead of being zeroed.
  """
  passes_vref = filter_by_vref(df.values, v_thresh_ref)
  passes_vrange = filter_by_vrange(df.values, v_range)
  passes_deriv = filter_by_derivative(df.values, v_thresh_deriv)
  keep = passes_vref & passes_vrange & passes_deriv

  # column selection by boolean mask
  return df.loc[: , keep]

In [None]:
def filter_active_pixels_deriv(df, v_thresh_deriv=5): 
  """Keep only the pixel columns that pass filter_by_derivative at v_thresh_deriv."""
  keep = filter_by_derivative(df.values, v_thresh_deriv)
  return df.loc[: , keep]  # failing pixels are dropped, not zeroed

In [None]:
def time_to_index(times, time_vect):
    '''
    Maps each requested time to the position of the nearest sampling time
    Arguments
    ---------
    times : list
        desired times
    time_vect : nparray
        times at which the values were sampled
    Returns
    -------
    list
        one index into time_vect per entry of times, pointing at the sample whose
        time is closest to the requested one (first match wins on a tie)
    '''
    return [np.argmin(np.abs(time_vect - wanted)) for wanted in times]


def find_loading_time(time_vect, X, bounds=(600, 900), viz=False, settle_samples=10):  # for v2
    ''' Finds loading and settling time for the data of v2 chip
    Parameters
    ----------
    time_vect : ndarray
        1D array with dimension T containing the sampling times
    X : ndarray
        2D array with dimension TxNM containing the sampled data
    bounds : list, optional
        tuple containing the minimum and maximum times (in ms) where the loading time has to be searched.
        Default is (600, 900)
    viz : bool, optional
        if viz=True, show the plot. Default is False
    settle_samples : int, optional
        number of samples added after the loading index to reach the settled index. Default is 10
    Returns
    -------
    tuple
        - settled_index : index at which the settling occurs
        - settled_time : time at which the settling occurs
    Raises
    ------
    ValueError
        if bounds map to an empty range of samples, so there is nothing to search
    '''

    search_start, search_end = time_to_index(bounds, time_vect)  # for each time in bounds, find the index
    # of the sample (in time_vect) that is closest to the desired one (in bounds)
    if search_end <= search_start:
        # np.argmax on an empty slice would raise a far less helpful ValueError
        raise ValueError('bounds %r select no samples to search for the loading time' % (bounds,))

    X_mean = np.mean(X, axis=1)  # for each sample, calculate the mean of all pixels
    X_mean_diff = np.diff(X_mean)  # find the derivative

    # index where the derivative is max in the specified interval; +1 because diff[i] = mean[i+1] - mean[i]
    loading_index = np.argmax(X_mean_diff[search_start:search_end]) + search_start + 1
    # add settling time, but never point past the last available sample
    settled_index = min(loading_index + settle_samples, len(time_vect) - 1)
    settled_time = time_vect[settled_index]  # find the time that index corresponds to

    if viz:  # if viz is true, plot the following
        fig, ax = plt.subplots(3, 1)
        fig.suptitle('Finding Loading Time...')

        ax[0].set(title='Active Chemical Pixels, ACP')
        ax[0].plot(time_vect, X)  # plot the active chemical pixels

        ax[1].set(title='Mean(ACP)')
        ax[1].plot(time_vect, X_mean)  # plot the average of the pixels
        ax[1].axvline(time_vect[search_start], color='C1')  # plot vertical line: beginning of the interval
        ax[1].axvline(time_vect[search_end], color='C1')  # plot vertical line: end of the interval
        ax[1].axvline(settled_time, color='C2')  # plot vertical line: the settled time that was found

        ax[2].set(title='Diff(Mean(ACP))')
        ax[2].plot(time_vect[1:], X_mean_diff)  # plot the derivative of the mean
        ax[2].axvline(time_vect[search_start], color='C1')  # plot vertical line: beginning of the interval
        ax[2].axvline(time_vect[search_end], color='C1')  # plot vertical line: end of the interval
        ax[2].axvline(settled_time, color='C2')  # plot vertical line: the settled time that was found

        plt.tight_layout()
        plt.show()
    return settled_index, settled_time

In [None]:
def preprocess_data(df, deriv_thresh, settle_bounds, row_dim, col_dim, average=False, drop_pixel=False, deriv_thresh_bgsub=5):
  """Clean one raw pixel recording and append a smoothed 'Average Output' column.

  Steps: filter inactive pixels, cut everything before the settling point,
  subtract each pixel's first remaining sample, filter again on the
  baseline-subtracted data, keep the first 400 samples, then average across
  pixels and smooth the average with a Savitzky-Golay filter (window 101, order 3).

  Parameters
  ----------
  df : DataFrame
      one row per time sample (index = sampling times), one column per pixel
  deriv_thresh : int
      derivative threshold used by the first activity filter
  settle_bounds : (int, int)
      time window passed to find_loading_time as the search bounds
  row_dim, col_dim : int
      not used by this function; kept so existing calls keep working
  average : bool, optional
      not used by this function; kept so existing calls keep working
  drop_pixel : bool, optional
      True drops inactive pixel columns, False sets them to 0. Default is False
  deriv_thresh_bgsub : int, optional
      derivative threshold for the filter applied after baseline subtraction. Default is 5

  Returns
  -------
  DataFrame
      the retained pixel columns plus the 'Average Output' column
  """
  if(drop_pixel): # drop inactive pixels
    df = filter_active_pixels_drop(df=df, v_thresh_deriv=deriv_thresh)
  else: # set inactive pixels to 0
    df = filter_active_pixels(df=df, v_thresh_deriv=deriv_thresh) # filter out inactive pixels

  # honour the caller's search window instead of a hard-coded (600, 900)
  settle_idx, _ = find_loading_time(df.index, df, bounds=settle_bounds, viz=False) # find settling point
  df = df.iloc[settle_idx:, :] # use only the data after the settling time
  df = df.sub(df.iloc[0, :], axis='columns') # subtract each pixel's first sample from its whole trace

  # run the second filter once and keep its result only if some pixels survive it
  stable = filter_active_pixels_deriv(df=df, v_thresh_deriv=deriv_thresh_bgsub)
  if(len(stable.columns) != 0):
    df = stable

  # keep the first 150+250 = 400 samples; copy so the column added below is written to an
  # independent frame rather than to a slice of the input
  df = df.iloc[0:150+250, :].copy()
  df['Average Output'] = df.mean(axis=1) # compute the mean value after filtering inactive pixels

  # df['Average Output'] = normalise_data(df['Average Output']) # normalise data using min-max scaling

  df['Average Output'] = savgol_filter(df['Average Output'],101, 3) # filter to smooth out the noise in the data
  return df

In [None]:
def normalise_data(series):
  """Min-max scale `series` so its smallest value maps to 0 and its largest to 1."""
  lowest = series.min()
  span = series.max() - lowest
  return (series - lowest) / span

### Data Loading Helper Functions

In [None]:
def load_partial_covid_exp(filepath, filter_window, filter_poly):
  """Load one experiment summary CSV and return its two smoothed curves.

  The 'bot' columns become the first returned frame and the 'top' columns the
  second. Each signal column is smoothed with a Savitzky-Golay filter
  (filter_window, filter_poly); rows containing NaN are then dropped.

  Returns
  -------
  (DataFrame, DataFrame)
      df_pos, df_neg - each with columns "Time Elapsed" and "Average Output"
  """
  data = pd.read_csv(filepath, header=0)

  def smoothed_curve(time_col, signal_col):
    # smoothing runs on the full column, before any NaN rows are removed
    data[signal_col] = savgol_filter(data[signal_col], filter_window, filter_poly)
    curve = pd.DataFrame({"Time Elapsed" : data[time_col], "Average Output": data[signal_col]})
    return curve.dropna()

  df_pos = smoothed_curve('time bot', 'average bs data bot')
  df_neg = smoothed_curve('time top', 'average bs data top')

  return df_pos, df_neg

### Evaluation Metric Helper Functions

In [None]:
def accuracy(classifications):
  """Percentage of samples whose predicted label equals the true label.

  Parameters
  ----------
  classifications : dict
      maps a sample name to a pair whose element 0 is the predicted label and
      element 1 the true label

  Returns
  -------
  float
      accuracy in percent (0-100); 0.0 when `classifications` is empty
  """
  # use the argument, not the notebook-level `final_classifications` variable
  outcomes = list(classifications.values())
  if not outcomes:
    return 0.0  # nothing to score; avoids a ZeroDivisionError

  total_correct = sum(1 for outcome in outcomes if outcome[0] == outcome[1])
  return (total_correct/len(outcomes))*100

In [None]:
def sensitivity(classifications):
  """Percentage of truly positive samples that were predicted positive (recall).

  Parameters
  ----------
  classifications : dict
      maps a sample name to a pair whose element 0 is the predicted label and
      element 1 the true label (anything int() accepts, 1 = positive)

  Returns
  -------
  float
      100 * TP / (TP + FN); 0.0 when there are no truly positive samples
  """
  true_pos = 0
  false_neg = 0

  # use the argument, not the notebook-level `final_classifications` variable
  for outcome in classifications.values():
    true_label = int(outcome[1])
    predicted = int(outcome[0])

    if true_label == 1 and predicted == 1:
      true_pos += 1
    elif true_label == 1 and predicted == 0:
      false_neg += 1

  positives_seen = true_pos + false_neg
  if positives_seen == 0:
    return 0.0  # metric is undefined without positives; avoids a ZeroDivisionError

  return 100*(true_pos/positives_seen)

In [None]:
def specificity(classification):
  """Percentage of truly negative samples that were predicted negative.

  Parameters
  ----------
  classification : dict
      maps a sample name to a pair whose element 0 is the predicted label and
      element 1 the true label (anything int() accepts, 0 = negative)

  Returns
  -------
  float
      100 * TN / (TN + FP); 0.0 when there are no truly negative samples
  """
  true_neg = 0
  false_pos = 0

  # use the argument, not the notebook-level `final_classifications` variable
  for outcome in classification.values():
    true_label = int(outcome[1])
    predicted = int(outcome[0])

    if true_label == 0 and predicted == 0:
      true_neg += 1
    elif true_label == 0 and predicted == 1:
      false_pos += 1

  negatives_seen = true_neg + false_pos
  if negatives_seen == 0:
    return 0.0  # metric is undefined without negatives; avoids a ZeroDivisionError

  return 100*(true_neg/negatives_seen)

In [None]:
def precision(classification):
  """Percentage of positive predictions that are truly positive.

  Parameters
  ----------
  classification : dict
      maps a sample name to a pair whose element 0 is the predicted label and
      element 1 the true label (anything int() accepts, 1 = positive)

  Returns
  -------
  float
      100 * TP / (TP + FP); 0.0 when nothing was predicted positive
  """
  true_pos = 0
  false_pos = 0

  # use the argument, not the notebook-level `final_classifications` variable
  for outcome in classification.values():
    true_label = int(outcome[1])
    predicted = int(outcome[0])

    if predicted == 1 and true_label == 1:
      true_pos += 1
    elif predicted == 1 and true_label == 0:
      false_pos += 1

  predicted_pos = true_pos + false_pos
  if predicted_pos == 0:
    return 0.0  # metric is undefined without positive predictions; avoids a ZeroDivisionError

  return 100*(true_pos/predicted_pos)

In [None]:
def f1(classification):
  """Harmonic mean of precision() and sensitivity() for `classification`.

  Both inputs are percentages, so the result is also on a 0-100 scale.
  Returns 0.0 when precision and sensitivity are both 0 instead of dividing by zero.
  """
  # evaluate each metric once rather than twice
  prec = precision(classification)
  sens = sensitivity(classification)

  denominator = prec + sens
  if denominator == 0:
    return 0.0

  return (2*prec*sens)/denominator

### Array Dims

In [None]:
# Pixel array dimensions. The loading cells keep the first arr_rows*arr_cols
# columns of every vsChem export.
arr_rows = 78
arr_cols = 56

### Load Data

#### Positive Samples

In [None]:
## Average pixel value for all samples
## One *_data_export.csv per positive experiment. Only the "Time Elapsed" column of these
## frames is used by the next cell, as the index of the matching per-pixel frame.
## NOTE(review): everything inside this tf.device block is a pandas call; presumably the
## device context has no effect on it - confirm.

with tf.device(gpu):
  ## Gamma 1
  avg_data_g1_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/100921_DNA/100921_DNA/Data/gamma1.app.1e5/gamma1.app.1e5_data_export.csv"
  avg_g1 = pd.read_csv(avg_data_g1_file, header=0)

  ## Gamma 2
  avg_data_g2_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/100921_DNA/100921_DNA/Data/gamma2.app.1e4/gamma2.app.1e4_data_export.csv"
  avg_g2 = pd.read_csv(avg_data_g2_file, header=0)

  ## Gamma 3
  avg_data_g3_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/100921_DNA/100921_DNA/Data/gamma3.app.1e5/gamma3.app.1e5_data_export.csv"
  avg_g3 = pd.read_csv(avg_data_g3_file, header=0)
  
  ## Gamma 5 
  avg_data_g5_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/100921_DNA/100921_DNA/Data/gamma5.app.1e4/gamma5.app.1e4_data_export.csv"
  avg_g5 = pd.read_csv(avg_data_g5_file, header=0)

  ## 22RV1.ap1
  avg_data_22rv1_ap1_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22RV1.ap1/22RV1.ap1_data_export.csv"
  avg_22rv1_ap1 = pd.read_csv(avg_data_22rv1_ap1_file, header=0)

  ## 22RV1.ap2
  avg_data_22rv1_ap2_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22RV1.ap2/22RV1.ap2_data_export.csv"
  avg_22rv1_ap2 = pd.read_csv(avg_data_22rv1_ap2_file, header=0)

  ## 22RV1y.p1
  avg_data_22rv1y_p1_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22Rv1y.p1/22Rv1y.p1_data_export.csv"
  avg_22rv1y_p1 = pd.read_csv(avg_data_22rv1y_p1_file, header=0)

  ## 22RV1y.p3
  avg_data_22rv1y_p3_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22Rv1y.p3/22Rv1y.p3_data_export.csv"
  avg_22rv1y_p3 = pd.read_csv(avg_data_22rv1y_p3_file, header=0)

  ## 22RV1y.p4
  avg_data_22rv1y_p4_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22Rv1y.p4/22Rv1y.p4_data_export.csv"
  avg_22rv1y_p4 = pd.read_csv(avg_data_22rv1y_p4_file, header=0)

  ## ARV7.p1
  ## The first data row is dropped here, so this frame is one row shorter than the CSV.
  avg_data_arv7_p1_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/ARV7.p1/ARV7.p1_data_export.csv"
  avg_arv7_p1 = pd.read_csv(avg_data_arv7_p1_file, header=0).iloc[1:, :].reset_index(drop=True) # row 0 was NAN

  ## ARV7.p3
  avg_data_arv7_p3_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/ARV7.p3/ARV7.p3_data_export.csv"
  avg_arv7_p3 = pd.read_csv(avg_data_arv7_p3_file, header=0)

  ## ARV7.p4
  avg_data_arv7_p4_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/ARV7.p4/ARV7.p4_data_export.csv"
  avg_arv7_p4 = pd.read_csv(avg_data_arv7_p4_file, header=0)

  

In [None]:
## All pixel values for each time stamp
## Each *_vsChem_export.csv is read without a header, cut to the first arr_rows*arr_cols
## columns, and indexed by the "Time Elapsed" column of the matching average frame.
## Assigning the index requires both frames to have the same number of rows.

with tf.device(gpu):
  ## Gamma 1
  g1_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/100921_DNA/100921_DNA/Data/gamma1.app.1e5/gamma1.app.1e5_vsChem_export.csv"
  g1 = pd.read_csv(g1_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  g1.index = avg_g1["Time Elapsed"]

  ## Gamma 2
  g2_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/100921_DNA/100921_DNA/Data/gamma2.app.1e4/gamma2.app.1e4_vsChem_export.csv"
  g2 = pd.read_csv(g2_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  g2.index = avg_g2["Time Elapsed"]

  ## Gamma 3
  g3_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/100921_DNA/100921_DNA/Data/gamma3.app.1e5/gamma3.app.1e5_vsChem_export.csv"
  g3 = pd.read_csv(g3_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  g3.index = avg_g3["Time Elapsed"]

  ## Gamma 5
  g5_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/100921_DNA/100921_DNA/Data/gamma5.app.1e4/gamma5.app.1e4_vsChem_export.csv"
  g5 = pd.read_csv(g5_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  g5.index = avg_g5["Time Elapsed"]

  ## 22RV1.ap1
  rv1_ap1_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22RV1.ap1/22RV1.ap1_vsChem_export.csv"
  rv1_ap1 = pd.read_csv(rv1_ap1_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  rv1_ap1.index = avg_22rv1_ap1['Time Elapsed']

  ## 22RV1.ap2
  rv1_ap2_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22RV1.ap2/22RV1.ap2_vsChem_export.csv"
  rv1_ap2 = pd.read_csv(rv1_ap2_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  rv1_ap2.index = avg_22rv1_ap2['Time Elapsed']

  ## 22RV1y.p1
  rv1y_p1_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22Rv1y.p1/22Rv1y.p1_vsChem_export.csv"
  rv1y_p1 = pd.read_csv(rv1y_p1_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  rv1y_p1.index = avg_22rv1y_p1['Time Elapsed']

  ## 22RV1y.p3
  rv1y_p3_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22Rv1y.p3/22Rv1y.p3_vsChem_export.csv"
  rv1y_p3 = pd.read_csv(rv1y_p3_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  rv1y_p3.index = avg_22rv1y_p3['Time Elapsed']

  ## 22RV1y.p4
  rv1y_p4_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/22Rv1y.p4/22Rv1y.p4_vsChem_export.csv"
  rv1y_p4 = pd.read_csv(rv1y_p4_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  rv1y_p4.index = avg_22rv1y_p4['Time Elapsed']

  ## ARV7.p1
  ## NOTE(review): avg_arv7_p1 had its first row dropped on load while this frame is not
  ## trimmed; the index assignment only succeeds if the row counts still match - confirm.
  arv7_p1_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/ARV7.p1/ARV7.p1_vsChem_export.csv"
  arv7_p1 = pd.read_csv(arv7_p1_file, header=None).iloc[:, :(arr_rows*arr_cols)] 
  arv7_p1.index = avg_arv7_p1["Time Elapsed"]

  ## ARV7.p3
  arv7_p3_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/ARV7.p3/ARV7.p3_vsChem_export.csv"
  arv7_p3 = pd.read_csv(arv7_p3_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  arv7_p3.index = avg_arv7_p3["Time Elapsed"]

  ## ARV7.p4
  arv7_p4_file = "/content/drive/MyDrive/Final-Year-Project/DNAPositives/ARV7.p4/ARV7.p4_vsChem_export.csv"
  arv7_p4 = pd.read_csv(arv7_p4_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  arv7_p4.index = avg_arv7_p4["Time Elapsed"]



#### Negative Samples

In [None]:
## Average pixel value for all samples
## One *_data_export.csv per experiment stored under DNANegatives. Only the "Time Elapsed"
## column of these frames is used by the next cell.

with tf.device(gpu):  
  ## ARV7.n1
  avg_data_arv7_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/ARV7.n1/ARV7.n1_data_export.csv"
  avg_arv7 = pd.read_csv(avg_data_arv7_file, header=0)

  ## Yap.n2
  avg_data_yap_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/yap.n2/yap.n2_data_export.csv"
  avg_yap = pd.read_csv(avg_data_yap_file, header=0)

  ## Yap1.n2
  ## NOTE(review): reset_index() is called without drop=True here and for Yap1.n1.1, unlike the
  ## ARV7.p1 load in the positives cell; per the pandas docs the old index is then kept as an
  ## extra "index" column - confirm that is intended.
  avg_data_yap1_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/yap1.n2/yap1.n2_data_export.csv"
  avg_yap1 = pd.read_csv(avg_data_yap1_file, header=0).iloc[1:, :].reset_index() # row 0 was NAN

  ## Yap1.n1.1
  avg_data_yap1n1_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/yap1.n1.1/yap1.n1.1_data_export.csv"
  avg_yap1n1 = pd.read_csv(avg_data_yap1n1_file, header=0).iloc[1:, :].reset_index() # row 0 was NAN

  ## ARV7.n2
  avg_data_arv72_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/ARV7.n2/ARV7.n2_data_export.csv"
  avg_arv72 = pd.read_csv(avg_data_arv72_file, header=0)

  ## ARV7.n3
  avg_data_arv73_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/ARV7.n3/ARV7.n3_data_export.csv"
  avg_arv73 = pd.read_csv(avg_data_arv73_file, header=0)

  ## DU145a.p1
  avg_data_du145a_p1_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/DU145a.p1/DU145a.p1_data_export.csv"
  avg_du145a_p1 = pd.read_csv(avg_data_du145a_p1_file, header=0)

  ## DU145a.p2
  avg_data_du145a_p2_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/DU145a.p2/DU145a.p2_data_export.csv"
  avg_du145a_p2 = pd.read_csv(avg_data_du145a_p2_file, header=0)

  ## DU145a.p3
  avg_data_du145a_p3_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/DU145a.p3/DU145a.p3_data_export.csv"
  avg_du145a_p3 = pd.read_csv(avg_data_du145a_p3_file, header=0)

  ## DU145y.n1
  avg_data_du145y_n1_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/DU145y.n1/DU145y.n1_data_export.csv"
  avg_du145y_n1 = pd.read_csv(avg_data_du145y_n1_file, header=0)

In [None]:
## All pixel values for each time stamp
## Each *_vsChem_export.csv is read without a header, cut to the first arr_rows*arr_cols
## columns, and indexed by the "Time Elapsed" column of the matching average frame.
## Assigning the index requires both frames to have the same number of rows.

with tf.device(gpu):   
  ## ARV7.n1
  arv7_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/ARV7.n1/ARV7.n1_vsChem_export.csv"
  arv7 = pd.read_csv(arv7_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  arv7.index = avg_arv7["Time Elapsed"]

  ## Yap.n2
  yap_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/yap.n2/yap.n2_vsChem_export.csv"
  yap = pd.read_csv(yap_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  yap.index = avg_yap["Time Elapsed"]

  ## Yap1.n2
  ## NOTE(review): avg_yap1 and avg_yap1n1 had their first row dropped on load while the frames
  ## below are not trimmed; the index assignments only succeed if the row counts match - confirm.
  yap1_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/yap1.n2/yap1.n2_vsChem_export.csv"
  yap1 = pd.read_csv(yap1_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  yap1.index = avg_yap1["Time Elapsed"]

  ## Yap1.n1.1
  yap1n1_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/yap1.n1.1/yap1.n1.1_vsChem_export.csv"
  yap1n1 = pd.read_csv(yap1n1_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  yap1n1.index = avg_yap1n1["Time Elapsed"]

  ## ARV7.n2
  arv72_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/ARV7.n2/ARV7.n2_vsChem_export.csv"
  arv72 = pd.read_csv(arv72_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  arv72.index = avg_arv72["Time Elapsed"]

  ## ARV7.n3
  arv73_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/ARV7.n3/ARV7.n3_vsChem_export.csv"
  arv73 = pd.read_csv(arv73_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  arv73.index = avg_arv73["Time Elapsed"]

  ## DU145a.p1
  du145a_p1_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/DU145a.p1/DU145a.p1_vsChem_export.csv"
  du145a_p1 = pd.read_csv(du145a_p1_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  du145a_p1.index = avg_du145a_p1["Time Elapsed"]

  ## DU145a.p2
  du145a_p2_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/DU145a.p2/DU145a.p2_vsChem_export.csv"
  du145a_p2 = pd.read_csv(du145a_p2_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  du145a_p2.index = avg_du145a_p2["Time Elapsed"]

  ## DU145a.p3
  du145a_p3_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/DU145a.p3/DU145a.p3_vsChem_export.csv"
  du145a_p3 = pd.read_csv(du145a_p3_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  du145a_p3.index = avg_du145a_p3["Time Elapsed"]

  ## DU145y.n1
  du145y_n1_file = "/content/drive/MyDrive/Final-Year-Project/DNANegatives/DU145y.n1/DU145y.n1_vsChem_export.csv"
  du145y_n1 = pd.read_csv(du145y_n1_file, header=None).iloc[:, :(arr_rows*arr_cols)]
  du145y_n1.index = avg_du145y_n1["Time Elapsed"]

#### Partial Covid Data

In [None]:
# Savitzky-Golay settings passed to load_partial_covid_exp below; they match the
# 101 / 3 values hard-coded in preprocess_data.
window_size = 101
polynomial_order = 3

In [None]:
## Each summary CSV yields two smoothed curves: the first returned frame is built from the
## file's "bot" columns and stored as exp_*_pos, the second from its "top" columns and
## stored as exp_*_neg.

## 150520_2_118
avg_118_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/150520_2_118/exp_summary_118.csv"
exp_118_pos, exp_118_neg = load_partial_covid_exp(avg_118_file, window_size, polynomial_order)

## 150520_4_2_86
avg_86_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/150520_4_2_86/exp_summary_86.csv"
exp_86_pos, exp_86_neg = load_partial_covid_exp(avg_86_file, window_size, polynomial_order)

## 150520_5_129
avg_129_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/150520_5_129/exp_summary_129.csv"
exp_129_pos, exp_129_neg = load_partial_covid_exp(avg_129_file, window_size, polynomial_order)

## 180520_4_165
avg_165_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/180520_4_165/exp_summary_165.csv"
exp_165_pos, exp_165_neg = load_partial_covid_exp(avg_165_file, window_size, polynomial_order)

## 180520_6_35
avg_35_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/180520_6_35/exp_summary_35.csv"
exp_35_pos, exp_35_neg = load_partial_covid_exp(avg_35_file, window_size, polynomial_order)

## 190520_1_28
avg_28_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/190520_1_28/exp_summary_28.csv"
exp_28_pos, exp_28_neg = load_partial_covid_exp(avg_28_file, window_size, polynomial_order) 

## 190520_2_14
avg_14_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/190520_2_14/exp_summary_14.csv"
exp_14_pos, exp_14_neg = load_partial_covid_exp(avg_14_file, window_size, polynomial_order)

## 210520_2_40
avg_40_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/210520_2_40/exp_summary_40.csv"
exp_40_pos, exp_40_neg = load_partial_covid_exp(avg_40_file, window_size, polynomial_order)

## 210520_3_88
avg_88_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/210520_3_88/exp_summary_88.csv"
exp_88_pos, exp_88_neg = load_partial_covid_exp(avg_88_file, window_size, polynomial_order)

## 210520_6_27
avg_27_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/210520_6_27/exp_summary_27.csv"
exp_27_pos, exp_27_neg = load_partial_covid_exp(avg_27_file, window_size, polynomial_order)

## 250520_1_134
avg_134_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/250520_1_134/exp_summary_134.csv"
exp_134_pos, exp_134_neg = load_partial_covid_exp(avg_134_file, window_size, polynomial_order)

## 250520_2_97
avg_97_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/250520_2_97/exp_summary_97.csv"
exp_97_pos, exp_97_neg = load_partial_covid_exp(avg_97_file, window_size, polynomial_order)

## 250520_6_2D1
avg_2d1_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/250520_6_2D1/exp_summary_2D1.csv"
exp_2d1_pos, exp_2d1_neg = load_partial_covid_exp(avg_2d1_file, window_size, polynomial_order)

## 250520_7_64
avg_64_file = "/content/drive/MyDrive/Final-Year-Project/COVIDPartialData/250520_7_64/exp_summary_64.csv"
exp_64_pos, exp_64_neg = load_partial_covid_exp(avg_64_file, window_size, polynomial_order)

### Preprocessing

#### Positive Samples

In [None]:
# Each name is rebound to its preprocessed frame, so this cell is not idempotent: running it
# a second time would feed already-preprocessed data back into preprocess_data.
# The first-stage derivative threshold is 1000 everywhere except rv1_ap2 (100); arv7_p3
# additionally uses a looser second-stage threshold (deriv_thresh_bgsub=20 instead of 5).
# arr_rows, arr_cols and average=True are accepted by preprocess_data but not used in its body.
g1 = preprocess_data(g1, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
g2 = preprocess_data(g2, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
g3 = preprocess_data(g3, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
g5 = preprocess_data(g5, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
rv1_ap1 = preprocess_data(rv1_ap1, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
rv1_ap2 = preprocess_data(rv1_ap2, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
rv1y_p1 = preprocess_data(rv1y_p1, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
rv1y_p3 = preprocess_data(rv1y_p3, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
rv1y_p4 = preprocess_data(rv1y_p4, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
arv7_p1 = preprocess_data(arv7_p1, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
arv7_p3 = preprocess_data(arv7_p3, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True, deriv_thresh_bgsub=20)
arv7_p4 = preprocess_data(arv7_p4, 1000, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)

#### Negative Samples

In [None]:
# Same pipeline as for the positive samples, with a first-stage derivative threshold of 100
# for every recording. Each name is rebound to its preprocessed frame, so this cell is not
# idempotent.
# NOTE(review): du145a_p1/p2/p3 are loaded from the DNANegatives folder and preprocessed here,
# but the training-data cell places them in the `positives` dict - confirm their true label.
arv7 = preprocess_data(arv7, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
yap = preprocess_data(yap, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
yap1 = preprocess_data(yap1, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
yap1n1 = preprocess_data(yap1n1, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
arv72 = preprocess_data(arv72, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
arv73 = preprocess_data(arv73, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
du145a_p1 = preprocess_data(du145a_p1, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
du145a_p2 = preprocess_data(du145a_p2, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
du145a_p3 = preprocess_data(du145a_p3, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)
du145y_n1 = preprocess_data(du145y_n1, 100, (600,900), arr_rows, arr_cols, average=True, drop_pixel=True)

### Machine Learning - Neural Network Ensemble

#### Helper Functions

In [None]:
def get_training_data(positive_samples, negative_samples, timestamp, test_samples=[]):
  """Build the training matrix and labels for one classifier.

  Every curve whose key is not listed in `test_samples` is cut to its first
  `timestamp` values. Positive curves come first (label 1), negative curves
  after them (label 0).

  Returns
  -------
  (ndarray, ndarray)
      the stacked truncated curves and the matching label vector
  """

  def truncated_curves(samples):
    ## held-out keys are skipped; every other curve is cut to length t = timestamp
    return [curve.to_numpy()[0:timestamp]
            for name, curve in samples.items()
            if name not in test_samples]

  pos_rows = truncated_curves(positive_samples)
  neg_rows = truncated_curves(negative_samples)

  ## ones for the positive rows followed by zeros for the negative rows
  training_labels = np.concatenate((np.ones(len(pos_rows)), np.zeros(len(neg_rows))), axis=0)

  ## np.asarray() stacks the list of equal-length rows into a 2D array
  return np.asarray(pos_rows + neg_rows), training_labels

In [None]:
def get_test_data(sample, timestamp):
  """Return the first `timestamp` values of `sample` as a single-row 2D array."""
  truncated = sample.to_numpy()[0:timestamp]
  return np.asarray([truncated])

#### Training Data

In [None]:
# Sample name -> smoothed 'Average Output' curve, grouped by the class label used for training.
# get_training_data labels every curve in `positives` as 1 and every curve in `negatives` as 0.
# NOTE(review): du145a_p1/p2/p3 are listed as positives here although they are loaded from the
# DNANegatives folder and preprocessed with the negative samples - confirm their true label.
positives = {"g1":g1['Average Output'], "g2":g2['Average Output'], "g3":g3['Average Output'], "g5":g5['Average Output'],
             "rv1_ap1":rv1_ap1['Average Output'], "rv1_ap2":rv1_ap2['Average Output'], "rv1y_p1":rv1y_p1['Average Output'],
             "rv1y_p3":rv1y_p3['Average Output'], "exp_118_pos":exp_118_pos['Average Output'], "exp_86_pos":exp_86_pos['Average Output'],
             "exp_129_pos":exp_129_pos['Average Output'], "exp_165_pos":exp_165_pos['Average Output'], 
             "exp_35_pos":exp_35_pos['Average Output'], "exp_28_pos":exp_28_pos['Average Output'], 
             "exp_14_pos":exp_14_pos['Average Output'], "exp_40_pos":exp_40_pos['Average Output'], "du145a_p1":du145a_p1['Average Output'], 
             "du145a_p2":du145a_p2['Average Output'], "du145a_p3":du145a_p3['Average Output'], "exp_88_pos":exp_88_pos['Average Output'],
             "exp_27_pos":exp_27_pos['Average Output'], "exp_134_pos":exp_134_pos['Average Output'], 
             "exp_97_pos":exp_97_pos['Average Output'], "exp_2d1_pos":exp_2d1_pos['Average Output'], 
             "exp_64_pos":exp_64_pos['Average Output'], "rv1y_p4":rv1y_p4['Average Output'],"arv7_p1":arv7_p1['Average Output'],
             "arv7_p3":arv7_p3['Average Output'], "arv7_p4":arv7_p4['Average Output']}

negatives = {"arv7":arv7['Average Output'], "yap":yap['Average Output'], "yap1":yap1['Average Output'], "yap1n1":yap1n1['Average Output'],
             "arv72":arv72['Average Output'], "arv73":arv73['Average Output'], "du145y_n1":du145y_n1['Average Output'],
             "exp_118_neg":exp_118_neg['Average Output'], "exp_86_neg":exp_86_neg['Average Output'],
             "exp_129_neg":exp_129_neg['Average Output'], "exp_165_neg":exp_165_neg['Average Output'], 
             "exp_35_neg":exp_35_neg['Average Output'], "exp_28_neg":exp_28_neg['Average Output'], 
             "exp_14_neg":exp_14_neg['Average Output'], "exp_40_neg":exp_40_neg['Average Output'], "exp_88_neg":exp_88_neg['Average Output'],
             "exp_27_neg":exp_27_neg['Average Output'], "exp_134_neg":exp_134_neg['Average Output'], 
             "exp_97_neg":exp_97_neg['Average Output'], "exp_2d1_neg":exp_2d1_neg['Average Output'], 
             "exp_64_neg":exp_64_neg['Average Output']}

#### Model Specs

In [None]:
# Ensemble layout: classifier i is trained on the first timestamps[i] samples of every curve.
# The curve length is taken from g1.
number_of_samples = len(g1['Average Output'])
number_of_classifiers = 20

# Samples added per successive classifier, then the cumulative cut-offs timestep, 2*timestep, ...
# NOTE(review): if number_of_samples is not a multiple of number_of_classifiers, this range
# can produce a different number of entries than number_of_classifiers - confirm.
timestep = int(number_of_samples/number_of_classifiers)
timestamps = [*range(timestep, number_of_samples+timestep, timestep)]

# Training hyper-parameters passed to create_ensemble.
batch_size = 3
epochs = 15
loss_function = 'binary_crossentropy'
optimiser = 'adam'

In [None]:
# Show the input length used by each classifier in the ensemble.
print(timestamps)

[20, 40, 60, 80, 100, 120, 140, 160, 180, 200, 220, 240, 260, 280, 300, 320, 340, 360, 380, 400]


#### Creating Ensemble

In [None]:
def create_ensemble(number_of_classifiers, batch_size, epochs, loss_function, optimiser, timestamps, test_samples):
  '''
  Builds and trains one dense binary classifier per timestamp.
  Training data comes from the module-level `positives` and `negatives` dicts via get_training_data.
  Parameters
  ---------
  number_of_classifiers : int
      Number of networks to build. Network i has input_dim = timestamps[i].
  batch_size : int
      Batch size passed to fit().
  epochs : int
      Maximum number of epochs per network; early stopping may end training sooner.
  loss_function : str
      Loss passed to compile().
  optimiser : str
      Optimiser passed to compile().
  timestamps : sequence of int
      Input length for each network; needs at least number_of_classifiers entries.
  test_samples : str or list/tuple/set of str
      Name, or collection of names, of the held-out sample(s). It is forwarded to
      get_training_data as a flat list.
  Returns
  -------
  list
      The trained Sequential models, in the same order as `timestamps`.
  '''
  # A single name is wrapped into a list; a collection is passed through flat.
  # Wrapping a collection a second time would give [[...]], and a membership test
  # on sample names would then never match (presumably get_training_data filters
  # with `key in test_samples`, as get_training_data_knn does -- TODO confirm),
  # letting the held-out samples leak into the training set.
  if isinstance(test_samples, (list, tuple, set, frozenset)):
    held_out = list(test_samples)
  else:
    held_out = [test_samples]

  neural_nets = []

  for i in range(number_of_classifiers):

    ## make model: fully connected stack ending in a single sigmoid unit
    model = Sequential()
    model.add(Dense(16, activation='relu', input_dim = timestamps[i]))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    ## compile model
    model.compile(loss=loss_function, optimizer=optimiser, metrics=['accuracy'])

    ## training data for this timestamp, requested without the held-out sample(s)
    training_data, training_label = get_training_data(positive_samples=positives, negative_samples=negatives, timestamp=timestamps[i], test_samples=held_out)

    ## train model: stop once the training loss has not improved for 3 epochs and keep the best weights
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, restore_best_weights=True)
    model.fit(training_data, training_label,  batch_size=batch_size, epochs=epochs, shuffle=True, callbacks=[callback], verbose=0)

    neural_nets.append(model)

  return neural_nets

#### Evaluating Ensemble

In [None]:
def get_prediction(ensemble, timestamps, test_sample):
  '''
  Classifies one sample by majority vote over the ensemble.
  Parameters
  ---------
  ensemble : sequence
      Trained models; ensemble[i] is queried with the test data built for timestamps[i].
  timestamps : sequence of int
      Timestamp for each model, in the same order as `ensemble`.
  test_sample
      The sample to classify; passed unchanged to get_test_data.
  Returns
  -------
  int
      1 or 0, whichever label most models voted for. On a tie the label that was
      voted first wins, because Counter.most_common keeps first-seen order for equal counts.
  '''
  votes = []

  # Pair each model with its own timestamp. The number of voters comes from the
  # ensemble that was passed in, not from a module-level classifier count that
  # may have been changed by another cell.
  for model, timestamp in zip(ensemble, timestamps):
    test_data = get_test_data(test_sample, timestamp)
    probability = model.predict(test_data)[0][0]
    votes.append(int(probability >= 0.5))  # threshold the sigmoid output at 0.5

  classification = Counter(votes).most_common(1)[0][0] # final prediction

  return classification

In [None]:
## merge both classes into one dict: positives first, then negatives
all_samples = {**positives, **negatives}

## create dict of samples with true label (1 for positives, 0 for negatives)
keys = [*all_samples]
true_labels = list(np.repeat([1.0, 0.0], [len(positives), len(negatives)]))
true_label_dict = {name: label for name, label in zip(keys, true_labels)}

In [None]:
final_classifications = {}

## use ensemble to evaluate the prediction for each of the samples individually:
## every sample in turn is passed as the held-out sample to a freshly trained ensemble
for key, value in all_samples.items():
  test_sample_name = key
  test_sample = value

  print(f"Testing sample: {test_sample_name}...")

  ## A new ensemble is trained on every iteration. Keras keeps global state for
  ## every model ever created, so without this reset memory grows over the run.
  ## The previous iteration's models are no longer needed at this point.
  tf.keras.backend.clear_session()

  en = create_ensemble(number_of_classifiers, batch_size, epochs, loss_function, optimiser, timestamps, test_sample_name)
  classification = get_prediction(en, timestamps, test_sample)

  ## store (predicted label, true label) for the metric functions
  final_classifications[key] = (classification, true_label_dict[key])

Testing sample: g1...
Testing sample: g2...
Testing sample: g3...
Testing sample: g5...
Testing sample: rv1_ap1...
Testing sample: rv1_ap2...
Testing sample: rv1y_p1...
Testing sample: rv1y_p3...
Testing sample: exp_118_pos...
Testing sample: exp_86_pos...
Testing sample: exp_129_pos...
Testing sample: exp_165_pos...
Testing sample: exp_35_pos...
Testing sample: exp_28_pos...
Testing sample: exp_14_pos...
Testing sample: exp_40_pos...
Testing sample: du145a_p1...
Testing sample: du145a_p2...
Testing sample: du145a_p3...
Testing sample: exp_88_pos...
Testing sample: exp_27_pos...
Testing sample: exp_134_pos...
Testing sample: exp_97_pos...
Testing sample: exp_2d1_pos...
Testing sample: exp_64_pos...
Testing sample: rv1y_p4...
Testing sample: arv7_p1...
Testing sample: arv7_p3...
Testing sample: arv7_p4...
Testing sample: arv7...
Testing sample: yap...
Testing sample: yap1...
Testing sample: yap1n1...
Testing sample: arv72...
Testing sample: arv73...
Testing sample: du145y_n1...
Testing 

In [None]:
# Display {sample name: (predicted label, true label)} for every tested sample.
final_classifications

{'arv7': (1, 0.0),
 'arv72': (1, 0.0),
 'arv73': (1, 0.0),
 'arv7_p1': (0, 1.0),
 'arv7_p3': (1, 1.0),
 'arv7_p4': (1, 1.0),
 'du145a_p1': (1, 1.0),
 'du145a_p2': (1, 1.0),
 'du145a_p3': (1, 1.0),
 'du145y_n1': (1, 0.0),
 'exp_118_neg': (1, 0.0),
 'exp_118_pos': (1, 1.0),
 'exp_129_neg': (1, 0.0),
 'exp_129_pos': (0, 1.0),
 'exp_134_neg': (1, 0.0),
 'exp_134_pos': (0, 1.0),
 'exp_14_neg': (1, 0.0),
 'exp_14_pos': (1, 1.0),
 'exp_165_neg': (1, 0.0),
 'exp_165_pos': (1, 1.0),
 'exp_27_neg': (1, 0.0),
 'exp_27_pos': (1, 1.0),
 'exp_28_neg': (1, 0.0),
 'exp_28_pos': (1, 1.0),
 'exp_2d1_neg': (1, 0.0),
 'exp_2d1_pos': (1, 1.0),
 'exp_35_neg': (1, 0.0),
 'exp_35_pos': (1, 1.0),
 'exp_40_neg': (1, 0.0),
 'exp_40_pos': (1, 1.0),
 'exp_64_neg': (1, 0.0),
 'exp_64_pos': (0, 1.0),
 'exp_86_neg': (1, 0.0),
 'exp_86_pos': (1, 1.0),
 'exp_88_neg': (1, 0.0),
 'exp_88_pos': (1, 1.0),
 'exp_97_neg': (1, 0.0),
 'exp_97_pos': (1, 1.0),
 'g1': (1, 1.0),
 'g2': (1, 1.0),
 'g3': (1, 1.0),
 'g5': (1, 1.0),
 

In [None]:
## print each summary metric computed from the (predicted, true) pairs
for _metric_name, _metric_fn in (
    ("Accuracy", accuracy),
    ("Sensitivity/Recall", sensitivity),
    ("Specificity", specificity),
    ("Precision", precision),
    ("F1 Score", f1),
):
  print(f"{_metric_name}: {_metric_fn(final_classifications)}")

Accuracy: 48.0
Sensitivity/Recall: 82.75862068965517
Specificity: 0.0
Precision: 53.333333333333336
F1 Score: 64.86486486486487


In [None]:
# ## checking the timestamp where majority of classifiers agree

# from collections import defaultdict

# def get_timestamp(timestamps, predictions):

#   ## create dict to hold count of predictions
#   label_counters = defaultdict(int)

#   ## add entries to dict
#   for index, pred in enumerate(predictions):
#     label_counters[pred] += 1

#     ## if label count == half of total possible predictions then majority is achieved
#     if(label_counters[pred] == int(len(predictions)/2)+1):
#       return timestamps[index], index
  
#   return -1, -1


In [None]:
# print(f"Timestamp where majority aggement is reached: {timestamp_final}")
# print(f"Index of final time stamp in array : {pred_index}")

Timestamp where majority aggement is reached: 352
Index of final time stamp in array : 43


#### Save Ensemble

In [None]:
# !pwd

In [None]:
## File paths to save models - uncomment as needed

# %cd /content/drive/MyDrive/Final-Year-Project/EnsembleModels50/G1Test
# %cd /content/drive/MyDrive/Final-Year-Project/EnsembleModels50/G2Test
# %cd /content/drive/MyDrive/Final-Year-Project/EnsembleModels50/G3Test
# %cd /content/drive/MyDrive/Final-Year-Project/EnsembleModels50/G5Test

In [None]:
## File paths to save models - uncomment as needed

# %cd /content/drive/MyDrive/Final-Year-Project/EnsembleModels50/ARV7Test/
# %cd /content/drive/MyDrive/Final-Year-Project/EnsembleModels50/YAPTest/
# %cd /content/drive/MyDrive/Final-Year-Project/EnsembleModels50/YAP1Test/
# %cd /content/drive/MyDrive/Final-Year-Project/EnsembleModels50/YAP1N1Test/

In [None]:
# for i in range(number_of_classifiers):
#   filename = f"ensemble-model-{i}.h5"
#   neural_nets[i].save(filename)

#   print(f"Saved {filename}")

### Machine Learning - KNN

#### Helper Functions

In [None]:
def get_training_data_knn(positive_samples, negative_samples, timestamp, test_samples=[]):
  '''
  Builds the KNN training set from the positive and negative sample dicts.
  Parameters
  ---------
  positive_samples : dict
      Maps sample name to a series exposing to_numpy(); these rows are labelled 1.
  negative_samples : dict
      Same structure as positive_samples; these rows are labelled 0.
  timestamp : int
      Every series is truncated to its first `timestamp` values.
  test_samples : list, optional
      Names of samples to leave out of the training set. Default is no exclusion.
  Returns
  -------
  tuple
      (training_data, training_labels). Rows are the kept positives followed by the
      kept negatives; labels are ones for the positives then zeros for the negatives.
  '''

  def _kept_prefixes(samples):
    ## truncate each series to length t = timestamp, skipping the held-out names
    return [series.to_numpy()[0:timestamp]
            for name, series in samples.items()
            if name not in test_samples]

  pos_rows = _kept_prefixes(positive_samples)
  neg_rows = _kept_prefixes(negative_samples)

  ## labels follow the row order: all positives (1) first, then all negatives (0)
  training_labels = np.concatenate((np.ones(len(pos_rows)), np.zeros(len(neg_rows))), axis=0)

  return np.asarray(pos_rows + neg_rows), training_labels ## np.asarray() stacks the list of rows into one array

In [None]:
def get_test_data_knn(sample, timestamp):
  '''
  Truncates one sample to its first `timestamp` values and returns it as a
  single-row array, i.e. with a leading dimension of 1.
  '''
  return np.asarray([sample.to_numpy()[0:timestamp]])

#### Training Data

In [None]:
# Samples of the positive class (labelled 1 further down): each entry maps a
# sample name to the 'Average Output' series of the object with the same name.
positives = {
    sample_name: frame['Average Output']
    for sample_name, frame in {
        "g1": g1, "g2": g2, "g3": g3, "g5": g5,
        "rv1_ap1": rv1_ap1, "rv1_ap2": rv1_ap2, "rv1y_p1": rv1y_p1,
        "rv1y_p3": rv1y_p3, "exp_118_pos": exp_118_pos, "exp_86_pos": exp_86_pos,
        "exp_129_pos": exp_129_pos, "exp_165_pos": exp_165_pos,
        "exp_35_pos": exp_35_pos, "exp_28_pos": exp_28_pos,
        "exp_14_pos": exp_14_pos, "exp_40_pos": exp_40_pos, "du145a_p1": du145a_p1,
        "du145a_p2": du145a_p2, "du145a_p3": du145a_p3, "exp_88_pos": exp_88_pos,
        "exp_27_pos": exp_27_pos, "exp_134_pos": exp_134_pos,
        "exp_97_pos": exp_97_pos, "exp_2d1_pos": exp_2d1_pos,
        "exp_64_pos": exp_64_pos, "rv1y_p4": rv1y_p4, "arv7_p1": arv7_p1,
        "arv7_p3": arv7_p3, "arv7_p4": arv7_p4,
    }.items()
}

# Samples of the negative class (labelled 0 further down): each entry maps a
# sample name to the 'Average Output' series of the object with the same name.
negatives = {
    sample_name: frame['Average Output']
    for sample_name, frame in {
        "arv7": arv7, "yap": yap, "yap1": yap1, "yap1n1": yap1n1,
        "arv72": arv72, "arv73": arv73, "du145y_n1": du145y_n1,
        "exp_118_neg": exp_118_neg, "exp_86_neg": exp_86_neg,
        "exp_129_neg": exp_129_neg, "exp_165_neg": exp_165_neg,
        "exp_35_neg": exp_35_neg, "exp_28_neg": exp_28_neg,
        "exp_14_neg": exp_14_neg, "exp_40_neg": exp_40_neg, "exp_88_neg": exp_88_neg,
        "exp_27_neg": exp_27_neg, "exp_134_neg": exp_134_neg,
        "exp_97_neg": exp_97_neg, "exp_2d1_neg": exp_2d1_neg,
        "exp_64_neg": exp_64_neg,
    }.items()
}

#### Timestamps

In [None]:
# Prefix lengths at which KNN is evaluated: equal steps up to the length of the series.
number_of_timestamps = 50
number_of_samples = len(g1['Average Output'])

timestep = number_of_samples // number_of_timestamps
timestamps = list(range(timestep, number_of_samples + timestep, timestep))

In [None]:
# Sanity check: the prefix lengths KNN will be evaluated at.
print(timestamps)

[8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384, 392, 400]


#### Model

In [None]:
def KNN(k, test_sample, train_data, train_labels, distance_metric):
  '''
  Classifies one test sample by majority vote among its k nearest training rows.
  Parameters
  ---------
  k : int
      Number of nearest neighbours that vote.
  test_sample : array-like
      A single sample, either 1D (T) or 2D (1 x T), where T matches the columns of train_data.
  train_data : ndarray
      2D array (N x T) of training rows.
  train_labels : ndarray
      1D array (N) with the label of each training row.
  distance_metric : str
      One of 'manhattan', 'cityblock', 'euclidean' or 'cosine' (case-insensitive).
  Returns
  -------
  The label that occurs most often among the k nearest neighbours.
  Raises
  ------
  ValueError
      If distance_metric is not one of the supported names.
  '''
  metric = distance_metric.lower()

  # Reject unknown metrics up front; otherwise no distances would be computed
  # and the neighbour lookup below would run on a placeholder value.
  if metric not in ('manhattan', 'cityblock', 'euclidean', 'cosine'):
    raise ValueError(f"Unsupported distance_metric {distance_metric!r}; expected 'manhattan', 'cityblock', 'euclidean' or 'cosine'")

  if metric in ('manhattan', 'cityblock'):
    pairwise = manhattan_distances
  elif metric == 'euclidean':
    pairwise = euclidean_distances
  else:
    pairwise = cosine_distances

  # One query row against all N training rows gives a 1 x N result. That is all
  # that is needed, so the query is not repeated N times to form an N x N matrix.
  query = np.asarray(test_sample).reshape(1, -1)
  distances = pairwise(query, train_data)[0]

  min_indexes = np.argsort(distances)[:k] # indexes of the k smallest distances

  knn_labels = list(train_labels[min_indexes]) # labels of the k nearest neighbours
  final_pred = max(set(knn_labels), key=knn_labels.count) # most frequent label wins

  return final_pred

#### Model Predictions

In [None]:
## merge both classes into one dict: positives first, then negatives
all_samples = {**positives, **negatives}

## create dict of samples with true label (1 for positives, 0 for negatives)
keys = [*all_samples]
true_labels = list(np.repeat([1.0, 0.0], [len(positives), len(negatives)]))
true_label_dict = {name: label for name, label in zip(keys, true_labels)}

In [None]:
final_classifications = {}

## classify every sample with KNN while that sample is left out of the training set
for key, value in all_samples.items():
  test_sample_name, test_sample = key, value

  predictions = []
  for t in timestamps:
    ## neighbours are the remaining samples, all truncated to the first t values
    train_data, train_labels = get_training_data_knn(positives, negatives, t, [test_sample_name])
    test_data = get_test_data_knn(sample=test_sample, timestamp=t)
    pred = KNN(k=1, test_sample=test_data, train_data=train_data, train_labels=train_labels, distance_metric='cosine')
    predictions.append(pred)

  ## majority vote over the per-timestamp predictions
  classification = Counter(predictions).most_common(1)[0][0]
  final_classifications[key] = (classification, true_label_dict[key])

In [None]:
# Display {sample name: (predicted label, true label)} for every tested sample.
final_classifications

{'arv7': (0.0, 0.0),
 'arv72': (0.0, 0.0),
 'arv73': (0.0, 0.0),
 'arv7_p1': (0.0, 1.0),
 'arv7_p3': (0.0, 1.0),
 'arv7_p4': (0.0, 1.0),
 'du145a_p1': (1.0, 1.0),
 'du145a_p2': (0.0, 1.0),
 'du145a_p3': (1.0, 1.0),
 'du145y_n1': (1.0, 0.0),
 'exp_118_neg': (1.0, 0.0),
 'exp_118_pos': (0.0, 1.0),
 'exp_129_neg': (0.0, 0.0),
 'exp_129_pos': (1.0, 1.0),
 'exp_134_neg': (1.0, 0.0),
 'exp_134_pos': (1.0, 1.0),
 'exp_14_neg': (0.0, 0.0),
 'exp_14_pos': (1.0, 1.0),
 'exp_165_neg': (1.0, 0.0),
 'exp_165_pos': (1.0, 1.0),
 'exp_27_neg': (0.0, 0.0),
 'exp_27_pos': (1.0, 1.0),
 'exp_28_neg': (1.0, 0.0),
 'exp_28_pos': (1.0, 1.0),
 'exp_2d1_neg': (1.0, 0.0),
 'exp_2d1_pos': (1.0, 1.0),
 'exp_35_neg': (0.0, 0.0),
 'exp_35_pos': (0.0, 1.0),
 'exp_40_neg': (0.0, 0.0),
 'exp_40_pos': (0.0, 1.0),
 'exp_64_neg': (0.0, 0.0),
 'exp_64_pos': (1.0, 1.0),
 'exp_86_neg': (0.0, 0.0),
 'exp_86_pos': (1.0, 1.0),
 'exp_88_neg': (1.0, 0.0),
 'exp_88_pos': (0.0, 1.0),
 'exp_97_neg': (1.0, 0.0),
 'exp_97_pos': (1.0,

In [None]:
## print each summary metric computed from the (predicted, true) pairs
for _metric_name, _metric_fn in (
    ("Accuracy", accuracy),
    ("Sensitivity/Recall", sensitivity),
    ("Specificity", specificity),
    ("Precision", precision),
    ("F1 Score", f1),
):
  print(f"{_metric_name}: {_metric_fn(final_classifications)}")

Accuracy: 59.183673469387756
Sensitivity/Recall: 64.28571428571429
Specificity: 52.38095238095239
Precision: 64.28571428571429
F1 Score: 64.28571428571429


### Github Commands

In [86]:
!pwd

/content/drive/MyDrive/Final-Year-Project


In [93]:
username = "adityag16"
git_token = "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"
repository = "/content/drive/MyDrive/Final-Year-Project"
%cd {repository}
!git status

/content/drive/MyDrive/Final-Year-Project
On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add/rm <file>..." to update what will be committed)
  (use "git checkout -- <file>..." to discard changes in working directory)

	[31mdeleted:    Early Time Series Classification with Confidence Metric.ipynb[m
	[31mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-0.h5[m
	[31mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-1.h5[m
	[31mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-10.h5[m
	[31mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-11.h5[m
	[31mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-12.h5[m
	[31mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-13.h5[m
	[31mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-14.h5[m
	[31mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-15.h5[m
	[31mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-16.h5[m
	[31mdeleted:    EnsembleModel

In [95]:
!git add .
!git status

On branch main
Your branch is up to date with 'origin/main'.

Changes to be committed:
  (use "git reset HEAD <file>..." to unstage)

	[32mnew file:   Best Performances.docx[m
	[32mnew file:   Early Time Series Classification - Average Ouput.ipynb[m
	[32mdeleted:    Early Time Series Classification with Confidence Metric.ipynb[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-0.h5[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-1.h5[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-10.h5[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-11.h5[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-12.h5[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-13.h5[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-14.h5[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-15.h5[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-model-16.h5[m
	[32mdeleted:    EnsembleModels50/ARV7Test/ensemble-

In [96]:
!git config --global user.email "aditya.gupta18@imperial.ac.uk"
!git config --global user.name "adityag16"

!git commit -m "Added new notebook for pixel by pixel training of neural network + best performnace tracker + removed old saved models"
!git push origin main

[main 1418b9d] Added new notebook for pixel by pixel training of neural network + best performnace tracker + removed old saved models
 404 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 Best Performances.docx
 create mode 100644 Early Time Series Classification - Average Ouput.ipynb
 delete mode 100644 Early Time Series Classification with Confidence Metric.ipynb
 delete mode 100644 EnsembleModels50/ARV7Test/ensemble-model-0.h5
 delete mode 100644 EnsembleModels50/ARV7Test/ensemble-model-1.h5
 delete mode 100644 EnsembleModels50/ARV7Test/ensemble-model-10.h5
 delete mode 100644 EnsembleModels50/ARV7Test/ensemble-model-11.h5
 delete mode 100644 EnsembleModels50/ARV7Test/ensemble-model-12.h5
 delete mode 100644 EnsembleModels50/ARV7Test/ensemble-model-13.h5
 delete mode 100644 EnsembleModels50/ARV7Test/ensemble-model-14.h5
 delete mode 100644 EnsembleModels50/ARV7Test/ensemble-model-15.h5
 delete mode 100644 EnsembleModels50/ARV7Test/ensemble-model-16.h5
 delete mode 10