# Imports

In [None]:
# Mount Google Drive (Colab only) so the /content/gdrive/MyDrive/... paths used
# by the rest of the notebook are reachable.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools

import tensorflow as tf
from keras.models import load_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Dropout, MaxPool1D, ReLU, Flatten, BatchNormalization

from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, accuracy_score, mean_absolute_error, mean_squared_error

In [None]:
# Number of target parameters; it selects the "<n>_parameters" data files and
# the matching level-2 model directory.
no_of_params = 2

In [None]:
# Real-event inputs (Noise.csv and the "latest data/" BBH files are read from here).
real_data_directory = "/content/gdrive/MyDrive/GW_SOP/Data/better_results/"
# Generated validation CSVs (the val_Final_* files are read from here).
gen_data_directory = "/content/gdrive/MyDrive/GW_SOP/Data/"
# Level-1 (binary classification) model directory; model_trial_1.h5 is loaded from it.
model1_directory = "/content/gdrive/MyDrive/GW_SOP/Code/debugging/1D-CNN Binary Classification/Signal_HIDDEN_in_strain_data/ALIGO_noise/Test 5/data/"
# Level-2 (parameter estimation) model directory. The trailing "data/" matters:
# later cells strip it with model2_directory[:-5] to build the sibling
# "screenshots/" and "data/Real Data Results/" output paths.
model2_directory = "/content/gdrive/MyDrive/GW_SOP/Code/debugging/1D-CNN BBH Parameter Estimation/"+str(no_of_params)+"_parameters/Signal_HIDDEN_in_strain_data/ALIGO_noise/Test 5 alternate/data/"

In [None]:
# A permutation of 1..48, one entry per name in bbh_events.
# NOTE(review): idx is not referenced anywhere else in the visible notebook. It
# looks like the 1-based chronological rank of the event at the same position in
# bbh_events (e.g. GW150914 -> 1, GW151012 -> 2) - confirm, or remove if unused.
idx =   [ 23, 10, 40, 37, 3, 15, 36, 20, 25, 11, 6, 45, 47, 44, 4, 43, 16, 19, 12, 22, 
          28, 31, 8, 33, 2, 48, 38, 42, 30, 1, 35, 34, 46, 5, 18, 21, 26, 14, 17, 27,
          24, 7, 9, 13, 41, 32, 29, 39
        ]

# Event names for the real-data samples (48 entries). The real-data level-1 loop
# looks a name up by BBH row position, i.e. the sample's row in the stacked
# [noise; BBH] matrix minus the noise offset; the level-2 targets table is
# indexed the same way, so both lists must keep the same order.
# NOTE(review): presumably this order matches the row order of the real BBH CSV
# - verify against the file that produced it.
bbh_events= ['GW190519_153544',
             'GW170823',
             'GW190814',
             'GW190728_064510',
             'GW151226',
             'GW190421_213856',
             'GW190727_060333',
             'GW190513_205428',
             'GW190521_074359',
             'GW190408_181802',
             'GW170729',
             'GW190915_235702',
             'GW190929_012149',
             'GW190910_112807',
             'GW170104',
             'GW190909_114149',
             'GW190424_180648',
             'GW190512_180714',
             'GW190412',
             'GW190517_055101',
             'GW190620_030421',
             'GW190706_222641',
             'GW170814',
             'GW190708_232457',
             'GW151012',
             'GW190930_133541',
             'GW190731_140936',
             'GW190828_065509',
             'GW190701_203306',
             'GW150914',
             'GW190720_000836',
             'GW190719_215514',
             'GW190924_021846',
             'GW170608',
             'GW190503_185404',
             'GW190514_065416',
             'GW190527_092055',
             'GW190413_134308',
             'GW190425',
             'GW190602_175927',
             'GW190521',
             'GW170809',
             'GW170818',
             'GW190413_052954',
             'GW190828_063405',
             'GW190707_093326',
             'GW190630_185205',
             'GW190803_022701'
             ]

# Testing on unseen generated data

## Level 1

In [None]:
# Load the three header-less validation CSVs for the configured parameter count.
# Judging by the file names: noise-only rows, noise+BBH-signal rows, and the
# regression targets for the BBH rows.
_gen_suffix = str(no_of_params) + "_parameters.csv"

noise_df = pd.read_csv(
    gen_data_directory + "val_Final_Merged_Noise_Reduced_No_ABS_" + _gen_suffix,
    header=None,
)
data_BBH_df = pd.read_csv(
    gen_data_directory + "val_Final_BBH_Merged_Noise_Signal_Reduced_No_ABS_" + _gen_suffix,
    header=None,
)
data_BBH_df_targets = pd.read_csv(
    gen_data_directory + "val_Final_BBH_Merged_Noise_Signal_Targets_" + _gen_suffix,
    header=None,
)

# Plain float arrays are what the rest of the notebook works with.
noise = noise_df.values.astype(float)
data_BBH = data_BBH_df.values.astype(float)
data_BBH_targets = data_BBH_df_targets.values.astype(float)

In [None]:
X = np.concatenate((noise, data_BBH), axis=0)
print(X.shape)

In [None]:
X *= 1e19
print(X)

In [None]:
y = [[0, -1] for i in range(1000)] + [[1, 0] for i in range(1000)]
y = np.array(y)
print(y)

In [None]:
X = np.expand_dims(X, axis=-1)
print(X.shape)

In [None]:
X_test_l1, tmp1, y, tmp2 = train_test_split(X, y, test_size=1)

X_test_l1 = np.concatenate((X_test_l1, tmp1), axis = 0)
y = np.concatenate((y, tmp2), axis = 0)

y_l2 = [y[i,1] for i in range(len(y))]
y_test_l1 = [y[i,0] for i in range(len(y))]

In [None]:
print(y_test_l1)

In [None]:
model_l1 = load_model(model1_directory + "model_trial_1.h5")

In [None]:
pred_l1 = model_l1.predict(X_test_l1)

In [None]:
# y_pred = np.argmax(pred_l1, axis = 1)
# for i in range(2999):
#   print(y_test_l1[i], y_pred[i])

In [None]:
y_pred = np.argmax(pred_l1, axis = 1)
correct_l1 = 0
X_test_l2 = []

for i in range(len(y_test_l1)):
  if y_pred[i]==y_test_l1[i]:
    correct_l1 += 1

  if y_pred[i]==1 and y_l2[i]!=-1:
    X_test_l2.append(X_test_l1[i])

acc = (correct_l1/len(y_test_l1))*100
print(f"Total no. of data samples in level 1: {len(y)}")
print(f"No. of correct predictions: {correct_l1}")
print(f"Level 1 accuracy: {acc}")
print(f"No. of data samples sent to level 2 model: {len(X_test_l2)}")

In [None]:
precision = precision_score(y_test_l1, y_pred, average='binary')
print('Precision: %.3f' % precision)

recall = recall_score(y_test_l1, y_pred, average='binary')
print('Recall: %.3f' % recall)

score = f1_score(y_test_l1, y_pred, average='binary')
print('F1-Score: %.3f' % score)

cm = confusion_matrix(y_test_l1, y_pred, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
disp.plot()
plt.title('Acc.:'+str(round(acc, 3))+'   Prec.:'+str(round(precision, 3))+'   Recall:'+str(round(recall, 3))+'   F1:'+str(round(score, 3)))
plt.savefig(model2_directory[:-5]+"screenshots/gen_test_cm_1_level_1.png")
plt.show()

In [None]:
pos_probs = pred_l1[:,1]
fpr, tpr, _ = roc_curve(y_test_l1, pos_probs)
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.savefig(model2_directory[:-5]+"screenshots/gen_test_roc_1_level_1.png")
plt.show()

## Level 2 (Parameter Estimation)

In [None]:
X_test_l2 = np.array(X_test_l2)
y_test_l2 = data_BBH_targets

print(X_test_l2.shape)
print(y_test_l2.shape)

In [None]:
# Load the saved level-2 (parameter estimation) model.
model2 = load_model(model2_directory + "model_trial_1.h5")

In [None]:
# Run it on the samples that level 1 forwarded.
pred_l2 = model2.predict(X_test_l2)

In [None]:
# Level-2 regression error on the generated data.
# NOTE(review): both metrics compare row k of y_test_l2 with row k of pred_l2, so
# y_test_l2 must hold exactly the targets of the samples in X_test_l2, in the
# same order - verify where y_test_l2 is assigned.
y_pred = pred_l2
y_true = y_test_l2

print(f"Total no. of data samples in level 2: {len(y_test_l2)}")

mse = mean_squared_error(y_true, y_pred)
print('Mean Squared Error: %.3f' % mse)

mae = mean_absolute_error(y_true, y_pred)
print('Mean Absolute Error: %.3f' % mae)

# Testing on real data

## Level 1

In [None]:
# Real-data inputs, read as header-less CSVs and converted to float arrays.
# This rebinds noise / data_BBH, replacing the generated-data arrays loaded earlier.
noise_df = pd.read_csv(real_data_directory+"Noise.csv", header=None)
noise = noise_df.values.astype(float)


# Exactly one BBH input file is active; the commented-out lines are alternative
# versions of the same data kept for quick switching.
# data_BBH_df = pd.read_csv(real_data_directory+"Processed_BBH_Merged_original.csv", header=None)
data_BBH_df = pd.read_csv(real_data_directory+"latest data/Scaled_Processed_BBH_Merged.csv", header=None)
# data_BBH_df = pd.read_csv(real_data_directory+"latest data/Scaled_Sliced_BBH_Merged.csv", header=None)
data_BBH = data_BBH_df.values.astype(float)

In [None]:
X = np.concatenate((noise, data_BBH), axis=0)
print(X.shape)

In [None]:
X *= 1e19
print(X)

In [None]:
y = [[0, -1] for i in range(len(noise))] + [[1, 0] for i in range(len(data_BBH))]
y = np.array(y)
print(y)

In [None]:
X_test_l1 = np.expand_dims(X, axis=-1)
print(X_test_l1.shape)

In [None]:
y_l2 = [y[i,1] for i in range(len(y))]
y_test_l1 = [y[i,0] for i in range(len(y))]

In [None]:
print(y_l2)

In [None]:
print(y_test_l1)

In [None]:
# Reload the level-1 model so this section can be run on its own.
model_l1 = load_model(model1_directory + "model_trial_1.h5")

In [None]:
# Per-class scores for every real sample; column 1 is used later as the positive-class score.
pred_l1 = model_l1.predict(X_test_l1)

In [None]:
# Score level 1 on the real data and collect the true BBH samples it detected.
y_pred = np.argmax(pred_l1, axis = 1)
correct_l1 = 0
X_test_l2 = []
name = []        # event name of each forwarded sample
sample_idx = []  # row of each forwarded sample in the stacked [noise; BBH] matrix

# X stacks the noise rows first, so a BBH sample at row i is event number
# i - len(noise) in bbh_events. Using len(noise) instead of a hard-coded 24
# keeps the lookup correct whatever the size of Noise.csv.
bbh_offset = len(noise)

for i, (predicted, label) in enumerate(zip(y_pred, y_test_l1)):
  if predicted == label:
    correct_l1 += 1

  # Forward only genuine BBH samples (flag != -1) that level 1 classified as signal.
  if predicted == 1 and y_l2[i] != -1:
    X_test_l2.append(X_test_l1[i])
    name.append(bbh_events[i - bbh_offset])
    sample_idx.append(i)

acc = (correct_l1/len(y_test_l1))*100
print(f"Total no. of data samples in level 1: {len(y)}")
print(f"No. of correct predictions: {correct_l1}")
print(f"Level 1 accuracy: {acc}")
print(f"No. of data samples sent to level 2 model: {len(X_test_l2)}")

In [None]:
# Precision / recall / F1 of the level-1 classifier on the real data.
precision = precision_score(y_test_l1, y_pred, average='binary')
print('Precision: %.3f' % precision)

recall = recall_score(y_test_l1, y_pred, average='binary')
print('Recall: %.3f' % recall)

score = f1_score(y_test_l1, y_pred, average='binary')
print('F1-Score: %.3f' % score)

# Confusion matrix with the four summary metrics in the title.
cm = confusion_matrix(y_test_l1, y_pred, labels=[0, 1])
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
disp.plot()
plt.title('Acc.:'+str(round(acc, 3))+'   Prec.:'+str(round(precision, 3))+'   Recall:'+str(round(recall, 3))+'   F1:'+str(round(score, 3)))
# model2_directory[:-5] drops the trailing "data/" to reach the sibling screenshots folder.
plt.savefig(model2_directory[:-5]+"screenshots/real_cm_1_level_1.png")
plt.show()

In [None]:
# ROC curve using the class-1 output column as the positive-class score.
pos_probs = pred_l1[:,1]
fpr, tpr, _ = roc_curve(y_test_l1, pos_probs)
plt.plot(fpr, tpr)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.savefig(model2_directory[:-5]+"screenshots/real_roc_1_level_1.png")
plt.show()

## Level 2 (Parameter Estimation)

In [None]:
# Stack the forwarded samples into one array for the level-2 model.
X_test_l2 = np.array(X_test_l2)
# Reference [m1, m2] values for the real events (48 rows); the two columns are
# written out later as 'True m1' and 'True m2'. Rows are looked up by BBH row
# position, the same way bbh_events is, so the two tables must share one order.
# NOTE(review): values are presumably component masses in solar masses taken
# from the published event catalogues - confirm the source.
y_test_l2 = np.array(
    [
        [66.0, 40.5],
        [39.5, 29.0],
        [23.2, 2.59],
        [12.3, 8.1],
        [13.7, 7.7],
        [41.3, 31.9],
        [38.0, 29.4],
        [35.7, 18.0],
        [42.2, 32.8],
        [24.6, 18.4],
        [50.2, 34.0],
        [35.3, 24.4],
        [80.8, 24.1],
        [43.9, 35.6],
        [30.8, 20.0],
        [45.8, 28.3],
        [40.5, 31.8],
        [23.3, 12.6],
        [30.1, 8.3],
        [37.4, 25.3],
        [57.1, 35.5],
        [67.0, 38.2],
        [30.6, 25.2],
        [17.6, 13.2],
        [23.2, 13.6],
        [12.3, 7.8],
        [41.5, 28.8],
        [24.1, 10.2],
        [53.9, 40.8],
        [35.6, 30.6],
        [13.4, 7.8],
        [36.5, 20.8],
        [8.9, 5.0],
        [11.0, 7.6],
        [43.3, 28.4],
        [39.0, 28.4],
        [36.5, 22.6],
        [47.5, 31.8],
        [2.0, 1.4],
        [69.1, 47.8],
        [95.3, 69.0],
        [35.0, 23.8],
        [35.4, 26.7],
        [34.7, 23.7],
        [32.1, 26.2],
        [11.6, 8.4],
        [35.1, 23.7],
        [37.3, 27.3]    
    ]        
)
print(X_test_l2.shape)

In [None]:
# Reload the level-2 (parameter estimation) model.
model2 = load_model(model2_directory + "model_trial_1.h5")

In [None]:
# Predict the parameters for the real samples that level 1 forwarded.
pred_l2 = model2.predict(X_test_l2)

In [None]:
# Level-2 regression error on the real events that level 1 detected.
y_pred = pred_l2
# sample_idx holds row positions in the stacked [noise; BBH] matrix; subtracting
# the number of noise rows turns each into a row of the targets table. Using
# len(noise) instead of a hard-coded 24 keeps the lookup aligned with how the
# labels were built.
y_true = [y_test_l2[i - len(noise)] for i in sample_idx]

print(f"Total no. of data samples in level 2: {len(y_true)}")

mse = mean_squared_error(y_true, y_pred)
print('Mean Squared Error: %.3f' % mse)

mae = mean_absolute_error(y_true, y_pred)
print('Mean Absolute Error: %.3f' % mae)

In [None]:
# One zero-initialised row per level-2 sample; each row pairs the true and
# predicted value of both parameters and ends with the sum of absolute errors.
results = np.zeros((len(X_test_l2), 5))
df = pd.DataFrame(results, columns=['True m1', 'Pred m1', 'True m2', 'Pred m2', 'Sum of Absolute Errors'])

# Fill the table a whole row at a time.
for row, (truth, guess) in enumerate(zip(y_true, y_pred)):
  df.iloc[row] = [truth[0], guess[0], truth[1], guess[1], sum(abs(truth - guess))]

# model2_directory[:-5] drops the trailing "data/" before the output path is appended.
df.to_csv(model2_directory[:-5]+"data/Real Data Results/results.csv", index=False)