# Interpreter Network (15x1 neurons)
To run this notebook, run each cell in order from top to bottom. This trains the interpreter network on the test-data predictions of the contributing networks in the ensemble.

In [1]:
import keras
import numpy as np
import pandas as pd
import tensorflow as tf
import os
from sklearn.linear_model import LinearRegression
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, Input
import h5py

In [2]:
# Working directory for the notebook: the relative ./weights and ./data paths
# used in later cells are resolved against it. Set the NNANOBODY_ROOT
# environment variable to run against a checkout stored somewhere else;
# when it is unset the default path below is used.
PROJECT_ROOT = os.environ.get('NNANOBODY_ROOT', '/content/drive/My Drive/github/NNanobody/')
os.chdir(PROJECT_ROOT)

In [3]:
def load_all_models():
  """
  Load every saved model that contributes to the ensemble.

  One model is read from ./weights/regression/<dataset>/<model name> for each
  combination of the three dataset names and six model names listed below.

  :return: Dictionary mapping '<dataset> <model name>' to the loaded model, ordered dataset by dataset
  """
  dataset_names = ('Full Regression', 'Hold out Regression', 'Hold out Top 4%')
  model_names = ('seq_32x1_16', 'seq_64x1_16', 'seq_32x2_16', 'seq_32_32', 'seq_32x1_16_filt3', 'seq_emb_32x1_16')

  # Outer loop over datasets, inner loop over model names, so insertion order
  # (and therefore column order downstream) is dataset-major.
  return {
    dataset + ' ' + loaded_model: keras.models.load_model(f'./weights/regression/{dataset}/{loaded_model}')
    for dataset in dataset_names
    for loaded_model in model_names
  }

In [5]:
models = load_all_models()

# Load data for training. Each dataset is read from disk exactly once, and the
# context manager closes the HDF5 handle as soon as the arrays are in memory.
with h5py.File('./data/regression/Test set Regression/test.h5.batch1', 'r') as fi:
  test_batch_data = np.asarray(fi['data'])
  test_batch_labels = np.asarray(fi['label'])

# First 10,000 rows -> X_train / Y_train; last 5,000 rows -> X_test / Y_test.
# NOTE(review): the two slices overlap if the file holds fewer than 15,000 rows - confirm its size.
X_train = test_batch_data[:10000]
Y_train = test_batch_labels[:10000]

X_test = test_batch_data[-5000:]
Y_test = test_batch_labels[-5000:]

In [6]:
def get_stacked_prediction(model_list, dataset):
  """
  get_stacked_prediction stacks each model's predictions for the given data into a numpy array for training the interpreter.

  :param model_list: Dictionary of models (only its values are used); each model must provide predict(dataset)
  :param dataset: CDR3 sequences the models make predictions on; passed unchanged to every model
  :return: 2-D matrix with one row per flattened prediction and one column per model, columns in the dictionary's iteration order
  :raises ValueError: If model_list is empty
  """
  if not model_list:
    # Fail early with a clear message instead of an IndexError from the array shape.
    raise ValueError('model_list must contain at least one model')

  # Flatten each model's output to a 1-D vector; at this point each model is a row.
  per_model = [model.predict(dataset).flatten() for model in model_list.values()]

  # Transpose so that every model becomes a column (a feature for the interpreter).
  return np.array(per_model).T

In [7]:
# Matrix of ensemble predictions on the training slice, then the same matrix
# as a DataFrame whose columns are labelled with the keys of `models`.
output = get_stacked_prediction(models, X_train)
stacked_dataset = pd.DataFrame(output, columns=list(models))

In [None]:
# Feed-forward neural network to weigh each model's predictions accurately.
# The input width is read from the stacked predictions (one feature per column)
# rather than hard-coded, so it stays correct if the ensemble changes size.
# NOTE(review): no random seed is set in this cell, so results may vary between runs.
n_inputs = stacked_dataset.shape[1]

model = Sequential()
model.add(Input(shape=(n_inputs,)))
model.add(Dense(15, activation='relu'))    # single hidden layer of 15 units
model.add(Dense(1, activation='linear'))   # one linear output
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

result = model.fit(x=stacked_dataset.values, y=Y_train, batch_size=100, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Save the trained interpreter network under the weights directory.
interpreter_path = './weights/interpret/Hold out Regression/interpret_15x1'
model.save(interpreter_path)

INFO:tensorflow:Assets written to: ./weights/interpret/Hold out Regression/interpret_15x1/assets


INFO:tensorflow:Assets written to: ./weights/interpret/Hold out Regression/interpret_15x1/assets
