In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

In [2]:
# allow_pickle=True is needed because the .npy file wraps a pickled Python object
# (unwrapped with .item()); unpickling can execute code, so only load trusted files.
loaded_toxcast_features_train_val_test = np.load(
    '../preprocessing/preprocessed_data/toxcast_features_train_val_test.npy',
    allow_pickle=True,
).item()

# Pull the per-split feature arrays out of the loaded mapping by split name.
(loaded_toxcast_ecfp_descr_quantiles_scaled_X_train,
 loaded_toxcast_ecfp_descr_quantiles_scaled_X_val,
 loaded_toxcast_ecfp_descr_quantiles_scaled_X_test) = (
    loaded_toxcast_features_train_val_test[split_name]
    for split_name in ('train', 'validation', 'test')
)

In [3]:
# Stack the three splits row-wise (train rows first, then validation, then test)
# into a single feature array.
toxcast_features = np.concatenate(
    [
        loaded_toxcast_ecfp_descr_quantiles_scaled_X_train,
        loaded_toxcast_ecfp_descr_quantiles_scaled_X_val,
        loaded_toxcast_ecfp_descr_quantiles_scaled_X_test,
    ],
    axis=0,
)

In [4]:
# Sanity check: dimensions of the row-wise concatenation of the train, validation and test feature arrays.
toxcast_features.shape

(8595, 2248)

In [5]:
# allow_pickle=True is needed because the .npy file wraps a pickled Python object
# (unwrapped with .item()); unpickling can execute code, so only load trusted files.
loaded_toxcast_labels_train_val_test = np.load(
    '../preprocessing/preprocessed_data/toxcast_labels_train_val_test.npy',
    allow_pickle=True,
).item()

# Pull the per-split label arrays out of the loaded mapping by split name.
(loaded_toxcast_y_train,
 loaded_toxcast_y_val,
 loaded_toxcast_y_test) = (
    loaded_toxcast_labels_train_val_test[split_name]
    for split_name in ('train', 'validation', 'test')
)

In [6]:
# Stack the label splits row-wise in the same order as the features
# (train, validation, test) so row i of the labels matches row i of toxcast_features.
toxcast_labels = np.concatenate(
    [
        loaded_toxcast_y_train,
        loaded_toxcast_y_val,
        loaded_toxcast_y_test,
    ],
    axis=0,
)

In [7]:
# Sanity check: dimensions of the row-wise concatenation of the train, validation and test label arrays.
toxcast_labels.shape

(8595, 617)

In [8]:
# Partition the label columns into two disjoint groups: columns [0, 308) and
# columns [308, 617). Every row is kept in both groups; only the columns differ.
# NOTE(review): the boundaries 308 / 617 are hard-coded — confirm they match the label array width.
toxcast_labels_val, toxcast_labels_test = (
    toxcast_labels[:, :308],
    toxcast_labels[:, 308:617],
)

In [9]:
# Sanity check: both column groups keep every row and together cover all label columns.
toxcast_labels_val.shape, toxcast_labels_test.shape

((8595, 308), (8595, 309))

In [10]:
def split_indices_for_task(task_labels, n_support=5, seed=42):
    """Pick a small balanced support set for one task and return the remaining rows.

    Parameters
    ----------
    task_labels : array-like, 1-D
        Labels of a single task; entries equal to 1 are treated as active and
        entries equal to 0 as inactive.
    n_support : int, default 5
        Number of active and number of inactive rows to draw (without replacement).
    seed : int, default 42
        Seed of the private random generator used for the draw. The same seed is
        used on every call, so the draw for a given label vector is reproducible.

    Returns
    -------
    selected_active : np.ndarray
        ``n_support`` row indices whose label is 1.
    selected_inactive : np.ndarray
        ``n_support`` row indices whose label is 0.
    rest_indices : np.ndarray
        Sorted indices of every row that was not selected. This includes rows
        whose label is neither 0 nor 1, if any exist — callers that need only
        labelled rows must filter them out themselves.

    Raises
    ------
    ValueError
        If the task has fewer than ``n_support`` active or inactive rows.
    """
    task_labels = np.asarray(task_labels)
    active_indices = np.where(task_labels == 1)[0]
    inactive_indices = np.where(task_labels == 0)[0]

    if len(active_indices) < n_support or len(inactive_indices) < n_support:
        raise ValueError(
            f"Need at least {n_support} active and {n_support} inactive samples, "
            f"got {len(active_indices)} active and {len(inactive_indices)} inactive."
        )

    # A private RandomState seeded with `seed` yields exactly the draws that
    # np.random.seed(seed) followed by np.random.choice would, but leaves the
    # global NumPy RNG untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)
    selected_active = rng.choice(active_indices, size=n_support, replace=False)
    selected_inactive = rng.choice(inactive_indices, size=n_support, replace=False)

    selected = np.concatenate([selected_active, selected_inactive])
    rest_indices = np.setdiff1d(np.arange(task_labels.shape[0]), selected)

    return selected_active, selected_inactive, rest_indices

def process_indices_for_all_tasks(toxcast_labels_val):
    """Run ``split_indices_for_task`` on every column of a label matrix.

    Parameters
    ----------
    toxcast_labels_val : 2-D array
        Label matrix; each column is handled as one task.

    Returns
    -------
    list of dict
        One dict per column, in column order, with keys ``"task"`` (column
        index), ``"indices_active"``, ``"indices_inactive"`` and
        ``"indices_rest"`` holding the three index arrays returned by
        ``split_indices_for_task`` for that column.
    """

    def _indices_for(task_id):
        # Split the rows of a single task column and package the result.
        active, inactive, rest = split_indices_for_task(toxcast_labels_val[:, task_id])
        return {
            "task": task_id,
            "indices_active": active,
            "indices_inactive": inactive,
            "indices_rest": rest,
        }

    n_tasks = toxcast_labels_val.shape[1]
    return [_indices_for(task_id) for task_id in range(n_tasks)]

In [11]:
# Build the per-task index dictionaries for both column groups
# (first the validation group, then the test group).
val_tasks_indices, test_tasks_indices = map(
    process_indices_for_all_tasks,
    (toxcast_labels_val, toxcast_labels_test),
)

In [12]:
# Inspect the index split produced for the first task of the validation group.
val_tasks_indices[0]

{'task': 0,
 'indices_active': array([6522, 2862, 1497, 7059, 1526], dtype=int64),
 'indices_inactive': array([6568, 1766, 2715, 6455, 3518], dtype=int64),
 'indices_rest': array([   0,    1,    2, ..., 8592, 8593, 8594])}

In [13]:
# Inspect the index split produced for the first task of the test group.
test_tasks_indices[0]

{'task': 0,
 'indices_active': array([6210, 4636, 6840, 1168, 3190], dtype=int64),
 'indices_inactive': array([7281, 1869, 6455, 3755, 1701], dtype=int64),
 'indices_rest': array([   0,    1,    2, ..., 8592, 8593, 8594])}

In [14]:
def create_layer(in_size, out_size, activation_function=nn.ReLU, p=0.25):
    """Build one hidden block: AlphaDropout -> Linear -> activation.

    Parameters
    ----------
    in_size : int
        Number of input features of the linear layer.
    out_size : int
        Number of output features of the linear layer.
    activation_function : callable, default nn.ReLU
        Zero-argument factory (typically an ``nn.Module`` class) that returns
        the activation module.
    p : float, default 0.25
        Drop probability passed to ``nn.AlphaDropout``.

    Returns
    -------
    nn.Sequential
        The three modules in the order dropout (index 0), linear (index 1),
        activation (index 2).
    """
    stages = [
        nn.AlphaDropout(p=p),
        nn.Linear(in_size, out_size),
        activation_function(),
    ]
    return nn.Sequential(*stages)


class DNN(nn.Module):
    """Fully connected network: input layer, stacked hidden blocks, linear output.

    Parameters
    ----------
    input_layer : int
        Number of input features.
    hidden_layers : sequence of int
        Hidden widths. The first entry is the output width of the input layer;
        each consecutive pair ``(hidden_layers[i], hidden_layers[i + 1])``
        becomes one block built by ``create_layer``.
    output_layer : int
        Number of outputs of the final linear layer.
    activation_function : callable, default nn.ReLU
        Zero-argument factory returning the activation module used after the
        input layer and inside every hidden block.
    p : float, default 0.25
        Drop probability for every ``nn.AlphaDropout`` in the network.
    """

    def __init__(self, input_layer, hidden_layers, output_layer, activation_function=nn.ReLU, p=0.25):
        super().__init__()

        # Submodules are registered in this exact order so that state_dict keys
        # and parameter-initialisation order stay stable.
        self.input_layer = nn.Linear(input_layer, hidden_layers[0])
        self.act1 = activation_function()
        self.dropout1 = nn.AlphaDropout(p=p)

        blocks = []
        for width_in, width_out in zip(hidden_layers, hidden_layers[1:]):
            blocks.append(create_layer(width_in, width_out, activation_function, p))
        self.hidden_layers = nn.Sequential(*blocks)

        self.output_layer = nn.Linear(hidden_layers[-1], output_layer)

    def forward(self, x):
        """Return the raw outputs of the final linear layer for input ``x``."""
        # Dropout is applied to the raw input, before the first linear layer.
        dropped = self.dropout1(x)
        activated = self.act1(self.input_layer(dropped))
        features = self.hidden_layers(activated)
        return self.output_layer(features)

class LRNN(nn.Module):
    """Sigmoid classification head on top of an existing feature extractor.

    The extractor's ``input_layer``, ``act1``, ``dropout1`` and
    ``hidden_layers`` modules are reused *by reference* (not copied): this
    model and ``feature_extractor`` share the same parameters and the same
    train/eval flags on those submodules. Only ``output_layer2`` is new.

    Parameters
    ----------
    feature_extractor : nn.Module
        Module exposing ``input_layer`` (an ``nn.Linear``), ``act1``,
        ``dropout1`` and ``hidden_layers`` (an ``nn.Sequential`` whose blocks
        hold an ``nn.Linear`` at index 1).
    output_size : int, default 1
        Number of sigmoid outputs.
    """

    def __init__(self, feature_extractor, output_size=1):
        super().__init__()
        self.input_layer = feature_extractor.input_layer
        self.act1 = feature_extractor.act1
        self.dropout1 = feature_extractor.dropout1
        self.hidden_layers = feature_extractor.hidden_layers

        # Width of the features fed to the new head: the last hidden block's
        # linear layer if there is one, otherwise the input layer itself
        # (an extractor with a single hidden width has no hidden blocks).
        if len(feature_extractor.hidden_layers) > 0:
            feature_size = feature_extractor.hidden_layers[-1][1].out_features
        else:
            feature_size = feature_extractor.input_layer.out_features

        self.output_layer2 = nn.Linear(feature_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid outputs in [0, 1] for input ``x``."""
        x = self.dropout1(x)
        x = self.act1(self.input_layer(x))
        x = self.hidden_layers(x)
        x = self.output_layer2(x)
        x = self.sigmoid(x)
        return x


# Architecture of the pretrained network; it has to match the checkpoint loaded below.
params = {'input_size': 2248,
          'hidden_layers': [1024, 128],
          'output_size': 12,
          'activation_function': nn.SELU,
          'dropout_p': 0.45
         }

# Read each hyperparameter by key instead of unpacking params.values(), so the
# assignment does not silently depend on the dict's insertion order.
input_size = params['input_size']
hidden_layers = params['hidden_layers']
output_size = params['output_size']
activation_function = params['activation_function']
dropout_p = params['dropout_p']
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = DNN(input_size, hidden_layers, output_size, activation_function, p=dropout_p)
# NOTE: torch.load unpickles the file, which can execute arbitrary code —
# only load checkpoints from a trusted source.
model.load_state_dict(torch.load('../pretraining/training/dnn_best_model.pth', map_location=device))
model.eval()

# LRNN reuses model's layer objects by reference, so moving lr_model to the
# device also moves those shared layers of `model`.
lr_model = LRNN(model)
lr_model.to(device)
# Put the whole head in eval mode explicitly rather than relying on the shared
# submodules having inherited it from model.eval() above.
lr_model.eval()

model_dict = model.state_dict()
lr_model_dict = lr_model.state_dict()

# Copy every pretrained tensor except the original output layer. Because the
# layers are shared with `model`, this copy is redundant; it is kept as a check
# that the key sets line up (load_state_dict reports any mismatch).
filtered_dict = {k: v for k, v in model_dict.items() if k in lr_model_dict and 'output_layer' not in k}

lr_model_dict.update(filtered_dict)
lr_model.load_state_dict(lr_model_dict)

<All keys matched successfully>

In [15]:
# Display the assembled model: the reused pretrained layers followed by the new sigmoid head.
lr_model

LRNN(
  (input_layer): Linear(in_features=2248, out_features=1024, bias=True)
  (act1): SELU()
  (dropout1): AlphaDropout(p=0.45, inplace=False)
  (hidden_layers): Sequential(
    (0): Sequential(
      (0): AlphaDropout(p=0.45, inplace=False)
      (1): Linear(in_features=1024, out_features=128, bias=True)
      (2): SELU()
    )
  )
  (output_layer2): Linear(in_features=128, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)

In [19]:
# Smoke test: run the model on the active / inactive / rest rows of the first
# validation task. Gradients are not needed for this inference-only check.
with torch.no_grad():
    input_data_active, input_data_inactive, input_data_rest = (
        torch.from_numpy(toxcast_features[val_tasks_indices[0][index_key]]).to(torch.float32).to(device)
        for index_key in ('indices_active', 'indices_inactive', 'indices_rest')
    )
    output_active, output_inactive, output_rest = (
        lr_model(batch)
        for batch in (input_data_active, input_data_inactive, input_data_rest)
    )

print(output_active.shape, output_inactive.shape, output_rest.shape)

torch.Size([5, 1]) torch.Size([5, 1]) torch.Size([8585, 1])
