# Config and Load Data

In [4]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from BaseNet_clstm import *
from model import MMDNet, PredictNet
from loss import *

ModuleNotFoundError: No module named 'BaseNet_clstm'

In [1]:

# ===================================== Hyper-parameters =====================================
# --- optimisation ---
batch_size = 512
num_epochs = 14
lr = 1e-3        # learning rate
l2_decay = 5e-4  # L2 penalty

# --- data / network dimensions ---
sequence_length = 100  # number of visits
input_size = 150
hidden_size = 128      # output of hidden features
num_layers = 2
num_classes = 1        # dense 1 for binary classification
# ===================================== LOAD DATA =====================================
import os  # only needed here, to build the data file paths


def _load_site(data_dir, site):
    """Load one site's feature and label arrays and print the feature shape.

    Reads ``<site>_X.csv`` and ``<site>_y.csv`` from ``data_dir`` with
    ``np.load`` and returns them as an ``(X, y)`` tuple.
    """
    # NOTE(review): np.load expects NumPy's binary .npy/.npz format (or a
    # pickle). If these files are plain-text CSV they need np.loadtxt /
    # np.genfromtxt instead -- confirm how they were written.
    features = np.load(os.path.join(data_dir, site + "_X.csv"))
    labels = np.load(os.path.join(data_dir, site + "_y.csv"))
    print(site + " data:", features.shape)
    return features, labels


print("loading data...")
path = r"/home/ameen/Desktop/JAIM/data/ML"

# os.path.join inserts the directory separator that plain `path + name`
# concatenation left out (that produced ".../MLTemple_X.csv").
X_Temple, y_Temple = _load_site(path, "Temple")

X_PSU, y_PSU = _load_site(path, "PSU")

X_Hopkins, y_Hopkins = _load_site(path, "Hopkins")

X_Geisinger, y_Geisinger = _load_site(path, "Geisinger")

X_PITT, y_PITT = _load_site(path, "PITT")

# --------------------------------

# reshape to 3D: (samples, sequence_length, input_size)
# The trailing dimensions come from the hyper-parameters instead of repeating
# the literals 100 and 150, so the arrays cannot drift out of step with the
# input_size that is handed to the network.
print("\nReshaping to 3D...")
X_Temple = X_Temple.reshape((X_Temple.shape[0], sequence_length, input_size))
print("Temple - 3D tensor:", X_Temple.shape)

X_PSU = X_PSU.reshape((X_PSU.shape[0], sequence_length, input_size))
print("PSU - 3D tensor:", X_PSU.shape)

X_Hopkins = X_Hopkins.reshape((X_Hopkins.shape[0], sequence_length, input_size))
print("Hopkins - 3D tensor:", X_Hopkins.shape)

X_PITT = X_PITT.reshape((X_PITT.shape[0], sequence_length, input_size))
print("PITT - 3D tensor:", X_PITT.shape)

X_Geisinger = X_Geisinger.reshape((X_Geisinger.shape[0], sequence_length, input_size))
print("Geisinger - 3D tensor:", X_Geisinger.shape)

# -----------------------------------------------
def _pair_samples(features, labels):
    """Return a list of ``(features[i], labels[i])`` tuples, one per sample.

    The list has ``len(features)`` entries. As with the index loop this
    replaces, having fewer labels than samples raises ``IndexError`` and
    any surplus labels are ignored.
    """
    if len(labels) < len(features):
        raise IndexError(
            "got %d labels for %d samples" % (len(labels), len(features))
        )
    # zip stops at the shorter input, which the check above guarantees is
    # `features`.
    return list(zip(features, labels))


print("preparing for DataLoader (X, y) tuple...")
Temple_data = _pair_samples(X_Temple, y_Temple)

PSU_data = _pair_samples(X_PSU, y_PSU)

Geisinger_data = _pair_samples(X_Geisinger, y_Geisinger)

Hopkins_data = _pair_samples(X_Hopkins, y_Hopkins)

PITT_data = _pair_samples(X_PITT, y_PITT)

# -----------------------------------------

# Hold out 5% of every source (training) site for validation. Geisinger is
# not split here; it is only counted in the report below as the test set.
_split_opts = {"test_size": 0.05, "random_state": 42}

X_Temple_train, X_Temple_val = train_test_split(Temple_data, **_split_opts)
X_Hopkins_train, X_Hopkins_val = train_test_split(Hopkins_data, **_split_opts)
X_PITT_train, X_PITT_val = train_test_split(PITT_data, **_split_opts)
X_PSU_train, X_PSU_val = train_test_split(PSU_data, **_split_opts)

# One "%d" slot per line of the report, filled in the same order as the
# collections listed in _split_sizes.
_split_report = (
    "X_Temple_train %d \n"
    "X_Temple_val %d \n"
    "X_PSU_train %d \n"
    "X_PSU_val %d \n"
    "X_Hopkins_train %d \n"
    "X_Hopkins_val %d \n"
    "X_PITT_train %d \n"
    "X_PITT_val %d \n"
    "X_Geisinger -> test %d \n"
)
_split_sizes = tuple(
    len(part)
    for part in (
        X_Temple_train, X_Temple_val,
        X_PSU_train, X_PSU_val,
        X_Hopkins_train, X_Hopkins_val,
        X_PITT_train, X_PITT_val,
        Geisinger_data,
    )
)
print(_split_report % _split_sizes)

# DataLoader
print("DataLoader...")

# Every loader shares the same settings: fixed batch size, original sample
# order, and the final incomplete batch discarded.
# NOTE(review): drop_last=True also applies to the validation and test
# loaders, so a split with fewer than batch_size samples yields no batches
# at all -- confirm this is intended.
_loader_opts = dict(batch_size=batch_size, shuffle=False, drop_last=True)

# test (target site: Geisinger)
tgt_test_dataloader = DataLoader(dataset=Geisinger_data, **_loader_opts)

# train / validation, source 1: Temple
src_1_train_dataloader = DataLoader(dataset=X_Temple_train, **_loader_opts)
src_1_validate_dataloader = DataLoader(dataset=X_Temple_val, **_loader_opts)

# train / validation, source 2: Hopkins
src_2_train_dataloader = DataLoader(dataset=X_Hopkins_train, **_loader_opts)
src_2_validate_dataloader = DataLoader(dataset=X_Hopkins_val, **_loader_opts)

# train / validation, source 3: PSU
src_3_train_dataloader = DataLoader(dataset=X_PSU_train, **_loader_opts)
src_3_validate_dataloader = DataLoader(dataset=X_PSU_val, **_loader_opts)

# train / validation, source 4: PITT
src_4_train_dataloader = DataLoader(dataset=X_PITT_train, **_loader_opts)
src_4_validate_dataloader = DataLoader(dataset=X_PITT_val, **_loader_opts)

# ==========================================================================================
# All networks are placed on the CPU.
device = torch.device("cpu")

# Number of batches per pass over each loader.
len_tgt = len(tgt_test_dataloader)
len_src_1_train = len(src_1_train_dataloader)
len_src_2_train = len(src_2_train_dataloader)
len_src_3_train = len(src_3_train_dataloader)
len_src_4_train = len(src_4_train_dataloader)

# Seed NumPy and PyTorch before the networks are constructed.
seed = 32
np.random.seed(seed=seed)
torch.manual_seed(seed)

# Initialize network
BaseNet = BaseNet_clstm(input_size, hidden_size, num_layers, num_classes).to(device)
TransferNet = MMDNet().to(device)
TaskNet = PredictNet().to(device)

# Loss and optimizer
task_criterion = nn.BCELoss()

# A single Adam optimizer updates the parameters of all three networks with
# the same learning rate and weight decay.
optimizer = optim.Adam(
    [
        {'params': BaseNet.parameters()},
        {'params': TransferNet.parameters()},
        {'params': TaskNet.parameters()},
    ],
    lr=lr,
    weight_decay=l2_decay,
)

# Loss histories. These were previously initialised twice (before and after
# the network setup); once is enough.
src_loss_list = []
total_loss_list = []
tgt_val_loss_list = []

# NOTE(review): presumably the best (lowest) BCE seen so far, used by code
# outside this cell. BCE can exceed 1.0, so a run whose loss never drops
# below 1.0 would never register a "best" value -- confirm 1.0 is intended
# rather than float("inf").
best_bce = 1.0

ModuleNotFoundError: No module named 'BaseNet_clstm'