#  LOT perdiciton Model Training
The task is to predict if the LOS > 3 Days by logistic regression.

This document is a based on [this code](https://github.com/MLforHealth/MIMIC_Extract/blob/master/notebooks/Baselines%20for%20Mortality%20and%20LOS%20prediction%20-%20Sklearn.ipynb)

In [1]:
from __future__ import print_function, division

In [2]:
import os, pickle, pandas as pd, numpy as np, scipy.stats as ss

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score

import torch


In [3]:
DATA_FILEPATH     = './mimic_data/final/grouping_5/all_hourly_data.h5'
RAW_DATA_FILEPATH = './mimic_data/final/nogrouping_5/all_hourly_data.h5'
GAP_TIME          = 6  # In hours
WINDOW_SIZE       = 24 # In hours
SEED              = 1
ID_COLS           = ['subject_id', 'hadm_id', 'icustay_id']
GPU               = '2'

os.environ['CUDA_VISIBLE_DEVICES'] = GPU
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f5993718fc0>

In [15]:
data_full_lvl2 = pd.read_hdf(DATA_FILEPATH, 'vitals_labs')
data_full_raw  = pd.read_hdf(RAW_DATA_FILEPATH, 'vitals_labs') 
statics        = pd.read_hdf(DATA_FILEPATH, 'patients')

In [None]:
data_full_lvl2.head()

In [None]:
data_full_raw.head()

In [None]:
statics.head()

In [16]:
def simple_imputer(df):
    idx = pd.IndexSlice
    df = df.copy()
    if len(df.columns.names) > 2: df.columns = df.columns.droplevel(('label', 'LEVEL1', 'LEVEL2'))
    
    df_out = df.loc[:, idx[:, ['mean', 'count']]]
    icustay_means = df_out.loc[:, idx[:, 'mean']].groupby(ID_COLS).mean()
    
    df_out.loc[:,idx[:,'mean']] = df_out.loc[:,idx[:,'mean']].groupby(ID_COLS).fillna(
        method='ffill'
    ).groupby(ID_COLS).fillna(icustay_means).fillna(0)
    
    df_out.loc[:, idx[:, 'count']] = (df.loc[:, idx[:, 'count']] > 0).astype(float)
    df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True)
    
    is_absent = (1 - df_out.loc[:, idx[:, 'mask']])
    hours_of_absence = is_absent.cumsum()
    time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill')
    time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True)

    df_out = pd.concat((df_out, time_since_measured), axis=1)
    df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100)
    
    df_out.sort_index(axis=1, inplace=True)
    return df_out

In [17]:
Ys = statics[statics.max_hours > WINDOW_SIZE + GAP_TIME][['mort_hosp', 'mort_icu', 'los_icu']]
Ys['los_3'] = Ys['los_icu'] > 3
Ys.drop(columns=['los_icu'], inplace=True)
Ys.astype(float)

lvl2, raw = [df[
    (df.index.get_level_values('icustay_id').isin(set(Ys.index.get_level_values('icustay_id')))) &
    (df.index.get_level_values('hours_in') < WINDOW_SIZE)
] for df in (data_full_lvl2, data_full_raw)]

raw.columns = raw.columns.droplevel(level=['label', 'LEVEL1', 'LEVEL2'])

train_frac, dev_frac, test_frac = 0.7, 0.1, 0.2
lvl2_subj_idx, raw_subj_idx, Ys_subj_idx = [df.index.get_level_values('subject_id') for df in (lvl2, raw, Ys)]
lvl2_subjects = set(lvl2_subj_idx)
assert lvl2_subjects == set(Ys_subj_idx), "Subject ID pools differ!"
assert lvl2_subjects == set(raw_subj_idx), "Subject ID pools differ!"

np.random.seed(SEED)
subjects, N = np.random.permutation(list(lvl2_subjects)), len(lvl2_subjects)
N_train, N_dev, N_test = int(train_frac * N), int(dev_frac * N), int(test_frac * N)
train_subj = subjects[:N_train]
dev_subj   = subjects[N_train:N_train + N_dev]
test_subj  = subjects[N_train+N_dev:]

[(lvl2_train, lvl2_dev, lvl2_test), (raw_train, raw_dev, raw_test), (Ys_train, Ys_dev, Ys_test)] = [
    [df[df.index.get_level_values('subject_id').isin(s)] for s in (train_subj, dev_subj, test_subj)] \
    for df in (lvl2, raw, Ys)
]

idx = pd.IndexSlice
lvl2_means, lvl2_stds = lvl2_train.loc[:, idx[:,'mean']].mean(axis=0), lvl2_train.loc[:, idx[:,'mean']].std(axis=0)
raw_means, raw_stds = raw_train.loc[:, idx[:,'mean']].mean(axis=0), raw_train.loc[:, idx[:,'mean']].std(axis=0)

lvl2_train.loc[:, idx[:,'mean']] = (lvl2_train.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds
lvl2_dev.loc[:, idx[:,'mean']] = (lvl2_dev.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds
lvl2_test.loc[:, idx[:,'mean']] = (lvl2_test.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds

raw_train.loc[:, idx[:,'mean']] = (raw_train.loc[:, idx[:,'mean']] - raw_means)/raw_stds
raw_dev.loc[:, idx[:,'mean']] = (raw_dev.loc[:, idx[:,'mean']] - raw_means)/raw_stds
raw_test.loc[:, idx[:,'mean']] = (raw_test.loc[:, idx[:,'mean']] - raw_means)/raw_stds

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


If raw or lvl2 data

In [None]:
# raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test = [
#     simple_imputer(df) for df in (raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test)
# ]
# raw_flat_train, raw_flat_dev, raw_flat_test, lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test = [
#     df.pivot_table(index=['subject_id', 'hadm_id', 'icustay_id'], columns=['hours_in']) for df in (
#         raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test
#     )
# ]

# for df in lvl2_train, lvl2_dev, lvl2_test, raw_train, raw_dev, raw_test: assert not df.isnull().any().any()

In [18]:
lvl2_train, lvl2_dev, lvl2_test = [ simple_imputer(df) for df in ( lvl2_train, lvl2_dev, lvl2_test)]
lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test = [df.pivot_table(index=['subject_id', 'hadm_id', 'icustay_id'], 
                                                    columns=['hours_in']) for df in (lvl2_train, lvl2_dev, lvl2_test) ]

for df in lvl2_train, lvl2_dev, lvl2_test: assert not df.isnull().any().any()
for df in lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test: assert not df.isnull().any().any()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [None]:
# # Group DataFrames in a list
# dataframes = [raw_flat_train, raw_flat_dev, raw_flat_test, lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test ]
# raw_flat_train.to_csv('raw_flat_train.csv')
# raw_flat_dev.to_csv('raw_flat_dev.csv')
# raw_flat_test.to_csv('raw_flat_test.csv')
# lvl2_flat_train.to_csv('lvl2_flat_train.csv')
# lvl2_flat_dev.to_csv('lvl2_flat_dev.csv')
# lvl2_flat_test.to_csv('lvl2_flat_test.csv')
# Ys_train.to_csv('Ys_train.csv')
# Ys_dev.to_csv('Ys_dev.csv')
# Ys_test.to_csv('Ys_test.csv')

In [7]:
# raw_flat_train = pd.read_csv('load_data/raw_flat_train.csv', index_col=[0,1,2])
# raw_flat_dev = pd.read_csv('load_data/raw_flat_dev.csv', index_col=[0,1,2])
# raw_flat_test = pd.read_csv('load_data/raw_flat_test.csv', index_col=[0,1,2])
# lvl2_flat_train = pd.read_csv('load_data/lvl2_flat_train.csv', index_col=[0,1,2])
# lvl2_flat_dev = pd.read_csv('load_data/lvl2_flat_dev.csv', index_col=[0,1,2])
# lvl2_flat_test = pd.read_csv('load_data/lvl2_flat_test.csv', index_col=[0,1,2])
# Ys_train = pd.read_csv('load_data/Ys_train.csv', index_col=[0])
# Ys_dev = pd.read_csv('load_data/Ys_dev.csv', index_col=[0])
# Ys_test = pd.read_csv('load_data/Ys_test.csv', index_col=[0])

In [None]:
Ys_test['los_3'].head

In [None]:

hyperparams = dict({
    'C': 0.18544999360231632,
    'penalty': 'l2',
    'solver': 'liblinear',
    'max_iter': 100
})
np.random.seed(SEED)

In [None]:
def run_only_final(model, hyperparams, X_flat_train, X_flat_dev, X_flat_test):
    best_M = model(**hyperparams)
    best_M.fit(pd.concat((X_flat_train, X_flat_dev)), pd.concat((Ys_train, Ys_dev))['los_3'])
    y_true  = Ys_test['los_3']
    y_score = best_M.predict_proba(X_flat_test)[:, 1]
    y_pred  = best_M.predict(X_flat_test)

    auc   = roc_auc_score(y_true, y_score)
    auprc = average_precision_score(y_true, y_score)
    acc   = accuracy_score(y_true, y_pred)
    F1    = f1_score(y_true, y_pred)
    
    return best_M, hyperparams, auc, auprc, acc, F1

In [None]:
results = run_only_final(LogisticRegression,
                                hyperparams,
                                lvl2_flat_train,
                                lvl2_flat_dev,
                                lvl2_flat_test) 
results

If we want to have  pytorch LR model

In [None]:
from torch.utils.data import DataLoader, TensorDataset

import torch
import torch.nn as nn
import torch.optim as optim
import pickle
from pathlib import Path

class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim: int):
        """Initialize the logistic regression model with a single linear layer.

        Args:
        ----
            input_dim (int): The size of the input feature vector.
        """
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, 1)  # Binary classification (1 output)
        # Metadata initialization
        self.init_params = {"input_dim": input_dim}

    def forward(self, x):
        """Forward pass through the model."""
        return torch.sigmoid(self.linear(x))  # Sigmoid to produce probabilities for binary classification


# Function to save the model and metadata
# def save_model_and_metadata(  # noqa: PLR0913
#     model: torch.nn.Module,
#     data_split: dict,
#     configs: dict,
#     train_acc: float,
#     test_acc: float,
#     train_loss: float,
#     test_loss: float,
#     optimizer: optim.Optimizer,
#     loss: nn.Module,
#     n: str
# ) -> None:
#     """Save the model and metadata."""
#     log_dir = configs["run"]["log_dir"]
#     Path(log_dir).mkdir(parents=True, exist_ok=True)

#     with open(f"{log_dir}/target_model" + n + ".pkl", "wb") as f:
#         torch.save(model.state_dict(), f)

#     meta_data = {}

#     meta_data["init_params"] = model.init_params if hasattr(model, "init_params") else {}
#     # meta_data["train_indices"] = data_split["train_indices"]
#     # meta_data["test_indices"] = data_split["test_indices"]
#     # meta_data["num_train"] = len(data_split["train_indices"])

#     # read out optimizer parameters
#     meta_data["optimizer"] = {}
#     meta_data["optimizer"]["name"] = optimizer.__class__.__name__.lower()
#     meta_data["optimizer"]["lr"] = optimizer.param_groups[0].get("lr", 0)
#     meta_data["optimizer"]["weight_decay"] = optimizer.param_groups[0].get("weight_decay", 0)
#     meta_data["optimizer"]["momentum"] = optimizer.param_groups[0].get("momentum", 0)
#     meta_data["optimizer"]["dampening"] = optimizer.param_groups[0].get("dampening", 0)
#     meta_data["optimizer"]["nesterov"] = optimizer.param_groups[0].get("nesterov", False)

#     # read out loss parameters
#     meta_data["loss"] = {}
#     meta_data["loss"]["name"] = loss.__class__.__name__.lower()

#     meta_data["batch_size"] = configs["train"]["batch_size"]
#     meta_data["epochs"] = configs["train"]["epochs"]
#     meta_data["learning_rate"] = configs["train"]["learning_rate"]
#     meta_data["weight_decay"] = configs["train"]["weight_decay"]
#     meta_data["train_acc"] = train_acc
#     meta_data["test_acc"] = test_acc
#     meta_data["train_loss"] = train_loss
#     meta_data["test_loss"] = test_loss
#     meta_data["dataset"] = configs["data"]["dataset"]

#     with open(f"{log_dir}/model_metadata"+ n + ".pkl", "wb") as f:
#         pickle.dump(meta_data, f)

import torch.nn.init as init
# Training and evaluation setup
def train_and_save_logistic_regression(X_train, y_train, X_test, y_test, configs):
    # Convert the inverse regularization parameter C to weight_decay (regularization strength)


    # Initialize the model
    input_dim = X_train.shape[1]  # Assuming X_train is a NumPy array or similar
    print(input_dim)
    model = LogisticRegressionModel(input_dim)
    criterion = nn.BCELoss()


    optimizer = optim.SGD(model.parameters(), lr =configs["train"]["learning_rate"], 
                            weight_decay=configs["train"]["weight_decay"])

    # Training loop (max_iter = number of epochs)
    epochs =  configs["train"]["epochs"]
    batch_size = configs["train"]["batch_size"]
    
    inputs = torch.tensor(X_train.values, dtype=torch.float32)
    # print(f"inputs {y_train[:,None].shape}")
    labels = torch.tensor(y_train[:,None], dtype=torch.float32)
   
    # Create a TensorDataset and DataLoader for batch processing
    dataset = TensorDataset(inputs, labels)
    batch_size = 128
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        
    for epoch in range(epochs):
        model.train()

        epoch_loss = 0
        for batch_inputs, batch_labels in train_loader:

            optimizer.zero_grad()  # Zero the gradient buffers

            outputs = model(batch_inputs)  # Forward pass
            loss = criterion(outputs, batch_labels)  # Calculate loss
            loss.backward()  # Backward pass

            optimizer.step()  # Optimize
            epoch_loss += loss.item() * batch_size
        
        epoch_loss /= len(dataset)
        if epoch % 2 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')

    # Evaluation on test set
    with torch.no_grad():
        inputs = torch.tensor(X_test.values, dtype=torch.float32)
        labels = torch.tensor(y_test, dtype=torch.float32)
        outputs = model(inputs).squeeze()
        predicted = (outputs >= 0.5).float()
        correct = (predicted == labels).float().sum()
        test_acc = correct / len(labels)
        test_loss = criterion(outputs, labels).item()

    # Calculate training accuracy and loss
    with torch.no_grad():
        inputs = torch.tensor(X_train.values, dtype=torch.float32)
        labels = torch.tensor(y_train, dtype=torch.float32)
        outputs = model(inputs).squeeze()
        predicted = (outputs >= 0.5).float()
        correct = (predicted == labels).float().sum()
        train_acc = correct / len(labels)
        train_loss = criterion(outputs, labels).item()

    print(f'Test Accuracy: {test_acc.item():.4f}, Test Loss: {test_loss:.4f}')
    print(f'Train Accuracy: {train_acc.item():.4f}, Train Loss: {train_loss:.4f}')
    # Save the model and metadata
    # save_model_and_metadata(
    #     model=model,
    #     data_split=data_split,
    #     configs=configs,
    #     train_acc=train_acc.item(),
    #     test_acc=test_acc.item(),
    #     train_loss=train_loss,
    #     test_loss=test_loss,
    #     optimizer=optimizer,
    #     loss=criterion,
    #     n=n
    # )


# Example configurations and data split
configs = {
    "run": {"log_dir": "./logs"},
    "train": {"batch_size": 128, "epochs": 10, "learning_rate": 0.001, "weight_decay": 0.0001},

}



# for n, X_flat_train, X_flat_dev, X_flat_test in (
#     ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),
#     ('raw', raw_flat_train, raw_flat_dev, raw_flat_test)):
    # results = run_only_final(LogisticRegression,
    #                             best_hyperparams,
    #                             X_flat_train,
    #                             X_flat_dev,
    #                             X_flat_test,
    #                             'los_3')


  

train_and_save_logistic_regression(lvl2_flat_train,
                                        Ys_train['los_3'],
                                        lvl2_flat_test,
                                        Ys_test['los_3'],
                                        configs)

Class Distibution

In [None]:
class_0_count = ( Ys_train['los_3'] == 0).sum()  # Count of class 0 samples
class_1_count = ( Ys_train['los_3'] == 1).sum()  # Count of class 1 samples
pos_weight = class_0_count / class_1_count  # Compute class weight for class 1
pos_weight

In [None]:

for df in lvl2_flat_train, lvl2_flat_test : assert not df.isnull().any().any()
lvl2_flat_train.values.shape


In [None]:
class DeepBinaryClassifier(nn.Module):
    def __init__(self, input_dim):
        super(DeepBinaryClassifier, self).__init__()
        
        # Hidden Layer 1
        self.fc1 = nn.Linear(input_dim, 512)
        self.bn1 = nn.BatchNorm1d(512)  # Batch Normalization
        self.dropout1 = nn.Dropout(0.3)  # Dropout for regularization
        
        # Hidden Layer 2
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.dropout2 = nn.Dropout(0.3)
        
        # Hidden Layer 3
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.dropout3 = nn.Dropout(0.3)
        
        # Output Layer (Binary Classification)
        self.output = nn.Linear(128, 1)
    
    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        
        x = torch.relu(self.bn3(self.fc3(x)))
        x = self.dropout3(x)
        
        return self.output(x)  # Do not apply sigmoid here, we will use BCEWithLogitsLoss for stability




# Convert the dataset to PyTorch tensors
X_train_tensor = torch.tensor(lvl2_flat_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor( Ys_train['los_3'], dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(lvl2_flat_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(Ys_test['los_3'], dtype=torch.float32).unsqueeze(1)

# Create a DataLoader for batching
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)




# Instantiate the model, loss function, and optimizer
model = DeepBinaryClassifier(input_dim =X_train_tensor.shape[1])

criterion = nn.BCEWithLogitsLoss()  # Combines sigmoid + BCE loss in a stable manner
optimizer = optim.Adam(model.parameters(), lr= 0.001)

# Training loop
for epoch in range(10):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()  # Clear previous gradients
        outputs = model(X_batch)  # Forward pass
        loss = criterion(outputs, y_batch)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/10], Loss: {running_loss/len(train_loader):.4f}")

# Evaluation on the test set
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    predicted = torch.sigmoid(test_outputs).round()  # Convert logits to probabilities and round
    accuracy = (predicted == y_test_tensor).float().mean()
    print(f"Test Accuracy: {accuracy.item():.4f}")

In [None]:
X_train_tensor.shape[1]

In [19]:
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn as nn
import torch.optim as optim

class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim: int):
        """Initialize the logistic regression model with a single linear layer."""
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, 1)  # Binary classification (1 output)
    
    def forward(self, x):
        """Forward pass through the model."""
        return torch.sigmoid(self.linear(x))  # Sigmoid for binary classification

# Training and evaluation setup
def train_and_save_logistic_regression(X_train, y_train, X_test, y_test, configs):
    # Initialize the model
    input_dim = X_train.shape[1]  # Assuming X_train is a NumPy array or similar
    model = LogisticRegressionModel(input_dim)
    criterion = nn.BCELoss()

    optimizer = optim.SGD(model.parameters(), lr=configs["train"]["learning_rate"], 
                          weight_decay=configs["train"]["weight_decay"])

    # Training loop (max_iter = number of epochs)
    epochs = configs["train"]["epochs"]
    batch_size = configs["train"]["batch_size"]

    inputs = torch.tensor(X_train.values, dtype=torch.float32)
    labels = torch.tensor(y_train.values.reshape(-1, 1), dtype=torch.float32)

    # Create a TensorDataset and DataLoader for batch processing
    dataset = TensorDataset(inputs, labels)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for batch_inputs, batch_labels in train_loader:
            optimizer.zero_grad()  # Zero the gradient buffers
            outputs = model(batch_inputs)  # Forward pass
            loss = criterion(outputs, batch_labels)  # Calculate loss
            loss.backward()  # Backward pass
            optimizer.step()  # Optimize
            epoch_loss += loss.item() * batch_size
        
        epoch_loss /= len(dataset)
        if epoch % 2 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')

    # Evaluation on test set
    with torch.no_grad():
        inputs = torch.tensor(X_test.values, dtype=torch.float32)
        labels = torch.tensor(y_test.values, dtype=torch.float32)
        outputs = model(inputs).squeeze()
        predicted = (outputs >= 0.5).float()
        correct = (predicted == labels).float().sum()
        test_acc = correct / len(labels)
        test_loss = criterion(outputs, labels).item()

    # Calculate training accuracy and loss
    with torch.no_grad():
        inputs = torch.tensor(X_train.values, dtype=torch.float32)
        labels = torch.tensor(y_train.values, dtype=torch.float32)
        outputs = model(inputs).squeeze()
        predicted = (outputs >= 0.5).float()
        correct = (predicted == labels).float().sum()
        train_acc = correct / len(labels)
        train_loss = criterion(outputs, labels).item()

    print(f'Test Accuracy: {test_acc.item():.4f}, Test Loss: {test_loss:.4f}')
    print(f'Train Accuracy: {train_acc.item():.4f}, Train Loss: {train_loss:.4f}')

# Example configurations and data split
configs = {
    "run": {"log_dir": "./logs"},
    "train": {"batch_size": 128, "epochs": 50, "learning_rate": 1e-4, "weight_decay": 5.392},

} 

from sklearn.preprocessing import StandardScaler

# Initialize the scaler
scaler = StandardScaler()

# Fit the scaler on training data and transform both training and test sets
continuous_columns = lvl2_flat_train.select_dtypes(include=['float64', 'int64']).columns
X_continuous = scaler.fit_transform(lvl2_flat_train[continuous_columns])
X1_continuous = scaler.transform(lvl2_flat_test[continuous_columns])

X_scaled = lvl2_flat_train.copy()
X1_scaled = lvl2_flat_test.copy()

X_scaled[continuous_columns] = X_continuous
X1_scaled[continuous_columns] = X1_continuous

X1 = scaler.transform(lvl2_flat_test)
X = pd.DataFrame(X_scaled, columns=lvl2_flat_train.columns)
X1 = pd.DataFrame(X1_scaled, columns=lvl2_flat_test.columns)
train_and_save_logistic_regression(X, Ys_train['los_3'], X1, Ys_test['los_3'], configs)

Epoch [1/50], Loss: 0.7155
Epoch [3/50], Loss: 0.6688
Epoch [5/50], Loss: 0.6524
Epoch [7/50], Loss: 0.6430
Epoch [9/50], Loss: 0.6369
Epoch [11/50], Loss: 0.6330
Epoch [13/50], Loss: 0.6301
Epoch [15/50], Loss: 0.6282
Epoch [17/50], Loss: 0.6266
Epoch [19/50], Loss: 0.6255
Epoch [21/50], Loss: 0.6246
Epoch [23/50], Loss: 0.6240
Epoch [25/50], Loss: 0.6234
Epoch [27/50], Loss: 0.6231
Epoch [29/50], Loss: 0.6227
Epoch [31/50], Loss: 0.6225
Epoch [33/50], Loss: 0.6223
Epoch [35/50], Loss: 0.6221
Epoch [37/50], Loss: 0.6220
Epoch [39/50], Loss: 0.6219
Epoch [41/50], Loss: 0.6218
Epoch [43/50], Loss: 0.6217
Epoch [45/50], Loss: 0.6216
Epoch [47/50], Loss: 0.6216
Epoch [49/50], Loss: 0.6214
Test Accuracy: 0.6708, Test Loss: 0.6261
Train Accuracy: 0.6754, Train Loss: 0.6206


In [14]:
lvl2_flat_train.dtypes

alanine aminotransferase           object
alanine aminotransferase.1         object
alanine aminotransferase.2         object
alanine aminotransferase.3         object
alanine aminotransferase.4         object
alanine aminotransferase.5         object
alanine aminotransferase.6         object
alanine aminotransferase.7         object
alanine aminotransferase.8         object
alanine aminotransferase.9         object
alanine aminotransferase.10        object
alanine aminotransferase.11        object
alanine aminotransferase.12        object
alanine aminotransferase.13        object
alanine aminotransferase.14        object
alanine aminotransferase.15        object
alanine aminotransferase.16        object
alanine aminotransferase.17        object
alanine aminotransferase.18        object
alanine aminotransferase.19        object
alanine aminotransferase.20        object
alanine aminotransferase.21        object
alanine aminotransferase.22        object
alanine aminotransferase.23       