<a href="https://colab.research.google.com/github/Zfeng0207/FIT3199-FYP/blob/dev%2Fryuji/CNN%20model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load the dataset
df = pd.read_csv("00_recurrent_stroke_patient.csv")

# Convert the target column "Stroke_Y/N" to binary integers (0 or 1)
df["Stroke_Y/N"] = df["Stroke_Y/N"].astype(int)

# Convert datetime columns to UNIX timestamps (seconds).
# An explicit 64-bit integer is requested so the cast does not depend on the
# platform's default `int` width.
if "charttime" in df.columns:
    df["charttime"] = pd.to_datetime(df["charttime"]).astype("int64") // 10**9

# Drop identifier / categorical / free-text columns that are not used as features
non_numeric_cols = ["subject_id", "stay_id", "icd_code", "icd_title", "rhythm", "gender", "anchor_year_group", "dod"]
df = df.drop(columns=[col for col in non_numeric_cols if col in df.columns])

# Coerce every remaining column to numeric; values that cannot be parsed become NaN
df = df.apply(pd.to_numeric, errors='coerce')

# Every column except the target is an input feature
features = [col for col in df.columns if col != "Stroke_Y/N"]

# Split row positions into train / validation / test BEFORE computing any
# statistics, so that imputation medians and scaling ranges are learned from
# the training rows only (no information leaks from validation/test data).
# Same proportions and seeds as a direct split of the data: 64% / 16% / 20%.
all_rows = np.arange(len(df))
train_val_rows, test_rows = train_test_split(all_rows, test_size=0.2, random_state=42)
train_rows, val_rows = train_test_split(train_val_rows, test_size=0.2, random_state=42)

# Fill missing values everywhere using medians computed on the training rows
train_medians = df.iloc[train_rows].median()
df = df.fillna(train_medians)

# Normalize features: fit the min/max on the training rows, then apply the same
# transform to all rows. Validation/test values may therefore fall slightly
# outside [0, 1], which is expected.
scaler = MinMaxScaler()
scaler.fit(df.iloc[train_rows][features])
df[features] = scaler.transform(df[features])

# Define input features (X) and target (y) as NumPy arrays
X = df[features].values
y = df["Stroke_Y/N"].values

# Convert to PyTorch tensors; the target becomes a column vector of shape (N, 1)
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32).view(-1, 1)

# Materialize the three subsets from the row positions chosen above
train_idx = torch.as_tensor(train_rows, dtype=torch.long)
val_idx = torch.as_tensor(val_rows, dtype=torch.long)
test_idx = torch.as_tensor(test_rows, dtype=torch.long)
X_train, y_train = X_tensor[train_idx], y_tensor[train_idx]
X_val, y_val = X_tensor[val_idx], y_tensor[val_idx]
X_test, y_test = X_tensor[test_idx], y_tensor[test_idx]

# PyTorch DataLoaders; only the training data is shuffled
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=batch_size)

print("Data processing completed successfully!")

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Define CNN Model for Stroke Prediction
class StrokeCNN(nn.Module):
    """1-D CNN that maps a flat feature vector to a single sigmoid output.

    The input of shape (batch, features) is treated as a one-channel sequence.
    It passes through two conv -> ReLU -> max-pool stages, is flattened, and
    goes through a 128-unit hidden layer with dropout before the final
    sigmoid-activated output of shape (batch, 1).

    Args:
        input_size: Number of features per sample.
        num_filters: Output channels of the first convolution; the second
            convolution produces twice as many.
        kernel_size: Kernel width of both convolutions (padding is fixed at 1).
        dropout: Dropout probability applied after the hidden layer.
    """

    def __init__(self, input_size, num_filters=64, kernel_size=3, dropout=0.3):
        super().__init__()
        doubled_filters = num_filters * 2
        self.conv1 = nn.Conv1d(1, num_filters, kernel_size, padding=1)
        self.conv2 = nn.Conv1d(num_filters, doubled_filters, kernel_size, padding=1)
        self.pool = nn.MaxPool1d(2)
        self.dropout = nn.Dropout(p=dropout)
        self.sigmoid = nn.Sigmoid()

        # Push one dummy sample through the conv stack to discover how many
        # values reach the first fully connected layer.
        with torch.no_grad():
            probe = torch.rand(1, 1, input_size)
            self.flattened_size = self._conv_stack(probe).numel()

        self.fc1 = nn.Linear(self.flattened_size, 128)
        self.fc2 = nn.Linear(128, 1)

    def _conv_stack(self, x):
        """Apply conv -> ReLU -> max-pool for each convolution in order."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(torch.relu(conv(x)))
        return x

    def forward(self, x):
        """Return sigmoid outputs of shape (batch, 1) for input (batch, features)."""
        feature_maps = self._conv_stack(x.unsqueeze(1))  # add the channel axis
        flat = feature_maps.view(feature_maps.shape[0], -1)
        hidden = self.dropout(torch.relu(self.fc1(flat)))
        return self.sigmoid(self.fc2(hidden))

# Build the network, sized to the number of feature columns
input_size = X_tensor.size(1)
model = StrokeCNN(input_size=input_size)

# Binary cross-entropy on the model's sigmoid outputs, optimized with Adam
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=5e-4)

# Training function with validation accuracy
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=30):
    """Train ``model`` and report loss/accuracy on both loaders after each epoch.

    Each epoch runs one optimization pass over ``train_loader`` followed by a
    gradient-free pass over ``val_loader``. Predictions are thresholded at 0.5
    to compute accuracy. The model is left in eval mode when training ends.

    Args:
        model: Module whose outputs are compared against 0.5 (e.g. sigmoid
            probabilities).
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        criterion: Loss callable taking ``(predictions, targets)``.
        optimizer: Optimizer updating the model's parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        A dict with keys ``"train_loss"``, ``"train_acc"``, ``"val_loss"`` and
        ``"val_acc"``, each mapping to a list with one float per epoch. Losses
        are means over batches; a metric is NaN when its loader was empty.
    """

    def _ratio(numerator, denominator):
        # An empty loader produces no batches/samples; report NaN rather than
        # aborting the run with ZeroDivisionError.
        return numerator / denominator if denominator else float("nan")

    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}

    for epoch in range(epochs):
        model.train()
        total_loss, correct_train, total_train, train_batches = 0.0, 0, 0, 0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            # Count batches while iterating so loaders without len() also work
            train_batches += 1

            # Compute training accuracy
            predicted = (y_pred > 0.5).float()
            correct_train += (predicted == y_batch).sum().item()
            total_train += y_batch.size(0)

        # Validation step
        model.eval()
        val_loss, correct_val, total_val, val_batches = 0.0, 0, 0, 0
        with torch.no_grad():
            for X_val, y_val in val_loader:
                y_val_pred = model(X_val)
                val_loss += criterion(y_val_pred, y_val).item()
                val_batches += 1
                predicted_val = (y_val_pred > 0.5).float()
                correct_val += (predicted_val == y_val).sum().item()
                total_val += y_val.size(0)

        # Compute epoch metrics
        train_loss_avg = _ratio(total_loss, train_batches)
        val_loss_avg = _ratio(val_loss, val_batches)
        train_acc = _ratio(correct_train, total_train)
        val_acc = _ratio(correct_val, total_val)

        history["train_loss"].append(train_loss_avg)
        history["train_acc"].append(train_acc)
        history["val_loss"].append(val_loss_avg)
        history["val_acc"].append(val_acc)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss_avg:.4f}, "
              f"Train Acc: {train_acc:.4f}, Val Loss: {val_loss_avg:.4f}, "
              f"Val Acc: {val_acc:.4f}")

    return history

# Run the 30-epoch training loop on the prepared loaders
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=30,
)

# Evaluation function with test accuracy
def evaluate_model(model, test_loader):
    """Print and return the accuracy of ``model`` on ``test_loader``.

    The model is switched to eval mode and run without gradient tracking.
    Outputs greater than 0.5 count as class 1 and are compared element-wise
    with the targets.

    Args:
        model: Module whose outputs are compared against 0.5 (e.g. sigmoid
            probabilities).
        test_loader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        Fraction of correct predictions as a float, or NaN when
        ``test_loader`` yields no samples.
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            predicted = (model(X_batch) > 0.5).float()
            correct += (predicted == y_batch).sum().item()
            total += y_batch.size(0)

    # An empty loader has no defined accuracy; report NaN instead of raising
    # ZeroDivisionError.
    accuracy = correct / total if total else float("nan")
    print(f'Test Accuracy: {accuracy:.4f}')
    return accuracy

# Report accuracy on the held-out test split
evaluate_model(model=model, test_loader=test_loader)



Data processing completed successfully!
Epoch 1/30, Train Loss: 0.3267, Train Acc: 0.8649, Val Loss: 0.3085, Val Acc: 0.8565
Epoch 2/30, Train Loss: 0.2885, Train Acc: 0.8649, Val Loss: 0.2948, Val Acc: 0.8565
Epoch 3/30, Train Loss: 0.2747, Train Acc: 0.8713, Val Loss: 0.2841, Val Acc: 0.8663
Epoch 4/30, Train Loss: 0.2668, Train Acc: 0.8767, Val Loss: 0.2784, Val Acc: 0.8689
Epoch 5/30, Train Loss: 0.2627, Train Acc: 0.8774, Val Loss: 0.2716, Val Acc: 0.8703
Epoch 6/30, Train Loss: 0.2612, Train Acc: 0.8780, Val Loss: 0.2744, Val Acc: 0.8703
Epoch 7/30, Train Loss: 0.2595, Train Acc: 0.8784, Val Loss: 0.2858, Val Acc: 0.8607
Epoch 8/30, Train Loss: 0.2571, Train Acc: 0.8785, Val Loss: 0.2679, Val Acc: 0.8703
Epoch 9/30, Train Loss: 0.2562, Train Acc: 0.8786, Val Loss: 0.2681, Val Acc: 0.8703
Epoch 10/30, Train Loss: 0.2542, Train Acc: 0.8800, Val Loss: 0.2665, Val Acc: 0.8714
Epoch 11/30, Train Loss: 0.2528, Train Acc: 0.8796, Val Loss: 0.2621, Val Acc: 0.8707
Epoch 12/30, Train Loss