In [None]:
import pandas as pd
# Single place to repoint the notebook at a different copy of the data.
# NOTE(review): the path looks like a mounted Google Drive folder — confirm the
# drive is mounted before this cell runs.
DATA_DIR = '/content/drive/MyDrive/SPH 6004/Assignment 2/Version 2'

# Row-level tables; a later cell groups them by 'id' to build per-id windows.
train_data = pd.read_csv(f'{DATA_DIR}/merged_train.csv')
test_data = pd.read_csv(f'{DATA_DIR}/merged_test.csv')
holdout_data = pd.read_csv(f'{DATA_DIR}/merged_holdout.csv')

In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def create_sequences_with_targets(data, timestep=5, scaler=None):
    """Turn a long-format table into one fixed-length window and one label per ``id``.

    For every ``id`` group the last ``timestep`` rows of the numeric feature
    columns are scaled and stacked into a ``(timestep, n_features)`` array.
    Groups with fewer than ``timestep`` rows are left-padded by repeating their
    first scaled row. The label is the group's last ``icu_death`` value.

    Rows are used in the order they appear in ``data``; nothing is sorted here,
    so the caller must supply each group in the intended (presumably
    chronological) order.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``id`` and ``icu_death``. Every numeric column except
        ``id``, ``charttime``, ``los_icu``, ``icu_death`` and ``icu_outcome``
        is treated as a feature.
    timestep : int, default 5
        Window length; must be at least 1.
    scaler : object with a ``transform`` method, optional
        An already-fitted scaler (for example a ``MinMaxScaler`` fitted on the
        training features, in the same column order) that is applied as-is to
        every window. When omitted, a ``MinMaxScaler`` is re-fitted on each
        group's own window, which is the original behaviour. With that default
        any feature that is constant inside a window is mapped to 0 and values
        are not comparable between groups.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Sequences of shape ``(n_groups, timestep, n_features)`` and targets of
        shape ``(n_groups,)``, both ordered by sorted ``id``.

    Raises
    ------
    ValueError
        If ``timestep`` is smaller than 1.
    """
    # A non-positive timestep would make ``iloc[-timestep:]`` return whole
    # groups of differing length and silently yield a ragged result.
    if timestep < 1:
        raise ValueError(f"timestep must be a positive integer, got {timestep!r}")

    # Identifiers, timestamps and outcome columns are never model inputs.
    excluded = {'id', 'charttime', 'los_icu', 'icu_death', 'icu_outcome'}
    columns_to_scale = [
        col for col in data.select_dtypes(include=[np.number]).columns
        if col not in excluded
    ]

    # Default path: one scaler object that is re-fitted for every group.
    per_group_scaler = MinMaxScaler() if scaler is None else None

    time_series = []
    targets = []

    for _, group_data in data.groupby('id'):
        window = group_data[columns_to_scale].iloc[-timestep:]
        if scaler is None:
            scaled = per_group_scaler.fit_transform(window)
        else:
            scaled = np.asarray(scaler.transform(window), dtype=float)

        # Left-pad short groups with copies of their earliest available row.
        missing_rows = timestep - len(scaled)
        if missing_rows > 0:
            scaled = np.vstack([np.tile(scaled[0], (missing_rows, 1)), scaled])

        time_series.append(scaled)
        # Always label with the last record so short and long groups agree.
        targets.append(group_data['icu_death'].values[-1])

    if not time_series:
        # Keep the 3-D layout even when there is nothing to return.
        return np.empty((0, timestep, len(columns_to_scale))), np.array(targets)

    return np.array(time_series), np.array(targets)

In [None]:
# Build the windowed arrays for the three splits, in train / test / holdout
# order, using the function's default window length.
(
    (train_sequences, train_targets),
    (test_sequences, test_targets),
    (holdout_sequences, holdout_targets),
) = [
    create_sequences_with_targets(split)
    for split in (train_data, test_data, holdout_data)
]

In [None]:
# Sanity check of the array layout: (number of ids, time steps, features).
train_sequences.shape

(14289, 5, 60)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import MeanSquaredError

# 1-D CNN over each (time steps, features) window, ending in a single sigmoid
# unit for the binary label.
#
# Both convolutions use padding='same'. With the default 'valid' padding the
# 5-step input shrinks to 3 after the first kernel_size=3 convolution and to 1
# after the first pooling, so the second convolution has nothing left to slide
# over and Keras raises "One of the dimensions in the output is <= 0".
# With 'same' padding the temporal length goes 5 -> 5 -> 2 -> 2 -> 1.
model = Sequential([
    Conv1D(filters=32, kernel_size=3, padding='same', activation='relu',
           input_shape=train_sequences.shape[1:]),  # input_shape is (time steps, features)
    MaxPooling1D(pool_size=2),
    Conv1D(filters=64, kernel_size=3, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # regularisation before the output unit
    Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',  # standard loss for a sigmoid output
              # MSE on the predicted probability is kept from the original;
              # accuracy is added so the logs also show a classification metric.
              metrics=[MeanSquaredError(), 'accuracy'])

ValueError: One of the dimensions in the output is <= 0 due to downsampling in conv1d_7. Consider increasing the input size. Received input shape [None, 1, 32] which would produce output shape with a zero or negative value in a dimension.

In [None]:
# Train for 10 epochs in mini-batches of 32. The holdout split is supplied as
# validation data, so it is scored at the end of every epoch.
history = model.fit(
    x=train_sequences,
    y=train_targets,
    batch_size=32,
    epochs=10,
    validation_data=(holdout_sequences, holdout_targets),
)

In [None]:
# Row-level 'icu_death' column of the test table (one entry per CSV row).
# NOTE(review): the evaluation below does not use it; it scores against the
# per-id test_targets. Confirm whether a later cell still needs this variable.
test_targets_death = test_data['icu_death']

# Final score on the test split; returns the loss followed by the compiled metrics.
model.evaluate(x=test_sequences, y=test_targets)