# Task 3 Activation Map Reconstruction Cont 1

## CONSTRUCT

**Goal**

This file combine 500 sample. 

### Before Start

In [103]:
import glob, re, os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from typing import List
from sklearn.utils import shuffle

import torch
import torch.nn as nn
import torch.optim as optim

# Ignore warning
import warnings
warnings.filterwarnings("ignore")
pd.set_option("display.max_rows", 100)
plt.style.use('ggplot')

In [104]:
# Run the cardiac_ml_tools.py script
%run ../cardiac_challenge/notebooks/cardiac_ml_tools.py

In [105]:
# Load the dataset
data_dirs = []
regex = r'data_hearts_dd_0p2*'
DIR = '../cardiac_challenge/intracardiac_dataset/' # path to the intracardiac_dataset

for x in os.listdir(DIR):
    if re.match(regex, x):
        data_dirs.append(DIR + x)
file_pairs = read_data_dirs(data_dirs)
print('Number of file pairs: {}'.format(len(file_pairs)))
# example of file pair
print("Example of file pair:")
print("{}\n{}".format(file_pairs[0][0], file_pairs[0][1]))


Number of file pairs: 16117
Example of file pair:
../cardiac_challenge/intracardiac_dataset/data_hearts_dd_0p2_geo_act_3_bcl/pECGData_hearts_dd_0p2_geo_act_3_bcl_bcl.1000.pattern.0.volunteer.v1.npy
../cardiac_challenge/intracardiac_dataset/data_hearts_dd_0p2_geo_act_3_bcl/VmData_hearts_dd_0p2_geo_act_3_bcl_bcl.1000.pattern.0.volunteer.v1.npy



### 3.1 Data Processing

#### 3.1.1 Combine 500 Sample

In [106]:
# file_pairs is a list where each element is a tuple containing the file paths for ECG data and activation time data
num_samples = 500  # Number of samples to process
num_timesteps = 500  # Each ECG data has 500 timesteps
num_leads = 12  # Standard ECG leads count after processing

# Initialize arrays to store combined data
ECGData_500 = np.zeros((num_samples, num_timesteps * num_leads))  # Flattened array for 12 leads data
ActTime_500 = np.zeros((num_samples, 75))  # Store 75 activation times per sample

# Process each sample
for i in range(num_samples):
    # Load ECG data
    pECGData = np.load(file_pairs[i][0])
    pECGData = get_standard_leads(pECGData)  # Convert to 12 standard leads
    ECGData_500[i, :] = pECGData.flatten()  # Flatten and store in the combined array

    # Load activation time data
    VmData = np.load(file_pairs[i][1])
    ActTime = get_activation_time(VmData)
    ActTime_500[i, :] = ActTime.flatten()  # Flatten the (75, 1) array to fit into (75,) array


In [107]:
# Create directory if it does not exist
output_dir = '../combine_dataset'
os.makedirs(output_dir, exist_ok=True)

# Save combined datasets to .npy format
np.save(os.path.join(output_dir, 'ecg_data_500.npy'), ECGData_500)
np.save(os.path.join(output_dir, 'active_time_500.npy'), ActTime_500)


After combine, check them

In [108]:
print("ECGData_500 shape: {}".format(ECGData_500.shape))
print("ActTime_500 shape: {}".format(ActTime_500.shape))

ECGData_500 shape: (500, 6000)
ActTime_500 shape: (500, 75)


In [109]:
ECGData_500, ActTime_500

(array([[ 0.01860286,  0.02571926,  0.0071164 , ..., -0.00043366,
         -0.00047924, -0.00037289],
        [ 0.0166689 ,  0.01879395,  0.00212505, ..., -0.0003504 ,
         -0.00037333, -0.00018206],
        [ 0.01997983,  0.0190023 , -0.00097753, ..., -0.00093712,
         -0.00066594, -0.00049339],
        ...,
        [ 0.00144014,  0.00167206,  0.00023193, ..., -0.00081582,
          0.00018729, -0.00010726],
        [ 0.00210478,  0.00299183,  0.00088705, ..., -0.00049417,
          0.00041288,  0.00049894],
        [ 0.00081549,  0.00133864,  0.00052315, ..., -0.0008989 ,
          0.00046388,  0.00015533]]),
 array([[18., 31., 22., ..., 17., 11.,  5.],
        [12.,  3.,  8., ...,  4., 10., 16.],
        [10.,  6.,  1., ..., 19., 12.,  5.],
        ...,
        [11.,  6.,  1., ..., 40., 34., 27.],
        [14.,  8.,  2., ..., 33., 40., 45.],
        [16., 14., 10., ..., 43., 37., 31.]]))

#### 3.1.2 Split Dataset

In [110]:
# Shuffle indices
indices = np.arange(ECGData_500.shape[0])
shuffled_indices = shuffle(indices, random_state=42)

# Define the split point
split_ratio = 0.8  # 80% train, 20% test
split_point = int(len(shuffled_indices) * split_ratio)

# Split indices into training and test sets
train_indices = shuffled_indices[:split_point]
test_indices = shuffled_indices[split_point:]

# Use indices to create training and test data
X_train = ECGData_500[train_indices]
y_train = ActTime_500[train_indices]
X_test = ECGData_500[test_indices]
y_test = ActTime_500[test_indices]

In [111]:
X_train, y_train, X_test, y_test

(array([[ 3.26309393e-03,  3.85096802e-03,  5.87874090e-04, ...,
         -3.45405229e-04, -3.29793799e-04, -2.22777099e-04],
        [ 1.60217814e-03,  1.64650948e-03,  4.43313400e-05, ...,
         -8.20579544e-04,  9.82572556e-05, -5.18787444e-05],
        [ 3.04071210e-03,  2.47169349e-03, -5.69018612e-04, ...,
         -5.44660959e-04, -3.09872489e-04, -1.26951169e-04],
        ...,
        [ 1.92237300e-04, -7.01320100e-04, -8.93557400e-04, ...,
         -1.82680515e-03,  6.56034148e-04,  1.21244099e-03],
        [ 3.17828010e-03,  7.84831200e-03,  4.67003190e-03, ...,
         -3.95781928e-04, -1.94673998e-04, -3.90945876e-05],
        [ 7.00068570e-03,  7.81426910e-03,  8.13583400e-04, ...,
         -4.71721327e-04, -7.82934897e-04, -8.88557697e-04]]),
 array([[12., 13., 18., ..., 24., 30., 36.],
        [19., 13.,  4., ..., 32., 38., 44.],
        [18., 11., 31., ..., 36., 31., 24.],
        ...,
        [64., 57., 48., ..., 42., 39., 33.],
        [14., 20., 12., ..., 33., 28

In [112]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((400, 6000), (400, 75), (100, 6000), (100, 75))

### 3.2 Modeling

#### 3.2.1 Define the 1D CNN Model

In [116]:
class Simple1DCNN(nn.Module):
    def __init__(self):
        super(Simple1DCNN, self).__init__()

        # convolutional layers -> relu -> convolutional layers -> relu -> pooling -> flatten -> fully connected layers
        self.conv1 = nn.Conv1d(in_channels=12, out_channels=16, kernel_size=3, padding=2)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, padding=2)
        self.relu2 = nn.ReLU()
        self.pool = nn.MaxPool1d(3)
        self.flat = nn.Flatten()
        self.fc1 = nn.Linear(5376, 512)
        self.relu3 = nn.ReLU()
        # 32 channels * 1500 length
        self.fc2 = nn.Linear(512, 75)  # Output the activation times
        # # Layer 1: First convolutional layer
        # self.conv1 = nn.Conv1d(in_channels=12, out_channels=16, kernel_size=5, padding=2)
        # # Layer 2: ReLU activation function
        # self.relu = nn.ReLU()
        # # Layer 3: Max pooling layer
        # self.pool = nn.MaxPool1d(2)
        # # Layer 4: Second convolutional layer
        # self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5, padding=2)
        # # Layer 5: First fully connected layer
        # self.fc1 = nn.Linear(32*1500, 100)
        # # Layer 6: Second fully connected layer
        # self.fc2 = nn.Linear(100, 75)  # Output the activation times


    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool(x)
        x = self.flat(x)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        # print(x.size())  # This will print the shape [batch_size, num_channels, length]
        # x = x.view(x.size(0), -1)  # Flatten the output for the dense layer
        # print(x.size())  # This will print the flattened size
        return x


#### 3.2.2 Initialize the Model and Optimizer

In [117]:
model = Simple1DCNN()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Configure Loss Function
criterion = nn.MSELoss()

#### 3.2.3 Prepare & Train Model

In [118]:
# Prepare
X_train_tensor = torch.tensor(X_train.reshape(400, 12, 500), dtype=torch.float32)  # Add channel dimension
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

# Train
num_epochs = 50  # or however many you deem necessary

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 1434.59912109375
Epoch 2, Loss: 1424.7789306640625
Epoch 3, Loss: 1411.237548828125
Epoch 4, Loss: 1391.6141357421875
Epoch 5, Loss: 1365.222900390625
Epoch 6, Loss: 1331.3873291015625
Epoch 7, Loss: 1289.508544921875
Epoch 8, Loss: 1239.0467529296875
Epoch 9, Loss: 1179.60205078125
Epoch 10, Loss: 1110.9798583984375
Epoch 11, Loss: 1033.2666015625
Epoch 12, Loss: 947.083984375
Epoch 13, Loss: 853.6998901367188
Epoch 14, Loss: 755.3377075195312
Epoch 15, Loss: 655.5318603515625
Epoch 16, Loss: 559.5077514648438
Epoch 17, Loss: 474.5154724121094
Epoch 18, Loss: 409.87347412109375
Epoch 19, Loss: 375.7651062011719
Epoch 20, Loss: 378.8187561035156
Epoch 21, Loss: 412.80914306640625
Epoch 22, Loss: 453.71527099609375
Epoch 23, Loss: 475.7220764160156
Epoch 24, Loss: 469.61474609375
Epoch 25, Loss: 441.6982116699219
Epoch 26, Loss: 404.2726135253906
Epoch 27, Loss: 368.5992431640625
Epoch 28, Loss: 341.7756042480469
Epoch 29, Loss: 326.31622314453125
Epoch 30, Loss: 321.2221

### 3.3 Evaluate the model

In [None]:
# Evaluate model
model.eval()


Simple1DCNN(
  (conv1): Conv1d(1, 16, kernel_size=(5,), stride=(1,), padding=(2,))
  (relu): ReLU()
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(16, 32, kernel_size=(5,), stride=(1,), padding=(2,))
  (fc1): Linear(in_features=48000, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=75, bias=True)
)