In [None]:
#IMPORTS
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets
import os
from PIL import Image
import wandb
from datetime import datetime


# Authenticate with Weights & Biases without embedding a secret in the notebook.
# With no explicit key, wandb.login() uses the WANDB_API_KEY environment variable
# or the credentials already stored in ~/.netrc, and otherwise prompts for a key.
# NOTE: the API key that used to be hard-coded on this line has been exposed and
# should be revoked/rotated in the W&B account settings.
wandb.login()
wandb.init(project='3D-CNN', entity='detecting-respiratory-pattern')

# Checkpoint location: the file name carries the start time of this run, so the
# "best model" of an earlier run is not overwritten by a later one.
model_save_dir = './model_checkpoints'
current_time = datetime.now().strftime('%Y%m%d-%H%M%S')
model_filename = f'best_model_{current_time}.pth'
model_save_path = os.path.join(model_save_dir, model_filename)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33myeneirvine[0m ([33mdetecting-respiratory-pattern[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/yeneirvine/.netrc
[34m[1mwandb[0m: Currently logged in as: [33myeneirvine[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
class BreathingDataset(Dataset):
    """Dataset of fixed-length frame sequences labelled by breathing pattern.

    Expected directory layout under ``root_dir``::

        <blanket condition>/<distance>/<label>/<subject>/<n>.jpg

    where ``<label>`` is ``'Hold Breath'`` (class 0) or ``'Relaxed'`` (class 1) and
    ``<n>`` is an integer that defines the temporal order of the frames.
    """

    def __init__(self, root_dir, blanket_condition=None, distance=None, transform=None, max_frames=100):
        """
        Initializes the dataset by indexing frame paths (no image is loaded here).

        Only the first ``max_frames`` frames of each sample are used; samples with fewer
        frames are zero-padded in ``__getitem__``. The default of 100 was chosen as a
        balance between using a lot of data and not using too much padding (which can
        bring about inaccuracies). It can be tuned.

        :param root_dir: Base directory for the dataset (e.g., path to 'Training').
        :param blanket_condition: 'With Blankets' or 'Without Blankets', use None to include both.
        :param distance: '2 Meters' or '3 Meters', use None to include both distances.
        :param transform: Transformations to be applied to each image. Must produce tensors,
            because the frames are stacked with ``torch.stack``.
        :param max_frames: Maximum number of frames to use from each video sequence.
        :raises OSError: if an expected condition/distance/label directory cannot be listed.
        :raises ValueError: if a ``.jpg`` file name does not start with an integer.
        """
        self.root_dir = root_dir
        self.blanket_condition = blanket_condition
        self.distance = distance
        self.transform = transform
        self.max_frames = max_frames
        self.samples = []  # list of (list of frame paths, int label)

        conditions = ['With Blankets', 'Without Blankets'] if blanket_condition is None else [blanket_condition]
        distances = ['2 Meters', '3 Meters'] if distance is None else [distance]

        for condition in conditions:
            for dist in distances:
                for label in ['Hold Breath', 'Relaxed']:
                    label_path = os.path.join(root_dir, condition, dist, label)
                    # os.listdir order is arbitrary; sort so the sample order is reproducible.
                    for subject_path in sorted(os.listdir(label_path)):
                        subject_full_path = os.path.join(label_path, subject_path)
                        if not os.path.isdir(subject_full_path):
                            continue
                        # Numeric sort so that '10.jpg' comes after '2.jpg'.
                        images = sorted([img for img in os.listdir(subject_full_path) if img.endswith('.jpg')],
                                        key=lambda x: int(x.split('.')[0]))
                        # Limit to the first max_frames frames
                        images = images[:self.max_frames]
                        if not images:
                            # A sample without frames cannot be padded (there is no frame to
                            # take the tensor shape from), so it is left out of the index.
                            continue
                        image_paths = [os.path.join(subject_full_path, img) for img in images]
                        self.samples.append((image_paths, 0 if label == 'Hold Breath' else 1))

    def __len__(self):
        """Returns the number of indexed samples (frame sequences)."""
        return len(self.samples)

    def __getitem__(self, idx):
        """
        Loads one sample.

        :param idx: Index into the list of samples.
        :return: ``(frames, label)`` where ``frames`` is the stack of ``max_frames``
            transformed frames (zero frames appended if the sample is shorter) and
            ``label`` is 0 for 'Hold Breath' or 1 for 'Relaxed'.
        :raises TypeError: if the frames are not tensors after the transform.
        """
        images_path, label = self.samples[idx]
        images = []
        for img_path in images_path:
            # The context manager closes the underlying file; convert() returns a
            # separate image object, so it stays usable after the file is closed.
            with Image.open(img_path) as img:
                images.append(img.convert('RGB'))

        if self.transform:
            images = [self.transform(image) for image in images]

        # Padding and stacking below only work on tensors; fail with a clear message
        # instead of an obscure error from torch.zeros_like / torch.stack.
        if not all(torch.is_tensor(image) for image in images):
            raise TypeError("BreathingDataset requires a transform that converts each frame to a "
                            "torch.Tensor (e.g. one ending in ToTensor())")

        # Ensuring all sequences have the same number of frames (padding if necessary)
        if len(images) < self.max_frames:
            padding = [torch.zeros_like(images[0]) for _ in range(self.max_frames - len(images))]
            images += padding

        images_stack = torch.stack(images)
        return images_stack, label


In [None]:
# Per-frame preprocessing: resize every frame, then convert it to a tensor.
# The 224 x 224 target size is only a placeholder and is probably not the right choice.
# Note that Basic3DCNN's first fully connected layer is sized for 224 x 224 frames.
transform = transforms.Compose(
    transforms=[
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
    ]
)

In [None]:
# THIS IS WHERE WE SPECIFY WHICH CONDITIONS / CONFIGURATIONS TO INCLUDE.
# Right now it takes data WITHOUT BLANKETS at both 2 m and 3 m (distance=None includes both).
#
# The dataset root defaults to the original location, but can be overridden through the
# BREATHING_DATA_ROOT environment variable so the notebook runs on other machines
# without editing this cell.
DATA_ROOT = os.environ.get('BREATHING_DATA_ROOT', '/home/yene.irvine/rgb_10-fps')

train_dataset_without_blankets = BreathingDataset(
    os.path.join(DATA_ROOT, 'Train'),
    blanket_condition='Without Blankets', distance=None, transform=transform)
val_dataset_without_blankets = BreathingDataset(
    os.path.join(DATA_ROOT, 'Validation'),
    blanket_condition='Without Blankets', distance=None, transform=transform)
test_dataset_without_blankets = BreathingDataset(
    os.path.join(DATA_ROOT, 'Test'),
    blanket_condition='Without Blankets', distance=None, transform=transform)

In [None]:
# Training loader: shuffling reorders whole samples (i.e. sets of frames), never the
# individual frames inside a sample.
train_loader = DataLoader(
    dataset=train_dataset_without_blankets,
    batch_size=2,
    shuffle=True,
)

# Validation and test loaders keep a fixed order (no shuffling).
val_loader = DataLoader(
    dataset=val_dataset_without_blankets,
    batch_size=2,
    shuffle=False,
)
test_loader = DataLoader(
    dataset=test_dataset_without_blankets,
    batch_size=2,
    shuffle=False,
)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Basic3DCNN(nn.Module):
    """Small 3D CNN: two (Conv3d -> ReLU -> MaxPool3d) stages and two linear layers.

    Input:  tensor of shape (batch, 3, num_frames, height, width).
    Output: raw class scores of shape (batch, num_classes), suitable for
            ``nn.CrossEntropyLoss``.

    :param num_frames: Number of frames per input sequence (temporal depth).
    :param height: Frame height in pixels.
    :param width: Frame width in pixels.
    :param num_classes: Number of output classes.

    The defaults (100 frames of 224 x 224, 2 classes) give the same layer sizes as the
    previously hard-coded network: fc1 has 32 * 25 * 56 * 56 = 2508800 inputs.
    """

    def __init__(self, num_frames=100, height=224, width=224, num_classes=2):
        super().__init__()
        self.conv1 = nn.Conv3d(in_channels=3, out_channels=16, kernel_size=(3, 3, 3), stride=1, padding=1)
        self.pool = nn.MaxPool3d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv3d(16, 32, kernel_size=(3, 3, 3), stride=1, padding=1)
        # Size of the flattened feature map, derived from the input dimensions instead
        # of being repeated as a magic number in __init__ and forward.
        self.flat_features = self._flat_features(num_frames, height, width)
        self.fc1 = nn.Linear(self.flat_features, 512)
        self.fc2 = nn.Linear(512, num_classes)

    @staticmethod
    def _flat_features(num_frames, height, width):
        """Number of features per sample after the two conv/pool stages."""
        def pooled_twice(size):
            # The convolutions (kernel 3, stride 1, padding 1) keep the size; each
            # pooling (kernel 2, stride 2) halves it, rounding down.
            return (size // 2) // 2

        # 32 is the number of output channels of conv2.
        return 32 * pooled_twice(num_frames) * pooled_twice(height) * pooled_twice(width)

    def forward(self, x):
        """Computes class scores for a batch of frame sequences."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, N), this can
        # never fold a wrongly sized input into the batch dimension; a size mismatch
        # surfaces as a shape error in fc1 instead.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [None]:
import torch.optim as optim

# Defined once so the optimizer and the logged configuration cannot drift apart.
learning_rate = 0.0005

model = Basic3DCNN()  # model instantiation
criterion = nn.CrossEntropyLoss()  # loss function
optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # optimizer

# Hyperparameters
num_epochs = 5
best_val_loss = float('inf')  # lowest validation loss seen so far
patience = 3  # epochs without improvement tolerated before early stopping
early_stop_counter = 0

# Log the parameters (this is strictly for logging to wandb, it doesn't change anything in the model).
# The config of the active run is updated in place: assigning a new dict to `wandb.config`
# would only rebind the module attribute and leave the run's config untouched.
# allow_val_change=True lets this cell be re-run after editing a value.
wandb.config.update({
    "learning_rate": learning_rate,
    "epochs": num_epochs,
    "batch_size": train_loader.batch_size,
}, allow_val_change=True)

wandb.watch(model, log='all')

In [None]:
# Main Training Loop
# Each epoch: train on train_loader, evaluate on val_loader, log both to wandb,
# checkpoint the model when the validation loss improves, and stop early after
# `patience` epochs without improvement.

# Ensure the checkpoint directory exists before any epoch tries to save the model.
os.makedirs(model_save_dir, exist_ok=True)

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        # Swap dims 1 and 2: the dataset stacks frames first (batch, frames, channels, ...),
        # while Conv3d expects channels first (batch, channels, frames, ...).
        inputs = inputs.permute(0, 2, 1, 3, 4)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch training losses for this epoch
    train_loss = running_loss / len(train_loader)

    # Validation Phase
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    val_corrects = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.permute(0, 2, 1, 3, 4)  # Adjust dimensions (see training step)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            # .item() keeps the counter a plain Python int, so the accuracy below is a
            # float rather than a tensor (cleaner to log and print).
            val_corrects += torch.sum(preds == labels).item()

    val_loss /= len(val_loader)
    val_accuracy = val_corrects / len(val_loader.dataset)

    # A single log call per epoch keeps all metrics of an epoch on the same wandb step.
    wandb.log({
        "epoch": epoch,
        "train_loss": train_loss,
        "val_loss": val_loss,
        "val_accuracy": val_accuracy,
    })

    print(f'Epoch {epoch+1}, Train Loss: {train_loss}, Val Loss: {val_loss}, Val Accuracy: {val_accuracy}')

    # Early Stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0  # Reset counter

        # Save the best model
        torch.save(model.state_dict(), model_save_path)

    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

In [None]:
# Load the best model for testing (the checkpoint written by the training loop
# whenever the validation loss improved)
model.load_state_dict(torch.load(model_save_path))

# Testing Phase
test_loss = 0.0
test_corrects = 0
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    for inputs, labels in test_loader:
        # Swap dims 1 and 2: the dataset stacks frames first (batch, frames, channels, ...),
        # while Conv3d expects channels first (batch, channels, frames, ...).
        inputs = inputs.permute(0, 2, 1, 3, 4)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        # .item() keeps the counter a plain Python int, so the accuracy below is a
        # float rather than a tensor (cleaner to log and print).
        test_corrects += torch.sum(preds == labels).item()

test_loss /= len(test_loader)  # mean of the per-batch losses
test_accuracy = test_corrects / len(test_loader.dataset)
wandb.log({"test_loss": test_loss, "test_accuracy": test_accuracy})
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')