In [1]:
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Read the labelled adherence table into a DataFrame and preview its first rows
data_path = '/Users/rachelwang/Downloads/notes/models/csv/adherence_labeled_with_image.csv'
data = pd.read_csv(filepath_or_buffer=data_path)
data.head(n=5)

Unnamed: 0,pid,task,adherence,file,spectrogram
0,1f9475bb-f13b-4f68-969b-28f20455b3e7,Loudness,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
1,1f9475bb-f13b-4f68-969b-28f20455b3e7,Respiration-and-cough-Breath-2,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
2,1f9475bb-f13b-4f68-969b-28f20455b3e7,Respiration-and-cough-FiveBreaths-1,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
3,1f9475bb-f13b-4f68-969b-28f20455b3e7,Respiration-and-cough-ThreeQuickBreaths-2,4.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
4,1f9475bb-f13b-4f68-969b-28f20455b3e7,Maximum-phonation-time-1,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...


In [2]:
# Tally how many rows carry each adherence label, ordered by label value
label_counts = data.adherence.value_counts().sort_index()
print("Adherence label counts:", label_counts, sep="\n")

Adherence label counts:
adherence
1.0     20
2.0     11
3.0     25
4.0     90
5.0    395
Name: count, dtype: int64


In [3]:
# Locations of the split CSV files (train / validation / test)
train_csv_path, val_csv_path, test_csv_path = (
    '/Users/rachelwang/Downloads/notes/models/train_data_adherence.csv',
    '/Users/rachelwang/Downloads/notes/models/val_data_adherence.csv',
    '/Users/rachelwang/Downloads/notes/models/test_data_adherence.csv',
)

# Read each split into its own DataFrame, in the same order as the paths above
train_df, val_df, test_df = map(pd.read_csv, (train_csv_path, val_csv_path, test_csv_path))

In [4]:
# Define the dataset class
class SpectrogramDataset(Dataset):
    """Spectrogram images paired with zero-based adherence class indices.

    Each row of ``dataframe`` must provide a ``'spectrogram'`` column (path of
    an image file readable by PIL) and an ``'adherence'`` column (numeric score
    starting at 1).

    Args:
        dataframe: Table with one row per sample.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the underlying table."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the sample at positional index ``idx``.

        Returns:
            tuple: ``(image, label)`` where ``image`` is the RGB image after
            ``transform`` (if any) and ``label`` is ``adherence - 1`` as a
            built-in ``int``.

        Raises:
            ValueError: If the adherence value cannot be converted to an
                integer (for example, it is NaN).
        """
        # Fetch the row once and reuse it for both columns.
        row = self.dataframe.iloc[idx]
        image = Image.open(row['spectrogram']).convert('RGB')

        if self.transform:
            image = self.transform(image)

        # 1-based score -> 0-based class index. Returning a plain int (rather
        # than a numpy float) lets the default collate function build an int64
        # label tensor, which is what nn.CrossEntropyLoss expects.
        label = int(row['adherence']) - 1
        return image, label

In [5]:
# Image preprocessing with augmentation: random horizontal/vertical flips and a
# random rotation of up to 10 degrees, then resize to 224x224, convert to a
# tensor, and normalize each channel with the standard ImageNet mean/std.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [6]:
# Create datasets and dataloaders

# Validation and test images must not be randomly flipped or rotated: random
# augmentation at evaluation time makes the reported loss/accuracy vary from
# run to run and adds noise to the early-stopping signal. Evaluate with a
# deterministic pipeline that keeps only the resize and normalization steps
# of the augmented `transform`.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Only the training split sees the augmented pipeline.
train_dataset = SpectrogramDataset(train_df, transform=transform)
val_dataset = SpectrogramDataset(val_df, transform=eval_transform)
test_dataset = SpectrogramDataset(test_df, transform=eval_transform)

# Shuffle only the training data; evaluation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [7]:
# Load the ResNet model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# `pretrained=True` is deprecated in torchvision >= 0.13. Requesting the
# IMAGENET1K_V1 weights explicitly loads the same checkpoint without the
# deprecation warning.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# Swap the ImageNet classification head for a 5-way output layer
# (one logit per adherence label 1-5, stored as classes 0-4).
model.fc = nn.Linear(model.fc.in_features, 5)
model = model.to(device)

# Define the loss function and optimizer
# NOTE(review): the label counts printed earlier are heavily skewed towards
# label 5 (395 of 541 rows). Consider passing class weights to
# CrossEntropyLoss or using a weighted sampler — confirm with the data owner.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /Users/rachelwang/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:03<00:00, 26.6MB/s]


In [12]:
# Training loop
# Trains for up to `num_epochs` epochs, validates after each one, saves the
# weights with the lowest validation loss, and stops early once the validation
# loss has not improved for `early_stop_patience` consecutive epochs.
num_epochs = 50
best_model_path = 'resnet_best_model_adherence.pth'
best_val_loss = float('inf')
early_stop_patience = 10
early_stop_counter = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    print(f"Starting epoch {epoch + 1}/{num_epochs}")

    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device).long()  # CrossEntropyLoss needs int64 class indices
        batch_size = labels.size(0)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `loss` is the mean over this batch; weight it by the batch size so
        # the epoch figure is a true per-sample average even when the last
        # batch is smaller than the others.
        running_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

        if batch_idx % 10 == 0:
            print(f"Batch {batch_idx}, Loss: {loss.item()}")

    epoch_loss = running_loss / total
    epoch_accuracy = 100 * correct / total

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).long()  # CrossEntropyLoss needs int64 class indices
            batch_size = labels.size(0)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Sample-weighted accumulation, as in the training pass.
            val_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            val_total += batch_size
            val_correct += (predicted == labels).sum().item()

    val_loss /= val_total
    val_accuracy = 100 * val_correct / val_total

    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

    # Early stopping: checkpoint on improvement, otherwise count towards patience
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0
        torch.save(model.state_dict(), best_model_path)
    else:
        early_stop_counter += 1
        if early_stop_counter >= early_stop_patience:
            print("Early stopping triggered.")
            break

print("Training complete.")
print(f"Best model saved at: {best_model_path}")

Starting epoch 1/50
Batch 0, Loss: 0.7622745037078857
Batch 10, Loss: 0.452995240688324
Epoch [1/50], Loss: 0.7920, Accuracy: 74.60%
Validation Loss: 0.7453, Validation Accuracy: 76.85%
Starting epoch 2/50
Batch 0, Loss: 0.9998501539230347
Batch 10, Loss: 0.5594660043716431
Epoch [2/50], Loss: 0.8097, Accuracy: 73.67%
Validation Loss: 0.7464, Validation Accuracy: 76.85%
Starting epoch 3/50
Batch 0, Loss: 0.7110664248466492
Batch 10, Loss: 0.7684535384178162
Epoch [3/50], Loss: 0.7782, Accuracy: 74.60%
Validation Loss: 0.7424, Validation Accuracy: 76.85%
Starting epoch 4/50
Batch 0, Loss: 0.7279840707778931
Batch 10, Loss: 0.5718775987625122
Epoch [4/50], Loss: 0.8006, Accuracy: 74.60%
Validation Loss: 0.7608, Validation Accuracy: 76.85%
Starting epoch 5/50
Batch 0, Loss: 0.7147996425628662
Batch 10, Loss: 1.0089263916015625
Epoch [5/50], Loss: 0.7964, Accuracy: 74.13%
Validation Loss: 0.7540, Validation Accuracy: 76.85%
Starting epoch 6/50
Batch 0, Loss: 0.8765460252761841
Batch 10, Lo

In [13]:
# Load the saved model
# `map_location` remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine can still be loaded on a CPU-only one.
model.load_state_dict(torch.load(best_model_path, map_location=device))

# Evaluate on test set
model.eval()
test_loss = 0.0
test_correct = 0
test_total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device).long()  # CrossEntropyLoss needs int64 class indices
        batch_size = labels.size(0)

        outputs = model(images)
        loss = criterion(outputs, labels)

        # `loss` is a per-batch mean; weight by batch size so the final figure
        # is a per-sample average even if the last batch is smaller.
        test_loss += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        test_total += batch_size
        test_correct += (predicted == labels).sum().item()

test_loss /= test_total
test_accuracy = 100 * test_correct / test_total

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")

Test Loss: 0.8331, Test Accuracy: 75.23%
