In [2]:
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image
import timm
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

# Load the dataset
new_data_path = '/Users/rachelwang/Downloads/notes/models/csv/adherence_labeled_with_image.csv'
new_data = pd.read_csv(new_data_path)

# Verify the loaded data
new_data.head()

Unnamed: 0,pid,task,adherence,file,spectrogram
0,1f9475bb-f13b-4f68-969b-28f20455b3e7,Loudness,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
1,1f9475bb-f13b-4f68-969b-28f20455b3e7,Respiration-and-cough-Breath-2,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
2,1f9475bb-f13b-4f68-969b-28f20455b3e7,Respiration-and-cough-FiveBreaths-1,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
3,1f9475bb-f13b-4f68-969b-28f20455b3e7,Respiration-and-cough-ThreeQuickBreaths-2,4.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...
4,1f9475bb-f13b-4f68-969b-28f20455b3e7,Maximum-phonation-time-1,5.0,/Users/rachelwang/Downloads/bids_with_sensitiv...,/Users/rachelwang/Downloads/notes/models/adher...


In [3]:
# Define the dataset class
class SpectrogramDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
    
    def __len__(self):
        return len(self.dataframe)
    
    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx]['spectrogram']
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:
            image = self.transform(image)
        
        label = self.dataframe.iloc[idx].get('adherence', -1) - 1  # Default to -1 if 'adherence' not present
        return image, label

In [4]:
# Define transformations
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create the dataset
full_dataset = SpectrogramDataset(new_data, transform=transform)

In [9]:
# Define model, criterion, optimizer, and scheduler
def create_model():
    model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=5)
    model = model.to(device)
    return model

criterion = nn.CrossEntropyLoss()

def train_and_evaluate_model(train_loader, val_loader):
    model = create_model()
    optimizer = optim.AdamW(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.1)
    
    best_val_loss = float('inf')
    early_stop_patience = 10
    early_stop_counter = 0
    num_epochs = 50
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device).long()
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")
        
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device).long()
                
                outputs = model(images)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()
        
        val_loss /= len(val_loader)
        val_accuracy = 100 * val_correct / val_total
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")
        
        scheduler.step(val_loss)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), 'efficientnet_best_model_adherence_k_fold.pth')
        else:
            early_stop_counter += 1
            if early_stop_counter >= early_stop_patience:
                print("Early stopping triggered.")
                break
    
    return model

In [10]:
# Cross-validation setup
kf = KFold(n_splits=5, shuffle=True, random_state=42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
fold_results = []

for fold, (train_idx, val_idx) in enumerate(kf.split(full_dataset)):
    print(f"Fold {fold + 1}")
    
    train_subset = Subset(full_dataset, train_idx)
    val_subset = Subset(full_dataset, val_idx)
    
    train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)
    
    model = train_and_evaluate_model(train_loader, val_loader)
    
    # Evaluation on validation set
    model.eval()
    val_true = []
    val_pred = []
    
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).long()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            val_true.extend(labels.cpu().numpy())
            val_pred.extend(predicted.cpu().numpy())
    
    val_accuracy = accuracy_score(val_true, val_pred)
    fold_results.append(val_accuracy)
    print(f"Fold {fold + 1} Validation Accuracy: {val_accuracy:.2f}")


Fold 1
Epoch [1/50], Loss: 2.5193, Accuracy: 50.46%
Validation Loss: 1.6699, Validation Accuracy: 57.80%
Epoch [2/50], Loss: 1.6206, Accuracy: 65.74%
Validation Loss: 1.8790, Validation Accuracy: 52.29%
Epoch [3/50], Loss: 1.4405, Accuracy: 65.51%
Validation Loss: 1.2940, Validation Accuracy: 66.06%
Epoch [4/50], Loss: 1.0301, Accuracy: 67.82%
Validation Loss: 1.5216, Validation Accuracy: 77.06%
Epoch [5/50], Loss: 0.8677, Accuracy: 72.22%
Validation Loss: 1.2489, Validation Accuracy: 76.15%
Epoch [6/50], Loss: 0.6842, Accuracy: 74.77%
Validation Loss: 1.3141, Validation Accuracy: 68.81%
Epoch [7/50], Loss: 0.6901, Accuracy: 79.17%
Validation Loss: 1.2700, Validation Accuracy: 73.39%
Epoch [8/50], Loss: 0.6338, Accuracy: 79.17%
Validation Loss: 1.3711, Validation Accuracy: 73.39%
Epoch [9/50], Loss: 0.4565, Accuracy: 86.11%
Validation Loss: 0.9695, Validation Accuracy: 80.73%
Epoch [10/50], Loss: 0.4356, Accuracy: 84.72%
Validation Loss: 1.0763, Validation Accuracy: 66.97%
Epoch [11/50

In [11]:
import numpy as np
# Print cross-validation results
print("Cross-validation results:")
for i, accuracy in enumerate(fold_results):
    print(f"Fold {i + 1}: {accuracy:.2f}")

print(f"Average cross-validation accuracy: {np.mean(fold_results):.2f}")

Cross-validation results:
Fold 1: 0.75
Fold 2: 0.78
Fold 3: 0.66
Fold 4: 0.67
Fold 5: 0.74
Average cross-validation accuracy: 0.72


In [12]:
# Final prediction on the entire dataset (if needed)
# Create a dataset and dataloader for the complete dataset
non_labeled_data = pd.read_csv('/Users/rachelwang/Downloads/notes/models/csv/adherence_non_labeled_with_image.csv')
full_dataset = SpectrogramDataset(non_labeled_data, transform=transform)
full_loader = DataLoader(full_dataset, batch_size=32, shuffle=False)

# Load the best model and predict labels for the complete data
model.load_state_dict(torch.load('efficientnet_best_model_adherence_k_fold.pth'))

model.eval()
predictions = []

with torch.no_grad():
    for images, _ in full_loader:
        images = images.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.cpu().numpy())

# Add the predictions to the new data
non_labeled_data['adherence'] = [p + 1 for p in predictions]  # Convert back to 1-based labels

# Save the new data with predictions
new_data_with_predictions_path = '/Users/rachelwang/Downloads/notes/models/csv/predicted_adherence_3.csv'
non_labeled_data.to_csv(new_data_with_predictions_path, index=False)

print(f"Predictions saved to: {new_data_with_predictions_path}")

Predictions saved to: /Users/rachelwang/Downloads/notes/models/csv/predicted_adherence_3.csv


In [13]:
# Count the occurrences of each adherence label
label_counts = non_labeled_data['adherence'].value_counts().sort_index()
# Print the counts for each label
print("Adherence label counts:")
print(label_counts)

Adherence label counts:
adherence
1      88
2      28
3      85
4     366
5    2930
Name: count, dtype: int64
