In [1]:
import torchvision.models as models
import os
from PIL import Image
import numpy as np
import collections
import time

import torch
import torch.nn as nn
from torch.utils.data import random_split, DataLoader, TensorDataset, WeightedRandomSampler
from torchvision import transforms
import io
from tqdm import tqdm

In [2]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)
print(os.listdir('/content/gdrive/MyDrive/RespiraCheck'))
print(os.path.exists('/content/gdrive/MyDrive/RespiraCheck/Cough Data/spectrograms/positive'))
print(os.path.exists('/content/gdrive/MyDrive/RespiraCheck/Cough Data/spectrograms/negative'))

Mounted at /content/gdrive
['Cough Data']
True
True


In [3]:
import torch
import torch.nn as nn
import torchvision.models as models


class CNNModel(nn.Module):
    """A convolutional neural network model based on EfficientNet for spectrogram processing."""

    def __init__(self, dropout: float = 0.0):
        """Initializes the CNNModel using EfficientNet-B3 with an optional dropout layer.

        Args:
            dropout (float): Dropout probability before the final classification layer.
        """
        super(CNNModel, self).__init__()

        # Load EfficientNet-B3 with pre-trained weights
        self.efficientnet = models.efficientnet_b3(weights='IMAGENET1K_V1')

        # Get the number of features from the last layer of EfficientNet
        num_features = self.efficientnet.classifier[1].in_features

        # Replace the classifier with a new one for binary classification (2 outputs)
        self.efficientnet.classifier = nn.Sequential(
            nn.Dropout(p=dropout),          # Dropout before final layer
            nn.Linear(num_features, 2)      # Output 2 logits (for CrossEntropyLoss)
        )

        # Initialize the new FC layer weights
        nn.init.normal_(self.efficientnet.classifier[1].weight, mean=0.0, std=0.01)
        nn.init.zeros_(self.efficientnet.classifier[1].bias)

    def forward(self, spectrogram: torch.Tensor) -> torch.Tensor:
        """Defines the forward pass for EfficientNet.

        Args:
            spectrogram (torch.Tensor): Input tensor representing the spectrogram.

        Returns:
            torch.Tensor: Output logits for both classes.
        """
        return self.efficientnet(spectrogram)


In [4]:
class ModelHandler:
    def __init__(self,
                 model,
                 model_path: str,
                 optimizer: torch.optim.Optimizer,
                 loss_function: nn.Module,
                 steps_per_decay=5,
                 lr_decay=0.1):
        self.model = model
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model_path = model_path
        self.optimizer = optimizer
        self.lr_scheduler = opt.lr_scheduler.StepLR(self.optimizer, step_size=steps_per_decay, gamma=lr_decay)
        self.loss_function = loss_function

    def train_step(self, dataloader):
        self.model.train()
        avg_loss, acc = 0, 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(self.device), labels.to(self.device)

            logits = self.model(inputs)
            loss = self.loss_function(logits, labels)
            avg_loss += loss.item()

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            preds = torch.argmax(logits, dim=1)
            acc += (preds == labels).float().mean().item()

        avg_loss /= len(dataloader)
        acc /= len(dataloader)
        return {"avg_loss_per_batch": avg_loss, "avg_acc_per_batch": acc * 100}

    def val_step(self, dataloader):
        self.model.eval()
        avg_loss, acc = 0, 0
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                logits = self.model(inputs)
                loss = self.loss_function(logits, labels)
                avg_loss += loss.item()

                preds = torch.argmax(logits, dim=1)
                acc += (preds == labels).float().mean().item()

        avg_loss /= len(dataloader)
        acc /= len(dataloader)
        return {"avg_loss_per_batch": avg_loss, "avg_acc_per_batch": acc * 100}

    def train(self, train_loader, val_loader, epochs: int, model_name: str):
        self.model.to(self.device)
        training_results = {"epoch": [], "loss": [], "accuracy": []}
        validation_results = {"epoch": [], "loss": [], "accuracy": []}

        for epoch in range(epochs):
            training_data = self.train_step(train_loader)
            validation_data = self.val_step(val_loader)

            training_results["epoch"].append(epoch)
            training_results["loss"].append(training_data["avg_loss_per_batch"])
            training_results["accuracy"].append(training_data["avg_acc_per_batch"])

            validation_results["epoch"].append(epoch)
            validation_results["loss"].append(validation_data["avg_loss_per_batch"])
            validation_results["accuracy"].append(validation_data["avg_acc_per_batch"])

            if self.lr_scheduler:
                self.lr_scheduler.step()

            print(f"{epoch}:")
            print(f"LR: {self.optimizer.param_groups[0]['lr']}")
            print(f"Loss - {training_data['avg_loss_per_batch']:.5f} | Accuracy - {training_data['avg_acc_per_batch']:.2f}%")
            print(f"VLoss - {validation_data['avg_loss_per_batch']:.5f} | VAccuracy - {validation_data['avg_acc_per_batch']:.2f}%\n")

        self.save_model(model_state_dict=self.model.state_dict(), model_name=model_name)
        return training_results, validation_results

    def evaluate(self, test_loader):
        self.model.eval()
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                logits = self.model(inputs)
                preds = torch.argmax(logits, dim=1)

                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        acc = accuracy_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds)
        print(f"Test Accuracy: {acc:.4f}, Test F1 Score: {f1:.4f}")
        return acc, f1

    def predict(self, spectrogram: torch.Tensor, model_name: str) -> int:
        self.load_model(self.model_path + f"/{model_name}")
        spectrogram = spectrogram.unsqueeze(0).to(self.device)

        with torch.no_grad():
            logits = self.model(spectrogram)
            prediction = torch.argmax(logits, dim=1)

        return prediction.item()

    def save_model(self, model_state_dict: collections.OrderedDict, model_name: str | None) -> None:
        path = self.model_path + "/" + model_name
        torch.save(model_state_dict, path)

    def load_model(self, path: str) -> None:
        self.model.load_state_dict(torch.load(path))
        self.model.to(self.device)
        self.model.eval()


In [5]:
class DataPipeline:
    """Processes datasets, including loading, splitting, and preparing for inference.

    This class provides methods for loading datasets, processing them for training,
    and preparing single instances for inference.

    Attributes:
        test_size (float): Proportion of the dataset to include in the test split.
        val_size (float): Proportion of the dataset to include for validation.
        audio_processor: AudioProcessor instance for handling audio processing.
        image_processor: ImageProcessor instance for handling spectrogram or extracted features processing.
    """

    def __init__(self, test_size: float, val_size: float):
        """Initializes the DatasetProcessor.

        Args:
            data_path (str): Path to the dataset file.
            test_size (float): Proportion of the dataset to include in the test split.
            audio_processor (AudioProcessor): Instance for handling audio processing.
            image_processor (ImageProcessor): Instance for handling spectrogram processing.
        """
        self.test_size = test_size
        self.val_size = val_size

    def load_dataset(self) -> TensorDataset:
        """Loads the dataset from the specified file path into a DataFrame."""
        tensors = []
        labels = []

        for label_folder, label_value in zip(["positive", "negative"], [1, 0]):
            spectrogram_folder = '/content/gdrive/MyDrive/RespiraCheck/Cough Data/spectrograms'
            output_dir = os.path.join(spectrogram_folder, label_folder)

            for image_name in tqdm(os.listdir(output_dir)):
                image_path = os.path.join(output_dir, image_name)
                image_tensor = self.image_to_tensor(image_path)

                tensors.append(image_tensor)
                labels.append(label_value)

        # Tensor of all features (N x D) - N is number of samples (377), D is feature dimension (3,224,224)
        X = torch.stack(tensors)
        # Tensor of all labels (N x 1) - 377x1
        y = torch.tensor(labels, dtype=torch.long)

        return TensorDataset(X, y)


    def image_to_tensor(self, image_path: str) -> torch.Tensor:
        """Converts a spectrogram image to a PyTorch tensor.

        Args:
            image_path (str): Path to the spectrogram image file.

        Returns:
            torch.Tensor: The PyTorch tensor representation of the image.
        """
        transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize to ResNet18 input size
            transforms.ToTensor(),  # Convert image to tensor
        ])

        image = Image.open(image_path).convert("RGB") # Convert from RGBA to RGB
        tensor_image = transform(image)

        return tensor_image  # shape will be 3, 224, 224

    def create_dataloaders(self, batch_size, dataset_path=None, upsample=True):
        if dataset_path:
            dataset = torch.load(dataset_path, weights_only=False)
        else:
            dataset = self.load_dataset()

        test_size = round(self.test_size * len(dataset))
        val_size = round(self.val_size * len(dataset))
        train_size = len(dataset) - test_size - val_size

        train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

        train_labels_flat = []

        if upsample:
            labels = [label.item() for _, label in train_dataset]
            train_labels_flat = labels

            train_counts = {label: labels.count(label) for label in set(labels)}
            weights = torch.tensor([1.0 / train_counts[label] for label in labels])
            sampler = WeightedRandomSampler(weights, int(len(train_dataset) * 1.5))
            train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
        else:
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            train_labels_flat = [label.item() for _, label in train_dataset]

        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        return train_loader, val_loader, test_loader, train_labels_flat

In [6]:
import torch.optim as opt

# Static hyperparameters
EPOCHS = 20

# Learning rate scheduler
STEPS_PER_LR_DECAY = 20
LR_DECAY = 0.5

# Model parameters
DROPOUT = 0.5

# Training
LOSS_FN = nn.BCEWithLogitsLoss()



In [7]:
model = CNNModel(DROPOUT)

Downloading: "https://download.pytorch.org/models/efficientnet_b3_rwightman-b3899882.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b3_rwightman-b3899882.pth
100%|██████████| 47.2M/47.2M [00:00<00:00, 111MB/s]


In [8]:
datapipeline = DataPipeline(test_size=0.15, val_size=0.15)
train_loader, val_loader, test_loader, train_labels = datapipeline.create_dataloaders(batch_size=8)


100%|██████████| 1409/1409 [01:19<00:00, 17.78it/s]
100%|██████████| 4274/4274 [04:47<00:00, 14.86it/s]


In [None]:
import torch.optim as opt
from torch.optim.lr_scheduler import CosineAnnealingLR
import torchaudio.transforms as T
import torch.nn.utils
import numpy as np

# Define fixed hyperparameters
batch_size = 16  # Reduced batch size for better generalization
learning_rate = 0.0002  # More stable learning rate
weight_decay = 5e-4  # Regularization strength
dropout_rate = 0.6  # Increased dropout to reduce overfitting
patience = 15  # Increased patience for early stopping

print(f"\n🚀 Training with batch size: {batch_size}, learning rate: {learning_rate}")

# Initialize model (Use custom CNNModel with EfficientNet-B0 backbone)
cnn_model = CNNModel(dropout=dropout_rate)

# Choose optimizer (SGD with momentum for better generalization)
optimizer = opt.SGD(params=cnn_model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)

# Learning rate scheduler (CosineAnnealingLR for smooth LR decay)
scheduler = CosineAnnealingLR(optimizer, T_max=20)

# Data Augmentation (SpecAugment with additional Mixup)
def augment_spectrogram(spectrogram):
    spectrogram = T.FrequencyMasking(freq_mask_param=15)(spectrogram)
    spectrogram = T.TimeMasking(time_mask_param=25)(spectrogram)
    spectrogram = T.Vol(0.8)(spectrogram)  # Random volume adjustment
    return spectrogram

def mixup_data(x, y, alpha=0.2):
    lam = np.random.beta(alpha, alpha)
    index = torch.randperm(x.size(0))
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

# Create ModelHandler
model_handler = ModelHandler(model=cnn_model,
                             model_path="/content/gdrive/MyDrive/RespiraCheck/Cough Data",
                             optimizer=optimizer,
                             loss_function=LOSS_FN,
                             steps_per_decay=STEPS_PER_LR_DECAY,
                             lr_decay=LR_DECAY)

# Load dataset with the chosen batch size
datapipeline = DataPipeline(test_size=0.15, val_size=0.15)
train_loader, val_loader, test_loader = datapipeline.create_dataloaders(batch_size=batch_size, upsample=True)

# Debug: Check dataset distribution
print("\n📊 Dataset Split:")
print(f"- Training Samples: {len(train_loader.dataset)}")
print(f"- Validation Samples: {len(val_loader.dataset)}")
print(f"- Test Samples: {len(test_loader.dataset)}")

# Training loop with early stopping
best_val_loss = float("inf")
best_model = None
best_acc = 0.0
epochs_without_improvement = 0

for epoch in range(EPOCHS):
    print(f"\n🔄 Epoch {epoch+1}/{EPOCHS}")

    # Train
    train_results, val_results = model_handler.train(train_loader=train_loader, epochs=1, model_name="CNN_EfficientNet")
    train_loss = train_results["loss"][-1]  # Get the last recorded loss value

    # Validate
    val_acc, val_loss = model_handler.validate(val_loader, {"batch_size": batch_size, "lr": learning_rate})

    # Scheduler step
    scheduler.step()

    # Gradient Clipping to stabilize training
    torch.nn.utils.clip_grad_norm_(cnn_model.parameters(), max_norm=1.0)

    # Check if validation loss improved
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_acc = val_acc
        best_model = model_handler
        epochs_without_improvement = 0  # Reset counter if there's improvement
        print(f"✅ New best validation loss: {best_val_loss:.4f} | Validation accuracy: {val_acc*100:.2f}%")
    else:
        epochs_without_improvement += 1
        print(f"🔄 No improvement in validation loss for {epochs_without_improvement} epochs")

    # Early Stopping: Stop training if there's no improvement for `patience` epochs
    if epochs_without_improvement >= patience:
        print(f"⏹️ Early stopping triggered due to no improvement in validation loss.")
        break

# Final Testing
if best_model:
    test_acc = best_model.evaluate(test_loader)
    print(f"\n🎯 Test accuracy: {test_acc*100:.2f}% 🚀 Best model saved!")



🚀 Training with batch size: 16, learning rate: 0.0002


100%|██████████| 1409/1409 [00:39<00:00, 35.58it/s]
  1%|          | 44/4274 [00:10<16:02,  4.39it/s]


KeyboardInterrupt: 

In [None]:
import torch
import torch.optim as opt
from torch.optim.lr_scheduler import CosineAnnealingLR
import torchaudio.transforms as T
import torch.nn.utils
import numpy as np
from sklearn.metrics import f1_score, accuracy_score

# Define fixed hyperparameters
batch_size = 16  # Reduced batch size for better generalization
learning_rate = 0.0002  # More stable learning rate
weight_decay = 5e-4  # Regularization strength
dropout_rate = 0.6  # Increased dropout to reduce overfitting
patience = 15  # Early stopping patience

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n🚀 Training with batch size: {batch_size}, learning rate: {learning_rate}")

# Initialize model (Use custom CNNModel with EfficientNet-B0 backbone)
cnn_model = CNNModel(dropout=dropout_rate).to(device)  # Move model to device

# Choose optimizer (SGD with momentum for better generalization)
optimizer = opt.SGD(params=cnn_model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=weight_decay)

# Learning rate scheduler (CosineAnnealingLR for smooth LR decay)
scheduler = CosineAnnealingLR(optimizer, T_max=20)

# Log-Mel Spectrogram transformation
sample_rate = 16000  # Adjust based on your dataset's sample rate
n_fft = 400  # Number of samples per FFT
win_length = 400  # Length of the window
hop_length = 160  # Hop length (the number of samples between successive frames)
n_mels = 23  # Number of Mel bins
f_min = 0  # Minimum frequency
f_max = sample_rate // 2  # Maximum frequency (Nyquist)

# Log-Mel Spectrogram transformation
log_mel_transform = T.MelSpectrogram(
    sample_rate=sample_rate,
    n_fft=n_fft,
    win_length=win_length,
    hop_length=hop_length,
    n_mels=n_mels,
    f_min=f_min,
    f_max=f_max
)

# Logarithmic compression
log_transform = T.AmplitudeToDB(stype='power')

# Function to compute Log-Mel Spectrogram
def compute_log_mel_spectrogram(audio_waveform):
    mel_spectrogram = log_mel_transform(audio_waveform)
    log_mel_spectrogram = log_transform(mel_spectrogram)
    return log_mel_spectrogram

# Data Augmentation (SpecAugment + Mixup)
def augment_spectrogram(spectrogram):
    # Apply Log-Mel Spectrogram transformation here
    spectrogram = compute_log_mel_spectrogram(spectrogram)

    # SpecAugment augmentations
    spectrogram = T.FrequencyMasking(freq_mask_param=15)(spectrogram)  # Frequency Masking
    spectrogram = T.TimeMasking(time_mask_param=25)(spectrogram)  # Time Masking
    spectrogram = T.Vol(0.8)(spectrogram)  # Random volume adjustment

    return spectrogram

def mixup_data(x, y, alpha=0.2):
    lam = np.random.beta(alpha, alpha)
    index = torch.randperm(x.size(0))
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

# Create ModelHandler
model_handler = ModelHandler(
    model=cnn_model,
    model_path="/content/gdrive/MyDrive/RespiraCheck/Cough Data",
    optimizer=optimizer,
    loss_function=LOSS_FN,
    steps_per_decay=STEPS_PER_LR_DECAY,
    lr_decay=LR_DECAY
)

# Load dataset with the chosen batch size
datapipeline = DataPipeline(test_size=0.15, val_size=0.15)
train_loader, val_loader, test_loader = datapipeline.create_dataloaders(batch_size=batch_size, upsample=True)

# Debug: Check dataset distribution
print("\n📊 Dataset Split:")
print(f"- Training Samples: {len(train_loader.dataset)}")
print(f"- Validation Samples: {len(val_loader.dataset)}")
print(f"- Test Samples: {len(test_loader.dataset)}")

# Function to Validate and Compute F1-score
def validate(model_handler, val_loader):
    model_handler.model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for batch in val_loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)  # Move to device
            outputs = model_handler.model(inputs)
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    val_acc = accuracy_score(all_targets, all_preds)
    val_f1 = f1_score(all_targets, all_preds, average="weighted")

    return val_acc, val_f1

# Training loop with early stopping
best_val_loss = float("inf")
best_model = None
best_acc = 0.0
best_f1_score = 0.0
epochs_without_improvement = 0

for epoch in range(EPOCHS):
    print(f"\n🔄 Epoch {epoch+1}/{EPOCHS}")

    # Train
    train_results, val_results = model_handler.train(train_loader=train_loader, epochs=1, model_name="CNN_EfficientNet")

    # Validate and Compute F1-score
    val_acc, val_f1 = validate(model_handler, val_loader)

    # Scheduler step
    scheduler.step()

    print(f"📊 Validation Accuracy: {val_acc*100:.2f}% | F1-score: {val_f1:.4f}")

    # Check for improvement
    if val_f1 > best_f1_score:
        best_f1_score = val_f1
        best_model = model_handler
        epochs_without_improvement = 0  # Reset counter if there's improvement
        print(f"✅ New best F1-score: {best_f1_score:.4f}")
    else:
        epochs_without_improvement += 1
        print(f"🔄 No improvement in F1-score for {epochs_without_improvement} epochs")

    # Early Stopping
    if epochs_without_improvement >= patience:
        print(f"⏹️ Early stopping triggered due to no improvement in F1-score.")
        break

# Function to Evaluate Model on Test Set
def evaluate(model_handler, test_loader):
    model_handler.model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for batch in test_loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)  # Move to device
            outputs = model_handler.model(inputs)
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    test_acc = accuracy_score(all_targets, all_preds)
    test_f1 = f1_score(all_targets, all_preds, average="weighted")

    print(f"\n🎯 Test Accuracy: {test_acc*100:.2f}% | Test F1-score: {test_f1:.4f} 🚀")
    return test_acc, test_f1

# Final Testing
if best_model:
    test_acc, test_f1 = evaluate(best_model, test_loader)
    print(f"\n🎯 Test Accuracy: {test_acc*100:.2f}% | Test F1-score: {test_f1:.4f} 🚀 Best model saved!")


🚀 Training with batch size: 16, learning rate: 0.0002
Processing and loading dataset


100%|██████████| 1409/1409 [00:27<00:00, 50.32it/s]
100%|██████████| 4274/4274 [01:44<00:00, 41.04it/s]


Upsampling data

📊 Dataset Split:
- Training Samples: 3979
- Validation Samples: 852
- Test Samples: 852

🔄 Epoch 1/20
0:
LR: 0.0003
Loss - 0.58544 | Accuracy - 68.62%
VLoss - 0.69999 | VAccuracy - 65.86%

📊 Validation Accuracy: 75.00% | F1-score: 0.6429
✅ New best F1-score: 0.6429

🔄 Epoch 2/20
0:
LR: 0.00029815325108927063
Loss - 0.38090 | Accuracy - 83.85%
VLoss - 0.85533 | VAccuracy - 60.42%

📊 Validation Accuracy: 75.00% | F1-score: 0.6429
🔄 No improvement in F1-score for 1 epochs

🔄 Epoch 3/20
0:
LR: 0.00029265847744427303
Loss - 0.23048 | Accuracy - 91.09%
VLoss - 0.85182 | VAccuracy - 68.29%

📊 Validation Accuracy: 75.00% | F1-score: 0.6429
🔄 No improvement in F1-score for 2 epochs

🔄 Epoch 4/20
0:
LR: 0.0002836509786282552
Loss - 0.14031 | Accuracy - 94.44%
VLoss - 1.08216 | VAccuracy - 67.59%

📊 Validation Accuracy: 75.00% | F1-score: 0.6429
🔄 No improvement in F1-score for 3 epochs

🔄 Epoch 5/20
0:
LR: 0.0002713525491562421
Loss - 0.11338 | Accuracy - 96.15%
VLoss - 1.18663 

In [None]:
import torch
import torch.optim as opt
from torch.optim.lr_scheduler import CosineAnnealingLR
import torchaudio.transforms as T
import torch.nn.utils
import numpy as np
from sklearn.metrics import f1_score, accuracy_score
from sklearn.utils.class_weight import compute_class_weight

# Fixed hyperparameters
batch_size = 16
learning_rate = 0.0002
weight_decay = 5e-4
dropout_rate = 0.6
patience = 15
EPOCHS = 20

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"\n🚀 Training with batch size: {batch_size}, learning rate: {learning_rate}")

# Model init
cnn_model = CNNModel(dropout=dropout_rate).to(device)
optimizer = torch.optim.AdamW(cnn_model.parameters(), lr=0.0003, weight_decay=weight_decay)
scheduler = CosineAnnealingLR(optimizer, T_max=20)

# Log-Mel Spectrogram transformation
sample_rate = 16000
n_fft = 400
win_length = 400
hop_length = 160
n_mels = 23
f_min = 0
f_max = sample_rate // 2

log_mel_transform = T.MelSpectrogram(
    sample_rate=sample_rate,
    n_fft=n_fft,
    win_length=win_length,
    hop_length=hop_length,
    n_mels=n_mels,
    f_min=f_min,
    f_max=f_max
)
log_transform = T.AmplitudeToDB(stype='power')

def compute_log_mel_spectrogram(audio_waveform):
    mel_spectrogram = log_mel_transform(audio_waveform)
    log_mel_spectrogram = log_transform(mel_spectrogram)
    return log_mel_spectrogram

def augment_spectrogram(spectrogram):
    spectrogram = compute_log_mel_spectrogram(spectrogram)
    spectrogram = T.FrequencyMasking(freq_mask_param=10)(spectrogram)
    spectrogram = T.TimeMasking(time_mask_param=20)(spectrogram)
    return spectrogram

# Load data
datapipeline = DataPipeline(test_size=0.15, val_size=0.15)
train_loader, val_loader, test_loader, all_train_labels = datapipeline.create_dataloaders(batch_size=batch_size, upsample=True)

# Compute class weights
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(all_train_labels), y=all_train_labels)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)


# Define loss
loss_function = torch.nn.CrossEntropyLoss(weight=class_weights_tensor)

# Initialize model handler
model_handler = ModelHandler(
    model=cnn_model,
    model_path="/content/gdrive/MyDrive/RespiraCheck/Cough Data",
    optimizer=optimizer,
    loss_function=loss_function,
    steps_per_decay=STEPS_PER_LR_DECAY,
    lr_decay=LR_DECAY
)

# Validation function
def validate(model_handler, val_loader):
    model_handler.model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for batch in val_loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model_handler.model(inputs)
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    all_preds = np.array(all_preds)
    all_targets = np.array(all_targets)

    val_acc = accuracy_score(all_targets, all_preds)

    tp = np.sum((all_preds == 1) & (all_targets == 1))
    fp = np.sum((all_preds == 1) & (all_targets == 0))
    fn = np.sum((all_preds == 0) & (all_targets == 1))
    tn = np.sum((all_preds == 0) & (all_targets == 0))

    precision = tp / (tp + fp) if (tp + fp) != 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) != 0 else 0.0
    val_f1 = 2 * precision * recall / (precision + recall) if (precision + recall) != 0 else 0.0

    print(f"📊 Validation Accuracy: {val_acc*100:.2f}% | F1-score: {val_f1:.4f}")
    print(f"TP: {tp} | FP: {fp} | FN: {fn} | TN: {tn} | Precision: {precision:.4f} | Recall: {recall:.4f}")

    return val_acc, val_f1

# Evaluation
def evaluate(model_handler, test_loader):
    model_handler.model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for batch in test_loader:
            inputs, targets = batch
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model_handler.model(inputs)
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    all_preds = np.array(all_preds)
    all_targets = np.array(all_targets)

    test_acc = accuracy_score(all_targets, all_preds)

    tp = np.sum((all_preds == 1) & (all_targets == 1))
    fp = np.sum((all_preds == 1) & (all_targets == 0))
    fn = np.sum((all_preds == 0) & (all_targets == 1))
    tn = np.sum((all_preds == 0) & (all_targets == 0))

    precision = tp / (tp + fp) if (tp + fp) != 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) != 0 else 0.0
    test_f1 = 2 * precision * recall / (precision + recall) if (precision + recall) != 0 else 0.0

    print(f"\n🎯 Test Accuracy: {test_acc*100:.2f}% | Test F1-score: {test_f1:.4f} 🚀")
    print(f"TP: {tp} | FP: {fp} | FN: {fn} | TN: {tn} | Precision: {precision:.4f} | Recall: {recall:.4f}")
    return test_acc, test_f1

# Training loop
best_f1_score = 0.0
epochs_without_improvement = 0

for epoch in range(EPOCHS):
    print(f"\n🔄 Epoch {epoch+1}/{EPOCHS}")
   # for batch in train_loader:
   #     inputs, targets = batch
   #     inputs = augment_spectrogram(inputs).to(device)
   #     targets = targets.to(device)

    train_results, val_results = model_handler.train(train_loader=train_loader, val_loader=val_loader,epochs=1, model_name="CNN_EfficientNet")
    val_acc, val_f1 = validate(model_handler, val_loader)
    scheduler.step()

    if val_f1 > best_f1_score:
        best_f1_score = val_f1
        best_model = model_handler
        epochs_without_improvement = 0
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= patience:
        print(f"⏹️ Early stopping triggered due to no improvement in F1-score.")
        break

# Final evaluation
if best_model:
    test_acc, test_f1 = evaluate(best_model, test_loader)
    print(f"\n🎯 Test Accuracy: {test_acc*100:.2f}% | Test F1-score: {test_f1:.4f} 🚀 Best model saved!")



🚀 Training with batch size: 16, learning rate: 0.0002


100%|██████████| 1409/1409 [00:38<00:00, 36.22it/s]
100%|██████████| 4274/4274 [02:18<00:00, 30.89it/s]



🔄 Epoch 1/20
