In [None]:
import os
import pickle
import random
from collections import defaultdict

import cv2
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from google.colab import drive
from sklearn.metrics import (
    roc_curve, auc, accuracy_score, precision_score, recall_score,
    f1_score, balanced_accuracy_score, confusion_matrix
)
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import ASTFeatureExtractor, ASTForAudioClassification

# Mount Google Drive to access the dataset
drive.mount('/content/drive')


tesi_path = '/path/to/folder'
os.chdir(tesi_path)
print("Current working directory:", os.getcwd())

# SECURITY: unpickling can execute arbitrary code embedded in the file, so only
# load a pickle that you produced yourself or obtained from a trusted source.
file_path = '/path/to/newdata_updated.pkl'
with open(file_path, 'rb') as f:
    newdata = pickle.load(f)

print("Pickle file loaded successfully.")
print(f"Total samples in dataset: {len(newdata)}")

# NOTE(review): the extractor is configured for 16 kHz input, while the
# length statistics further down use sampling_rate = 44100. If the stored
# waveforms really are 44.1 kHz they should be resampled to 16 kHz before
# feature extraction -- confirm the true rate of the recordings.
feature_extractor = ASTFeatureExtractor.from_pretrained(
    'MIT/ast-finetuned-audioset-10-10-0.4593',
    sampling_rate=16000,
    return_attention_mask=False
)

# Peak-normalise every waveform to the range [-1, 1].
for item in newdata:
    audio = np.array(item['audio'])
    # An empty clip has no peak and an all-zero clip has peak 0; dividing by
    # that peak would fail or fill the clip with NaN, so leave such clips as is.
    peak = np.max(np.abs(audio)) if audio.size else 0
    if peak > 0:
        audio = audio / peak
    item['audio'] = audio

print(f"Example audio shape: {newdata[0]['audio'].shape}")


def print_updrs_distribution(data, label_key='label', updrs_keys=['updrs', 'UPDRS']):
    """Print class counts and a UPDRS histogram for a collection of records.

    Args:
        data: Iterable of dict-like records.
        label_key: Key holding the class label; 0 is counted as a control and
            1 as a Parkinsonian. Records without the key, or with any other
            label value, are not counted in either class.
        updrs_keys: Candidate keys for the UPDRS value, tried in order. Only
            the first key present in a record is consulted; a ``None`` value
            under that key means the record is left out of the histogram.

    Returns:
        None. The summary is written to standard output.
    """
    n_controls = 0
    n_parkinsonians = 0
    level_tally = defaultdict(int)

    for record in data:
        if label_key in record:
            record_label = record[label_key]
            if record_label == 0:
                n_controls += 1
            elif record_label == 1:
                n_parkinsonians += 1

        # Only the first matching key counts, even if its value is None.
        present_keys = [key for key in updrs_keys if key in record]
        if present_keys:
            level = record[present_keys[0]]
            if level is not None:
                level_tally[level] += 1

    print(f"Number of controls: {n_controls}")
    print(f"Number of Parkinsonians: {n_parkinsonians}")
    print("UPDRS Distribution:")
    for level in sorted(level_tally):
        print(f"  UPDRS {level}: {level_tally[level]}")


sampling_rate = 44100


def _show_length_histogram(lengths_sec, bar_color, title):
    """Draw and show a 50-bin histogram of clip durations given in seconds."""
    plt.figure(figsize=(10, 6))
    plt.hist(lengths_sec, bins=50, color=bar_color, alpha=0.7)
    plt.title(title)
    plt.xlabel('Audio Length (seconds)')
    plt.ylabel('Count')
    plt.show()


# Clip lengths (in samples and in seconds) before any filtering.
audio_lengths = [len(item['audio']) for item in newdata]
audio_lengths_sec = [n_samples / sampling_rate for n_samples in audio_lengths]

_show_length_histogram(
    audio_lengths_sec, 'blue', 'Distribution of Audio Lengths (Before Processing)'
)

print(f"Number of audios: {len(audio_lengths_sec)}")
print(f"Minimum audio length: {min(audio_lengths_sec):.2f} seconds")
print(f"Maximum audio length: {max(audio_lengths_sec):.2f} seconds")

# Tukey fences: keep clips whose length lies within 1.5 * IQR of the quartiles.
q1, q3 = (np.percentile(audio_lengths, pct) for pct in (25, 75))
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr

filtered_data = [
    item
    for item, n_samples in zip(newdata, audio_lengths)
    if lower_bound <= n_samples <= upper_bound
]
filtered_audio_lengths = [
    n_samples for n_samples in audio_lengths if lower_bound <= n_samples <= upper_bound
]
filtered_audio_lengths_sec = [n_samples / sampling_rate for n_samples in filtered_audio_lengths]

_show_length_histogram(
    filtered_audio_lengths_sec, 'green', 'Distribution of Audio Lengths (After Removing Outliers)'
)

print(f"Number of audios after removing outliers: {len(filtered_audio_lengths_sec)}")
print(f"Minimum audio length after filtering: {min(filtered_audio_lengths_sec):.2f} seconds")
print(f"Maximum audio length after filtering: {max(filtered_audio_lengths_sec):.2f} seconds")

# The shortest surviving clip sets the common length used when splitting clips.
min_length = min(filtered_audio_lengths)
print(f"The shortest audio length is: {min_length / sampling_rate:.2f} seconds ({min_length} samples)")

print("Distribution BEFORE outlier removal:")
print_updrs_distribution(newdata)
print("Distribution AFTER outlier removal:")
print_updrs_distribution(filtered_data)

# From here on the notebook works with the filtered records only.
newdata = filtered_data


def extract_first_last_halves(audio, min_length):
    """Return the leading and trailing ``min_length // 2`` samples of a clip.

    A clip shorter than ``min_length`` is first right-padded with zeros up to
    ``min_length`` so that both segments always have the same length.

    Args:
        audio: 1-D sequence of samples (a NumPy array in this notebook).
        min_length: Reference length in samples; each returned segment holds
            ``min_length // 2`` samples (0 if that would be negative).

    Returns:
        Tuple ``(first_half, last_half)``.
    """
    n_half = max(min_length // 2, 0)

    shortfall = min_length - len(audio)
    if shortfall > 0:
        audio = np.pad(audio, (0, shortfall), 'constant')

    first_half = audio[:n_half]
    # Index from the front rather than with ``audio[-n_half:]``: when n_half
    # is 0 the negative form becomes ``audio[-0:]``, which is the whole clip.
    last_half = audio[len(audio) - n_half:]
    return first_half, last_half


# Replace every raw waveform with its two fixed-length segments. The raw
# 'audio' entry is dropped afterwards, so records that were already split
# are skipped, which keeps this cell safe to run more than once.
for item in newdata:
    already_split = 'audio' not in item and 'audio_first' in item and 'audio_last' in item
    if already_split:
        continue
    audio = np.array(item['audio'])
    first_half, last_half = extract_first_last_halves(audio, min_length)
    item['audio_first'] = first_half
    item['audio_last'] = last_half
    del item['audio']


class AudioDataset(Dataset):
    """Dataset yielding extractor features for the two segments of each record.

    Every record must provide 'audio_first', 'audio_last' and 'label'. The
    UPDRS score is looked up under 'updrs', then 'UPDRS', and defaults to -1
    when neither key is present.
    """

    def __init__(self, data, extractor, augment=False):
        """Store the records, the feature extractor and the ``augment`` flag.

        ``augment`` is only stored; no method of this class reads it.
        """
        self.data, self.extractor, self.augment = data, extractor, augment

    def __len__(self):
        """Number of records in the dataset."""
        return len(self.data)

    def _featurize(self, waveform):
        """Run the extractor on one waveform and drop the leading batch axis."""
        encoded = self.extractor(waveform, sampling_rate=16000, return_tensors="pt", padding=True)
        return encoded['input_values'].squeeze(0)

    def __getitem__(self, idx):
        """Return ``(features_first, features_last, label, {'updrs': value})``."""
        record = self.data[idx]
        first_wave, last_wave = record['audio_first'], record['audio_last']
        label = record['label']

        for updrs_key in ('updrs', 'UPDRS'):
            if updrs_key in record:
                updrs_value = record[updrs_key]
                break
        else:
            # No UPDRS information stored for this record.
            updrs_value = -1

        features_first = self._featurize(first_wave)
        features_last = self._featurize(last_wave)

        return features_first, features_last, label, {'updrs': updrs_value}


def stratified_group_split(all_ids, grouped_by_id, label_key='label', num_folds=5, seed=None):
    """Split subject ids into class-stratified folds.

    Each id is assigned to exactly one fold, so all records of a subject stay
    together. Controls (label 0) and Parkinsonians (label 1) are shuffled
    separately and dealt round-robin, so every id with label 0 or 1 lands in
    some fold and fold sizes differ by at most one. Ids whose first record
    carries any other label are not placed in any fold.

    Args:
        all_ids: Iterable of subject ids.
        grouped_by_id: Mapping from id to a non-empty list of records; the
            label is read from the first record of each id.
        label_key: Record key that holds the class label.
        num_folds: Number of folds to create.
        seed: If given, shuffling uses a private ``random.Random(seed)``;
            otherwise the module-level ``random`` state is used, so a global
            ``random.seed`` call controls the result.

    Returns:
        List of ``num_folds`` lists of ids.

    Raises:
        ValueError: If ``num_folds`` is smaller than 1.
    """
    if num_folds < 1:
        raise ValueError("num_folds must be at least 1")

    rng = random.Random(seed) if seed is not None else random

    controls = [id_ for id_ in all_ids if grouped_by_id[id_][0][label_key] == 0]
    parkinsons = [id_ for id_ in all_ids if grouped_by_id[id_][0][label_key] == 1]

    rng.shuffle(controls)
    rng.shuffle(parkinsons)

    # Dealing with a stride keeps the remainder ids that integer division
    # would drop. Parkinsonians start at the fold right after the last one
    # that received an extra control, so surplus ids spread over the folds.
    offset = len(controls) % num_folds
    folds = []
    for i in range(num_folds):
        j = (i - offset) % num_folds
        folds.append(controls[i::num_folds] + parkinsons[j::num_folds])

    return folds


class DualInputASTModel(nn.Module):
    """AST classifier applied to two inputs whose logits are averaged.

    The same ``ASTForAudioClassification`` instance scores both inputs.
    Only parameters whose name contains 'encoder.layer.10',
    'encoder.layer.11' or 'classifier' are trainable; all others are frozen.

    Args:
        pretrained_model_name: Checkpoint passed to ``from_pretrained``.
        num_labels: Number of output classes.
        dropout_rate: Probability used by both dropout layers.
    """

    def __init__(self, pretrained_model_name='MIT/ast-finetuned-audioset-10-10-0.4593', num_labels=2,
                 dropout_rate=0.3):
        super().__init__()
        self.ast = ASTForAudioClassification.from_pretrained(
            pretrained_model_name,
            num_labels=num_labels,
            ignore_mismatched_sizes=True
        )
        # NOTE(review): dropout is applied to the output logits, not to hidden
        # features, so during training it can zero a class score outright.
        # Confirm this is the intended regularisation.
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.dropout2 = nn.Dropout(p=dropout_rate)

        # Freeze everything except the last two encoder layers and the head.
        trainable_markers = ('encoder.layer.10', 'encoder.layer.11', 'classifier')
        for name, param in self.ast.named_parameters():
            param.requires_grad = any(marker in name for marker in trainable_markers)

    def forward(self, input_values_first, input_values_last):
        """Return the mean of the (dropout-regularised) logits of both inputs."""
        outputs_first = self.dropout1(self.ast(input_values_first).logits)
        outputs_last = self.dropout2(self.ast(input_values_last).logits)
        combined_logits = (outputs_first + outputs_last) / 2
        return combined_logits


In [None]:
def _coerce_updrs_level(value):
    """Convert one raw UPDRS entry to an int level, or None if it is unusable.

    Tensors are unwrapped with ``.item()``. ``None``, the sentinel -1 and
    values that cannot be converted to an int (e.g. NaN) all map to None.
    Fractional values are truncated by ``int()``.
    """
    if isinstance(value, torch.Tensor):
        value = value.item()
    if value is None:
        return None
    try:
        level = int(value)
    except (TypeError, ValueError, OverflowError):
        return None
    return None if level == -1 else level


def analyze_updrs(val_loader, model, device):
    """
    Analyze logits and probabilities by UPDRS levels for AST model with dual inputs.

    Args:
        val_loader: Iterable of ``(inputs_first, inputs_last, labels, metadata)``
            batches, where ``metadata`` maps 'updrs' or 'UPDRS' to a sequence
            with one entry per sample.
        model: Callable returning logits of shape (batch, classes) for the two
            inputs. This function does not change the model's train/eval mode;
            the caller is expected to have called ``model.eval()``.
        device: Device the two inputs are moved to before the forward pass.

    Returns:
        Dict mapping each UPDRS level 0-4 to a list of
        ``(class-1 logit, class-1 probability, true label)`` tuples. Samples
        without a usable level (missing, None, -1, non-numeric) are skipped.
        Levels outside 0-4 are skipped as well and reported in one warning
        instead of raising ``KeyError``.
    """
    import warnings

    updrs_results = {level: [] for level in range(5)}
    skipped_levels = set()

    with torch.no_grad():
        for inputs_first, inputs_last, labels, metadata in val_loader:
            if 'updrs' in metadata:
                batch_updrs = metadata['updrs']
            elif 'UPDRS' in metadata:
                batch_updrs = metadata['UPDRS']
            else:
                # No UPDRS information for this batch: nothing to record.
                continue

            logits = model(inputs_first.to(device), inputs_last.to(device))
            positive_probs = torch.softmax(logits, dim=1)[:, 1].tolist()
            positive_logits = logits[:, 1].tolist()
            batch_labels = labels.tolist()

            for i, (logit, prob, label) in enumerate(
                zip(positive_logits, positive_probs, batch_labels)
            ):
                level = _coerce_updrs_level(batch_updrs[i])
                if level is None:
                    continue
                if level not in updrs_results:
                    skipped_levels.add(level)
                    continue
                updrs_results[level].append((logit, prob, label))

    if skipped_levels:
        warnings.warn(
            f"Skipped samples with UPDRS levels outside 0-4: {sorted(skipped_levels)}"
        )

    return updrs_results




In [None]:
def aggregate_updrs_results(fold_results):
    """
    Aggregates UPDRS results across all folds.

    Args:
        fold_results: Iterable of per-fold dicts, each with an 'updrs_results'
            entry mapping a UPDRS level to a list of
            ``(logit, probability, true_label)`` tuples.

    Returns:
        Tuple ``(metrics, probabilities_by_updrs)``:

        * ``metrics`` maps each level to 'total_count', 'mean_probability',
          'mean_logit' and 'percentage_classified_as_parkinsonian'. The last
          one is the share of samples whose probability is >= 0.5, whatever
          their true label. Levels without samples get zeros.
        * ``probabilities_by_updrs`` maps each level to the list of pooled
          probabilities.

        Levels 0-4 are always present; any other level found in the fold
        results is added instead of raising ``KeyError``.
    """
    aggregated_results = {level: [] for level in range(5)}
    for fold in fold_results:
        for level, values in fold['updrs_results'].items():
            aggregated_results.setdefault(level, []).extend(values)

    metrics = {}
    probabilities_by_updrs = {}
    for level, results in aggregated_results.items():
        probs = [prob for _, prob, _ in results]
        probabilities_by_updrs[level] = probs

        if not results:
            metrics[level] = {
                'total_count': 0,
                'mean_probability': 0.0,
                'mean_logit': 0.0,
                'percentage_classified_as_parkinsonian': 0.0,
            }
            continue

        logits = [logit for logit, _, _ in results]
        total_count = len(results)
        # Count predictions only; requiring label == 1 as well would turn
        # this into a true-positive share, which is not what the key says.
        classified_as_parkinsonian = sum(1 for prob in probs if prob >= 0.5)

        metrics[level] = {
            'total_count': total_count,
            'mean_probability': np.mean(probs),
            'mean_logit': np.mean(logits),
            'percentage_classified_as_parkinsonian': (classified_as_parkinsonian / total_count) * 100,
        }

    return metrics, probabilities_by_updrs


In [None]:
# ---- Reproducibility ----
# Seed every random number generator this notebook relies on (Python's
# `random`, NumPy and PyTorch) before any fold is built or any model is
# created. Some GPU kernels may still be non-deterministic.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# ---- Training hyper-parameters ----
num_epochs = 100               # upper bound on epochs per fold
early_stopping_patience = 10   # epochs without a validation-accuracy gain before stopping
learning_rate = 1e-4
dropout_rate = 0.3             # not read by the training code in this cell
gamma = 0.995                  # per-epoch decay factor for ExponentialLR

# ---- Result containers ----
fold_results = []
train_accuracies, val_accuracies = [], []
train_precisions, val_precisions = [], []
train_recalls, val_recalls = [], []
train_f1s, val_f1s = [], []
train_losses, val_losses = [], []

criterion = nn.CrossEntropyLoss()

def compute_confusion_matrix_metrics(labels, preds, fold_num):
    """Print the binary confusion matrix of a fold and return derived metrics.

    Args:
        labels: True class labels (0 = control, 1 = Parkinsonian).
        preds: Predicted class labels.
        fold_num: Fold number, used only in the printed header.

    Returns:
        Tuple ``(sensitivity, specificity, balanced_accuracy)``. A rate whose
        denominator is zero is reported as 0.
    """
    # Passing labels=[0, 1] forces a 2x2 matrix even when only one class
    # occurs, so the four cells can always be unpacked unambiguously.
    cm = confusion_matrix(labels, preds, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    balanced_acc = (sensitivity + specificity) / 2
    print(f"\nConfusion Matrix for Fold {fold_num}:\n{cm}")
    print(f"Sensitivity: {sensitivity:.4f}, Specificity: {specificity:.4f}, Balanced Accuracy: {balanced_acc:.4f}")
    return sensitivity, specificity, balanced_acc

# Cross-validation loop
# Group records by subject id so that all recordings of one subject end up on
# the same side of every train/validation split.
grouped_by_id = defaultdict(list)
for item in newdata:
    grouped_by_id[item['id']].append(item)

all_ids = list(grouped_by_id.keys())
folds = stratified_group_split(all_ids, grouped_by_id)

for fold in range(5):
    print(f"\nProcessing Fold {fold+1}/5...")
    val_ids = folds[fold]
    val_id_set = set(val_ids)  # constant-time membership test
    train_ids = [id_ for id_ in all_ids if id_ not in val_id_set]
    train_samples = [item for id_soggetto in train_ids for item in grouped_by_id[id_soggetto]]
    val_samples = [item for id_soggetto in val_ids for item in grouped_by_id[id_soggetto]]

    print(f"Training set size: {len(train_samples)}, Validation set size: {len(val_samples)}")

    train_dataset = AudioDataset(train_samples, feature_extractor)
    val_dataset = AudioDataset(val_samples, feature_extractor)
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

    # A fresh model per fold, so no weights leak between folds.
    model = DualInputASTModel()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Only the parameters left trainable by the model are optimised.
    optimizer = optim.AdamW(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=learning_rate,
        weight_decay=0.05
    )
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=gamma)

    best_val_accuracy = 0
    patience_counter = 0
    epoch_train_accuracies, epoch_val_accuracies = [], []
    epoch_train_precisions, epoch_val_precisions = [], []
    epoch_train_recalls, epoch_val_recalls = [], []
    epoch_train_f1s, epoch_val_f1s = [], []
    epoch_train_losses, epoch_val_losses = [], []
    val_fold_preds, val_fold_true = [], []

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs} for Fold {fold+1}")

        # ---- Training pass ----
        model.train()
        train_loss, correct_train = 0.0, 0
        train_preds, train_true = [], []

        for inputs_first, inputs_last, labels, _ in train_loader:
            inputs_first, inputs_last, labels = inputs_first.to(device), inputs_last.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs_first, inputs_last)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size to get a per-sample mean later.
            train_loss += loss.item() * labels.size(0)
            preds = torch.argmax(outputs, dim=1)
            correct_train += torch.sum(preds == labels).item()
            train_preds.extend(preds.cpu().numpy())
            train_true.extend(labels.cpu().numpy())

        train_loss /= len(train_loader.dataset)
        train_accuracy = correct_train / len(train_loader.dataset)
        train_precision = precision_score(train_true, train_preds, zero_division=0)
        train_recall = recall_score(train_true, train_preds, zero_division=0)
        train_f1 = f1_score(train_true, train_preds, zero_division=0)

        # ---- Validation pass ----
        model.eval()
        val_loss, correct_val = 0.0, 0
        val_preds, val_true = [], []

        with torch.no_grad():
            for inputs_first, inputs_last, labels, _ in val_loader:
                inputs_first, inputs_last, labels = inputs_first.to(device), inputs_last.to(device), labels.to(device)
                outputs = model(inputs_first, inputs_last)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * labels.size(0)
                preds = torch.argmax(outputs, dim=1)
                correct_val += torch.sum(preds == labels).item()
                val_preds.extend(preds.cpu().numpy())
                val_true.extend(labels.cpu().numpy())

        # Keep the most recent validation predictions so the fold summary
        # holds the outputs that match its last-epoch metrics.
        val_fold_preds, val_fold_true = val_preds, val_true

        val_loss /= len(val_loader.dataset)
        val_accuracy = correct_val / len(val_loader.dataset)
        val_precision = precision_score(val_true, val_preds, zero_division=0)
        val_recall = recall_score(val_true, val_preds, zero_division=0)
        val_f1 = f1_score(val_true, val_preds, zero_division=0)
        val_sensitivity = val_recall

        true_negatives = np.sum((np.array(val_true) == 0) & (np.array(val_preds) == 0))
        false_positives = np.sum((np.array(val_true) == 0) & (np.array(val_preds) == 1))
        # A validation fold without controls has no negatives; report 0.0
        # instead of dividing 0 by 0.
        negatives = true_negatives + false_positives
        val_specificity = true_negatives / negatives if negatives > 0 else 0.0

        epoch_train_accuracies.append(train_accuracy)
        epoch_val_accuracies.append(val_accuracy)
        epoch_train_precisions.append(train_precision)
        epoch_val_precisions.append(val_precision)
        epoch_train_recalls.append(train_recall)
        epoch_val_recalls.append(val_recall)
        epoch_train_f1s.append(train_f1)
        epoch_val_f1s.append(val_f1)
        epoch_train_losses.append(train_loss)
        epoch_val_losses.append(val_loss)

        print(f"Fold {fold + 1}, Epoch {epoch + 1} - Train Acc: {train_accuracy:.4f}, Val Acc: {val_accuracy:.4f}")
        print(f"Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

        # Early stopping on validation accuracy; the best weights are saved to disk.
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            patience_counter = 0
            torch.save(model.state_dict(), f'best_model_fold_{fold+1}.pt')
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

        scheduler.step()

    # Only the last epoch's UPDRS analysis is stored, so run it once here,
    # on the final (eval-mode) model, instead of after every epoch.
    # NOTE(review): the summary describes the last-epoch model, not the best
    # checkpoint saved above -- confirm that this is intended.
    updrs_results = analyze_updrs(val_loader, model, device)

    fold_results.append({
        'train_accuracy': epoch_train_accuracies,
        'val_accuracy': epoch_val_accuracies,
        'train_precision': epoch_train_precisions,
        'val_precision': epoch_val_precisions,
        'train_recall': epoch_train_recalls,
        'val_recall': epoch_val_recalls,
        'train_f1': epoch_train_f1s,
        'val_f1': epoch_val_f1s,
        'train_loss': epoch_train_losses,
        'val_loss': epoch_val_losses,
        'val_sensitivity': val_sensitivity,
        'val_specificity': val_specificity,
        'val_preds': val_fold_preds,
        'val_true': val_fold_true,
        'updrs_results': updrs_results
    })

# Final average metrics
def _last_epoch_values(metric_key):
    """Return the last-epoch value of a per-epoch metric, one entry per fold."""
    return [fold_summary[metric_key][-1] for fold_summary in fold_results]


# Per-fold values taken from the final epoch each fold actually ran.
final_train_accuracies = _last_epoch_values('train_accuracy')
final_val_accuracies = _last_epoch_values('val_accuracy')
final_train_precisions = _last_epoch_values('train_precision')
final_val_precisions = _last_epoch_values('val_precision')
final_train_recalls = _last_epoch_values('train_recall')
final_val_recalls = _last_epoch_values('val_recall')
final_train_f1s = _last_epoch_values('train_f1')
final_val_f1s = _last_epoch_values('val_f1')
# Sensitivity and specificity are stored as one scalar per fold.
final_val_sensitivities = [fold_summary['val_sensitivity'] for fold_summary in fold_results]
final_val_specificities = [fold_summary['val_specificity'] for fold_summary in fold_results]

# Unweighted means across folds.
average_train_accuracy = np.mean(final_train_accuracies)
average_val_accuracy = np.mean(final_val_accuracies)
average_train_precision = np.mean(final_train_precisions)
average_val_precision = np.mean(final_val_precisions)
average_train_recall = np.mean(final_train_recalls)
average_val_recall = np.mean(final_val_recalls)
average_train_f1 = np.mean(final_train_f1s)
average_val_f1 = np.mean(final_val_f1s)
average_val_sensitivity = np.mean(final_val_sensitivities)
average_val_specificity = np.mean(final_val_specificities)

print("\n===== Average Metrics Across Folds (Last Epoch Only) =====")
for caption, value in (
    ("Training Accuracy", average_train_accuracy),
    ("Validation Accuracy", average_val_accuracy),
    ("Training Precision", average_train_precision),
    ("Validation Precision", average_val_precision),
    ("Training Recall", average_train_recall),
    ("Validation Recall", average_val_recall),
    ("Training F1-Score", average_train_f1),
    ("Validation F1-Score", average_val_f1),
    ("Validation Sensitivity", average_val_sensitivity),
    ("Validation Specificity", average_val_specificity),
):
    print(f"{caption}: {value:.4f}")




In [None]:
# Pool the per-fold UPDRS results and print one summary block per level.
aggregated_updrs_metrics, probabilities_by_updrs = aggregate_updrs_results(fold_results)

print("\n===== Aggregated UPDRS Metrics Across Folds =====")
separator = "-" * 40
for level in aggregated_updrs_metrics:
    metrics = aggregated_updrs_metrics[level]
    print(
        f"UPDRS Level {level}:",
        f"  Total Count: {metrics['total_count']}",
        f"  Mean Probability (Parkinsonian): {metrics['mean_probability']:.4f}",
        f"  Percentage Classified as Parkinsonian: {metrics['percentage_classified_as_parkinsonian']:.2f}%",
        separator,
        sep="\n",
    )

In [None]:
def _plot_bar_by_level(levels, values, palette, ylabel, title):
    """Draw and show one bar chart of ``values`` against the UPDRS ``levels``."""
    plt.figure(figsize=(10, 6))
    sns.barplot(x=levels, y=values, palette=palette)
    plt.xlabel('UPDRS Level')
    plt.ylabel(ylabel)
    plt.title(title)
    plt.show()


def plot_updrs_metrics(aggregated_updrs_metrics, probabilities_by_updrs):
    """Plot the aggregated UPDRS metrics as four separate figures.

    The figures show, per UPDRS level: the mean probability (bar), the
    distribution of per-sample probabilities (box), the percentage classified
    as Parkinsonian (bar) and the total sample count (bar).

    Args:
        aggregated_updrs_metrics: Mapping from level to a dict with the keys
            'total_count', 'mean_probability' and
            'percentage_classified_as_parkinsonian'.
        probabilities_by_updrs: Mapping from level to a list of probabilities.
    """
    levels = list(aggregated_updrs_metrics.keys())

    def column(metric_key):
        """Collect one metric across all levels, in ``levels`` order."""
        return [aggregated_updrs_metrics[level][metric_key] for level in levels]

    _plot_bar_by_level(
        levels, column('mean_probability'), 'Blues_d', 'Mean Probability',
        'Mean Probability of Being Classified as Parkinsonian by UPDRS Level'
    )

    data = [(level, prob) for level, probs in probabilities_by_updrs.items() for prob in probs]
    df = pd.DataFrame(data, columns=['UPDRS Level', 'Probability'])
    if df.empty:
        # There is nothing to draw without per-sample probabilities.
        print("No per-sample probabilities available; skipping the box plot.")
    else:
        plt.figure(figsize=(10, 6))
        sns.boxplot(x='UPDRS Level', y='Probability', data=df, palette='Pastel1')
        plt.xlabel('UPDRS Level')
        plt.ylabel('Probability')
        plt.title('Distribution of Probabilities by UPDRS Level')
        plt.show()

    _plot_bar_by_level(
        levels, column('percentage_classified_as_parkinsonian'), 'Oranges_d',
        'Percentage Classified as Parkinsonian',
        'Percentage Classified as Parkinsonian by UPDRS Level'
    )

    _plot_bar_by_level(
        levels, column('total_count'), 'Greens_d', 'Total Count',
        'Total Data Count by UPDRS Level'
    )


plot_updrs_metrics(aggregated_updrs_metrics, probabilities_by_updrs)