<a href="https://colab.research.google.com/github/fjadidi2001/AD_Prediction/blob/main/Detecting_dementia_from_speech_and_transcripts_using_transformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1: Set Up Google Colab Environment

In [None]:
import os
from google.colab import drive
import tarfile

# Mount Google Drive
drive.mount('/content/drive')

# Install required libraries
!pip install torch torchvision torchaudio
!pip install transformers
!pip install librosa
!pip install numpy pandas scikit-learn
!pip install matplotlib

# Extract datasets
data_dir = '/content/drive/MyDrive/Voice/'
extract_dir = '/content/ADReSSo21/'

os.makedirs(extract_dir, exist_ok=True)

datasets = [
    'ADReSSo21-diagnosis-train.tgz',
    'ADReSSo21-progression-test.tgz',
    'ADReSSo21-progression-train.tgz'
]

for dataset in datasets:
    tar_path = os.path.join(data_dir, dataset)
    with tarfile.open(tar_path, 'r:gz') as tar:
        tar.extractall(extract_dir)
    print(f"Extracted {dataset}")

# Verify GPU availability
import torch
print("GPU Available:", torch.cuda.is_available())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Extracted ADReSSo21-diagnosis-train.tgz
Extracted ADReSSo21-progression-test.tgz
Extracted ADReSSo21-progression-train.tgz
GPU Available: True


# Step 2: Prepare the Dataset

In [6]:
import librosa
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
import glob

# Define paths
base_dir = '/content/ADReSSo21/'
train_dir = os.path.join(base_dir, 'ADReSSo21-diagnosis-train')
test_dir = os.path.join(base_dir, 'ADReSSo21-progression-test')

# Function to extract log-Mel spectrogram and MFCCs with delta and delta-delta
def extract_audio_features(audio_path, sr=16000, n_mels=128, n_mfcc=13):
    # Load audio
    y, sr = librosa.load(audio_path, sr=sr)

    # Log-Mel spectrogram
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

    # MFCCs
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # Delta and delta-delta
    delta_mfcc = librosa.feature.delta(mfcc)
    delta_delta_mfcc = librosa.feature.delta(mfcc, order=2)

    # Stack features as 3-channel image
    log_mel_image = np.stack([log_mel_spec, librosa.feature.delta(log_mel_spec), librosa.feature.delta(log_mel_spec, order=2)], axis=-1)
    mfcc_image = np.stack([mfcc, delta_mfcc, delta_delta_mfcc], axis=-1)

    return log_mel_image, mfcc_image

# Load dataset
def load_dataset(train_dir, test_dir=None):
    data = []

    # Load training data (ADReSSo21-diagnosis-train)
    for label, folder in [('non-AD', 'cc'), ('AD', 'cd')]:
        audio_path = os.path.join(train_dir, folder)
        for audio_file in glob.glob(os.path.join(audio_path, '*.wav')):
            # Assume transcript has same name with .cha or .txt extension
            transcript_file = audio_file.replace('.wav', '.cha')
            if not os.path.exists(transcript_file):
                transcript_file = audio_file.replace('.wav', '.txt')
            if os.path.exists(transcript_file):
                data.append({
                    'audio_path': audio_file,
                    'transcript_path': transcript_file,
                    'label': 1 if label == 'AD' else 0
                })

    train_df = pd.DataFrame(data)

    # Split train and validation (65%-35%)
    train_df, val_df = train_test_split(train_df, test_size=0.35, random_state=42)

    # Load test data if available
    test_df = pd.DataFrame()
    if test_dir and os.path.exists(test_dir):
        data = []
        # Assume test set has similar structure or metadata
        for audio_file in glob.glob(os.path.join(test_dir, '*.wav')):
            transcript_file = audio_file.replace('.wav', '.cha')
            if not os.path.exists(transcript_file):
                transcript_file = audio_file.replace('.wav', '.txt')
            if os.path.exists(transcript_file):
                # Placeholder: Assume label extraction from metadata or filename
                # Modify based on actual test set labels
                label = 0  # Replace with actual label extraction logic
                data.append({
                    'audio_path': audio_file,
                    'transcript_path': transcript_file,
                    'label': label
                })
        test_df = pd.DataFrame(data)

    return train_df, val_df, test_df

# Preprocess dataset
train_df, val_df, test_df = load_dataset(train_dir, test_dir)

print("Training samples:", len(train_df))
print("Validation samples:", len(val_df))
print("Test samples:", len(test_df))

ValueError: With n_samples=0, test_size=0.35 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [7]:
import glob
print(glob.glob('/content/ADReSSo21/ADReSSo21-diagnosis-train/*/*.wav'))
print(glob.glob('/content/ADReSSo21/ADReSSo21-diagnosis-train/*/*.cha'))

[]
[]


In [10]:
print(glob.glob('//content/ADReSSo21/ADReSSo21/progression/*/*.cha'))

[]
