<a href="https://colab.research.google.com/github/fjadidi2001/AD_Prediction/blob/main/Detecting_dementia_from_speech_and_transcripts_using_transformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1: Set Up Google Colab Environment

In [1]:
import os
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Install required libraries
!pip install torch torchvision torchaudio
!pip install transformers
!pip install librosa
!pip install numpy pandas scikit-learn
!pip install matplotlib

# Verify GPU availability
import torch
print("GPU Available:", torch.cuda.is_available())

Mounted at /content/drive
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading

# Step 2: Prepare the Dataset

In [2]:
import librosa
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split

# Dataset root on the mounted Drive, with one sub-folder per modality
data_dir = '/content/drive/MyDrive/ADReSS_Dataset/'
audio_dir, transcript_dir = (
    os.path.join(data_dir, subfolder) for subfolder in ('audio', 'transcripts')
)

# Function to extract log-Mel spectrogram and MFCCs with delta and delta-delta
def extract_audio_features(audio_path, sr=16000, n_mels=128, n_mfcc=13):
    """Turn one recording into two 3-channel feature "images".

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        sr: Sampling rate passed to ``librosa.load``; the rate it returns is
            used for every subsequent feature call.
        n_mels: Number of Mel bands for the log-Mel spectrogram. It is not
            forwarded to the MFCC computation.
        n_mfcc: Number of MFCC coefficients.

    Returns:
        A tuple ``(log_mel_image, mfcc_image)``. Each array stacks the base
        feature, its delta and its second-order delta along a new last axis,
        so the last dimension has size 3.
    """

    def _with_deltas(base):
        # Channels: static feature, first-order delta, second-order delta
        first_order = librosa.feature.delta(base)
        second_order = librosa.feature.delta(base, order=2)
        return np.stack([base, first_order, second_order], axis=-1)

    waveform, sample_rate = librosa.load(audio_path, sr=sr)

    # Mel power spectrogram converted to dB relative to its own maximum
    mel_power = librosa.feature.melspectrogram(y=waveform, sr=sample_rate, n_mels=n_mels)
    log_mel_spec = librosa.power_to_db(mel_power, ref=np.max)

    mfcc = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)

    return _with_deltas(log_mel_spec), _with_deltas(mfcc)

# Load dataset
def load_dataset(audio_dir, transcript_dir):
    """Pair each recording with its transcript and return the pairs as a table.

    For each label folder (``AD`` and ``non-AD``) under ``audio_dir``, every
    ``.wav`` file is matched with the same-named ``.txt`` file in the
    corresponding folder under ``transcript_dir``. Recordings without a
    transcript are skipped.

    Args:
        audio_dir: Directory containing the ``AD`` and ``non-AD`` audio folders.
        transcript_dir: Directory containing the ``AD`` and ``non-AD``
            transcript folders.

    Returns:
        A ``pandas.DataFrame`` with columns ``audio_path``, ``transcript_path``
        and ``label`` (1 for ``AD``, 0 for ``non-AD``). Rows are ordered by
        label folder and then by file name. The columns are present even when
        no pair is found.

    Raises:
        FileNotFoundError: If a label folder is missing under ``audio_dir``
            (raised by ``os.listdir``).
    """
    audio_ext, transcript_ext = '.wav', '.txt'
    columns = ['audio_path', 'transcript_path', 'label']

    data = []
    for label in ['AD', 'non-AD']:
        audio_path = os.path.join(audio_dir, label)
        transcript_path = os.path.join(transcript_dir, label)

        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order (and therefore any seeded split made from it) reproducible.
        for audio_file in sorted(os.listdir(audio_path)):
            if not audio_file.endswith(audio_ext):
                continue

            # Swap only the trailing extension: str.replace would also rewrite
            # a '.wav' that appears earlier in the file name.
            transcript_file = audio_file[:-len(audio_ext)] + transcript_ext
            transcript_file_path = os.path.join(transcript_path, transcript_file)
            if os.path.exists(transcript_file_path):
                data.append({
                    'audio_path': os.path.join(audio_path, audio_file),
                    'transcript_path': transcript_file_path,
                    'label': 1 if label == 'AD' else 0
                })

    # Explicit columns so an empty result still has the expected schema
    return pd.DataFrame(data, columns=columns)

# Index the recordings, then hold out 35% of the rows for validation.
# The fixed random_state keeps the split repeatable for a given row order.
dataset = load_dataset(audio_dir, transcript_dir)
train_df, val_df = train_test_split(dataset, random_state=42, test_size=0.35)

# Report how many rows ended up in each split
print("Training samples:", train_df.shape[0])
print("Validation samples:", val_df.shape[0])

FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/ADReSS_Dataset/audio/AD'