<a href="https://colab.research.google.com/github/fjadidi2001/AD_Prediction/blob/main/July16_Speech_CompleteV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
import glob
import pandas as pd

# Install required libraries
!pip install librosa numpy scipy matplotlib seaborn pandas transformers torch speechbrain networkx

# Define dataset paths (glob patterns, one per split/class)
audio_paths = {
    'train_ad': '/content/drive/MyDrive/Voice/extracted/ADReSSo21/diagnosis/train/audio/ad/*.wav',
    'train_cn': '/content/drive/MyDrive/Voice/extracted/ADReSSo21/diagnosis/train/audio/cn/*.wav',
    'test': '/content/drive/MyDrive/Voice/extracted/ADReSSo21/progression/test-dist/audio/*.wav'
}
# Segmentation patterns mirror audio_paths; the loop below derives each
# segmentation path from its audio path instead of globbing these.
seg_paths = {
    'train_ad': '/content/drive/MyDrive/Voice/extracted/ADReSSo21/diagnosis/train/segmentation/ad/*.csv',
    'train_cn': '/content/drive/MyDrive/Voice/extracted/ADReSSo21/diagnosis/train/segmentation/cn/*.csv',
    'test': '/content/drive/MyDrive/Voice/extracted/ADReSSo21/progression/test-dist/segmentation/*.csv'
}

# Create output directory if it doesn't exist
output_dir = '/content/drive/MyDrive/Voice/extracted/ADReSSo21'
os.makedirs(output_dir, exist_ok=True)

# Organize dataset: one metadata row per audio file
metadata_columns = ['audio_path', 'segmentation_path', 'label', 'dataset_type']
metadata = []
for dataset, path in audio_paths.items():
    # Match on the key's suffix/prefix rather than on any substring so that an
    # unrelated key containing the letters "ad" or "cn" cannot be mislabelled.
    if dataset.endswith('_ad'):
        label = 'ad'
    elif dataset.endswith('_cn'):
        label = 'cn'
    else:
        label = None  # keys without a class suffix (e.g. 'test') are unlabelled
    dataset_type = 'train' if dataset.startswith('train') else 'test'

    # glob returns files in arbitrary order; sort so metadata.csv is reproducible.
    audio_files = sorted(glob.glob(path))
    if not audio_files:
        print(f'Warning: no audio files matched {path}')
    for audio_file in audio_files:
        # Swap only the extension; str.replace would rewrite every '.wav' in the path.
        stem, _ = os.path.splitext(audio_file.replace('/audio/', '/segmentation/'))
        metadata.append({
            'audio_path': audio_file,
            'segmentation_path': stem + '.csv',
            'label': label,
            'dataset_type': dataset_type
        })

# Create metadata DataFrame. Passing the columns explicitly keeps the CSV header
# even when nothing matched, so a later pd.read_csv gets an empty table instead
# of failing on a header-less file.
metadata_df = pd.DataFrame(metadata, columns=metadata_columns)
metadata_df.to_csv(os.path.join(output_dir, 'metadata.csv'), index=False)



In [None]:
import os

import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy.fft import fft
from scipy.signal import stft
from librosa.feature import spectral_centroid, spectral_rolloff, spectral_bandwidth

# Read back the metadata table from its CSV file on Drive
metadata_csv_path = '/content/drive/MyDrive/Voice/extracted/ADReSSo21/metadata.csv'
metadata_df = pd.read_csv(metadata_csv_path)

# Time-Domain Analysis
def time_domain_analysis(
    audio_path,
    output_dir='/content/drive/MyDrive/Voice/extracted/ADReSSo21/visualizations',
):
    """Plot the waveform of one recording and compute time-domain statistics.

    The audio is loaded with ``librosa.load`` defaults and the waveform is
    saved as ``waveform_<basename>.png`` inside ``output_dir``.

    Args:
        audio_path: Path of the audio file to analyse.
        output_dir: Directory that receives the PNG. It is created when
            missing. Defaults to the previously hard-coded location.

    Returns:
        A tuple ``(stats, zcr, duration)``:
        ``stats`` is a dict with keys ``mean`` and ``median`` (of the absolute
        amplitude), ``std``, ``min``, ``max`` and ``rms`` (of the raw signal);
        ``zcr`` is the mean of ``librosa.feature.zero_crossing_rate``;
        ``duration`` is the value returned by ``librosa.get_duration``.
    """
    y, sr = librosa.load(audio_path)
    file_name = os.path.basename(audio_path)

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    # Waveform plot
    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.waveshow(y, sr=sr)
        plt.title(f'Waveform: {file_name}')
        fig.savefig(os.path.join(output_dir, f'waveform_{file_name}.png'))
    finally:
        # Close even if plotting/saving fails so figures do not pile up in a loop.
        plt.close(fig)

    # Amplitude stats
    magnitude = np.abs(y)
    stats = {
        'mean': np.mean(magnitude),
        'median': np.median(magnitude),
        'std': np.std(y),
        'min': np.min(y),
        'max': np.max(y),
        'rms': np.sqrt(np.mean(y**2))
    }
    # Zero-crossing rate
    zcr = librosa.feature.zero_crossing_rate(y)[0].mean()
    # Duration
    duration = librosa.get_duration(y=y, sr=sr)
    return stats, zcr, duration

# Frequency-Domain Analysis
def frequency_domain_analysis(
    audio_path,
    output_dir='/content/drive/MyDrive/Voice/extracted/ADReSSo21/visualizations',
):
    """Plot the magnitude spectrum of one recording and summarise its spectral shape.

    The audio is loaded with ``librosa.load`` defaults and the spectrum is
    saved as ``fft_<basename>.png`` inside ``output_dir``.

    Args:
        audio_path: Path of the audio file to analyse.
        output_dir: Directory that receives the PNG. It is created when
            missing. Defaults to the previously hard-coded location.

    Returns:
        A dict with keys ``centroid``, ``spread`` and ``rolloff`` holding the
        means of librosa's ``spectral_centroid``, ``spectral_bandwidth`` and
        ``spectral_rolloff`` features respectively.
    """
    y, sr = librosa.load(audio_path)
    file_name = os.path.basename(audio_path)

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    # FFT. The signal is real, so the one-sided transform carries all the
    # information and avoids computing the mirrored negative-frequency half.
    n_samples = len(y)
    magnitude = np.abs(np.fft.rfft(y))
    freqs = np.fft.rfftfreq(n_samples, 1 / sr)
    # Plot the first n//2 bins (everything below Nyquist), as before.
    n_bins = n_samples // 2

    fig = plt.figure(figsize=(10, 4))
    try:
        plt.plot(freqs[:n_bins], magnitude[:n_bins])
        plt.title(f'FFT: {file_name}')
        fig.savefig(os.path.join(output_dir, f'fft_{file_name}.png'))
    finally:
        # Close even if plotting/saving fails so figures do not pile up in a loop.
        plt.close(fig)

    # Spectral stats
    centroid = spectral_centroid(y=y, sr=sr)[0].mean()
    spread = spectral_bandwidth(y=y, sr=sr)[0].mean()
    rolloff = spectral_rolloff(y=y, sr=sr)[0].mean()
    return {'centroid': centroid, 'spread': spread, 'rolloff': rolloff}

# Time-Frequency Analysis
def time_frequency_analysis(
    audio_path,
    output_dir='/content/drive/MyDrive/Voice/extracted/ADReSSo21/visualizations',
):
    """Save a linear-frequency spectrogram and a mel-spectrogram of one recording.

    The audio is loaded with ``librosa.load`` defaults. Two PNGs are written
    to ``output_dir``: ``spectrogram_<basename>.png`` and ``mel_<basename>.png``.

    Args:
        audio_path: Path of the audio file to analyse.
        output_dir: Directory that receives the PNGs. It is created when
            missing. Defaults to the previously hard-coded location.

    Returns:
        None.
    """
    y, sr = librosa.load(audio_path)
    file_name = os.path.basename(audio_path)

    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    # Spectrogram. The window and hop are spelled out (they equal scipy's
    # defaults: 256-sample segments, 50% overlap) so the same hop can be handed
    # to specshow; specshow otherwise assumes hop_length=512 and the time axis
    # would be stretched by a factor of four.
    nperseg = 256
    hop_length = nperseg // 2
    _, _, zxx = stft(y, fs=sr, nperseg=nperseg, noverlap=nperseg - hop_length)
    D = np.abs(zxx)
    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(
            librosa.amplitude_to_db(D, ref=np.max),
            sr=sr,
            hop_length=hop_length,
            x_axis='time',
            y_axis='hz',
        )
        plt.colorbar(format='%+2.0f dB')
        plt.title(f'Spectrogram: {file_name}')
        fig.savefig(os.path.join(output_dir, f'spectrogram_{file_name}.png'))
    finally:
        # Close even if plotting/saving fails so figures do not pile up in a loop.
        plt.close(fig)

    # Mel-spectrogram (librosa's default framing, which specshow's defaults match)
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    fig = plt.figure(figsize=(10, 4))
    try:
        librosa.display.specshow(librosa.power_to_db(mel, ref=np.max), sr=sr, x_axis='time', y_axis='mel')
        plt.colorbar(format='%+2.0f dB')
        plt.title(f'Mel-Spectrogram: {file_name}')
        fig.savefig(os.path.join(output_dir, f'mel_{file_name}.png'))
    finally:
        plt.close(fig)

# Statistical Analysis
def statistical_analysis(audio_path):
    """Summarise one recording with MFCC-based and energy statistics.

    Args:
        audio_path: Path of the audio file, loaded with ``librosa.load`` defaults.

    Returns:
        A 5-tuple ``(mfcc_mean, delta_mean, delta2_mean, energy, log_energy)``.
        The first three are the 13 MFCCs and their first- and second-order
        deltas, each averaged along axis 1. ``energy`` is the mean of
        ``librosa.feature.rms`` and ``log_energy`` is its natural logarithm.
    """
    signal, sample_rate = librosa.load(audio_path)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)

    # First- and second-order differences of the MFCC matrix.
    first_order, second_order = (
        librosa.feature.delta(coefficients, order=order) for order in (1, 2)
    )

    frame_rms = librosa.feature.rms(y=signal)
    mean_energy = frame_rms[0].mean()
    # Small offset keeps the logarithm finite for an all-zero signal.
    log_mean_energy = np.log(mean_energy + 1e-10)

    return (
        coefficients.mean(axis=1),
        first_order.mean(axis=1),
        second_order.mean(axis=1),
        mean_energy,
        log_mean_energy,
    )

# Dataset-Level Analysis
def dataset_analysis(
    metadata_df,
    output_dir='/content/drive/MyDrive/Voice/extracted/ADReSSo21/visualizations',
):
    """Plot the class distribution and report sample-rate/channel consistency.

    Saves a count plot of the ``label`` column as ``class_distribution.png``
    in ``output_dir``, then loads every file listed in the ``audio_path``
    column and prints the distinct sample rates and channel counts found.

    Args:
        metadata_df: DataFrame with at least ``label`` and ``audio_path`` columns.
        output_dir: Directory that receives the PNG. It is created when
            missing. Defaults to the previously hard-coded location.

    Returns:
        None.
    """
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    # Class distribution
    sns.countplot(data=metadata_df, x='label')
    plt.savefig(os.path.join(output_dir, 'class_distribution.png'))
    plt.close()

    # Sample rate and channel consistency
    sample_rates = set()
    channels = set()
    for audio_path in metadata_df['audio_path']:
        # sr=None keeps each file's native rate. With librosa's default
        # (sr=22050) every file is resampled and the check always reports 22050.
        y, sr = librosa.load(audio_path, sr=None, mono=False)
        sample_rates.add(sr)
        # A 1-D array is a mono signal; otherwise the first axis is the channel count.
        channels.add(y.shape[0] if y.ndim > 1 else 1)
    print(f'Sample rates: {sample_rates}')
    print(f'Channels: {channels}')

# Run analysis for sample files
# DataFrame/Series.sample raises ValueError when asked for more rows than
# exist, so cap the request at the size of the table.
n_sample_files = min(5, len(metadata_df))
for sample_audio_path in metadata_df['audio_path'].sample(n_sample_files):
    time_domain_analysis(sample_audio_path)
    frequency_domain_analysis(sample_audio_path)
    time_frequency_analysis(sample_audio_path)
dataset_analysis(metadata_df)