<a href="https://colab.research.google.com/github/fjadidi2001/AD_Prediction/blob/main/Dementia_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Colab-only cell: mount Google Drive at /content/drive, the location the
# dataset paths used later in this notebook are rooted under.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Multimodal Deep Learning System for Alzheimer's Disease Classification
# ADReSSo21 Dataset - Acoustic and Linguistic Feature Integration

import os
import numpy as np
import pandas as pd
import librosa
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchaudio
import transformers
from transformers import Wav2Vec2Processor, Wav2Vec2Model, AutoTokenizer, AutoModel
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import speech_recognition as sr
import textstat
import re
from collections import Counter
import torch_geometric
from torch_geometric.nn import GCNConv, global_mean_pool
from torch_geometric.data import Data, Batch
import warnings
warnings.filterwarnings('ignore')

# Pick the compute device: CUDA when torch reports one, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Install required packages for Colab
def install_requirements():
    """Install the third-party packages this notebook needs (Google Colab).

    Every package is installed with ``pip`` run through the interpreter that
    is executing this code, so it lands in the active environment. A failed
    install is reported and the remaining packages are still attempted.
    """
    # Imported locally so this helper does not depend on the import cell.
    import subprocess
    import sys

    packages = [
        'librosa',
        'transformers',
        'torch-geometric',
        'SpeechRecognition',
        'textstat',
        'networkx',
        'pyaudio',
        'pydub',
    ]

    for package in packages:
        try:
            result = subprocess.run(
                [sys.executable, '-m', 'pip', 'install', '-q', package],
                check=False,
            )
        except OSError as e:
            # The interpreter/pip could not be launched at all.
            print(f"Failed to install {package}: {e}")
            continue

        # pip signals failure through its exit status, not an exception.
        if result.returncode != 0:
            print(f"Failed to install {package}")

# Data paths configuration
class DataConfig:
    """Directory layout of the ADReSSo21 audio data.

    Args:
        base_path: Root directory of the extracted dataset. Defaults to the
            Google Drive location used by this notebook.

    Attributes:
        base_path: The dataset root.
        diagnosis_paths: Maps 'ad' / 'cn' to the diagnosis training audio
            directories.
        progression_paths: Maps 'decline' / 'no_decline' to the progression
            training audio directories.
        test_path: Directory of the progression test audio.
    """

    def __init__(self, base_path="/content/drive/MyDrive/Voice/extracted/ADReSSo21"):
        self.base_path = base_path
        self.diagnosis_paths = {
            'ad': f"{self.base_path}/diagnosis/train/audio/ad",
            'cn': f"{self.base_path}/diagnosis/train/audio/cn"
        }
        self.progression_paths = {
            'decline': f"{self.base_path}/progression/train/audio/decline",
            'no_decline': f"{self.base_path}/progression/train/audio/no_decline"
        }
        self.test_path = f"{self.base_path}/progression/test-dist/audio"

# Acoustic Feature Extractor
class AcousticFeatureExtractor:
    """Extract spectrogram-based and Wav2Vec2 features from an audio file.

    Args:
        sample_rate: Rate audio is loaded at by ``extract_all_features``.
        n_mels: Number of mel bands for the mel spectrogram.
        n_fft: FFT window size for the mel spectrogram.
        hop_length: Hop length for the mel spectrogram.
    """

    def __init__(self, sample_rate=16000, n_mels=128, n_fft=2048, hop_length=512):
        self.sample_rate = sample_rate
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.hop_length = hop_length

        # Initialize Wav2Vec2 processor and model (model lives on `device`)
        self.wav2vec_processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
        self.wav2vec_model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h").to(device)

    def extract_log_mel_spectrogram(self, audio, sr):
        """Return the mel spectrogram of `audio` in dB, relative to its peak."""
        mel_spec = librosa.feature.melspectrogram(
            y=audio, sr=sr, n_mels=self.n_mels, n_fft=self.n_fft, hop_length=self.hop_length
        )
        log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
        return log_mel_spec

    def extract_delta_features(self, log_mel_spec):
        """Return the first- and second-order deltas of `log_mel_spec`."""
        delta = librosa.feature.delta(log_mel_spec)
        delta_delta = librosa.feature.delta(log_mel_spec, order=2)
        return delta, delta_delta

    def extract_wav2vec_embeddings(self, audio):
        """Return the Wav2Vec2 last hidden state for `audio` as a 2-D array.

        `audio` is expected at ``self.sample_rate``; it is averaged over
        axis 0 when it has more than one dimension and resampled to 16 kHz
        when needed. On any failure the error is printed and a zero array
        of shape (100, 768) is returned instead.
        """
        try:
            # Ensure audio is the right format
            if len(audio.shape) > 1:
                audio = audio.mean(axis=0)

            # Resample to 16kHz if needed
            if self.sample_rate != 16000:
                audio = librosa.resample(audio, orig_sr=self.sample_rate, target_sr=16000)

            inputs = self.wav2vec_processor(audio, sampling_rate=16000, return_tensors="pt")
            inputs = {k: v.to(device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.wav2vec_model(**inputs)
                # Drop only the batch axis: a bare squeeze() would also remove
                # the time axis when the model yields a single frame.
                embeddings = outputs.last_hidden_state.squeeze(0).cpu().numpy()

            return embeddings
        except Exception as e:
            print(f"Error extracting Wav2Vec embeddings: {e}")
            return np.zeros((100, 768))  # Fallback

    def extract_all_features(self, audio_path):
        """Load `audio_path` and extract every acoustic feature.

        Returns:
            A dict with keys 'log_mel_spec', 'delta', 'delta_delta',
            'wav2vec_embeddings' and 'audio_length' (seconds), or ``None``
            when loading or extraction fails (the error is printed).
        """
        try:
            audio, sr = librosa.load(audio_path, sr=self.sample_rate)

            # Extract features
            log_mel_spec = self.extract_log_mel_spectrogram(audio, sr)
            delta, delta_delta = self.extract_delta_features(log_mel_spec)
            wav2vec_embeddings = self.extract_wav2vec_embeddings(audio)

            return {
                'log_mel_spec': log_mel_spec,
                'delta': delta,
                'delta_delta': delta_delta,
                'wav2vec_embeddings': wav2vec_embeddings,
                'audio_length': len(audio) / sr
            }
        except Exception as e:
            print(f"Error processing {audio_path}: {e}")
            return None

# Automatic Speech Recognition
class ASRTranscriber:
    """Speech-to-text helper built on a ``speech_recognition`` recognizer."""

    def __init__(self):
        self.recognizer = sr.Recognizer()

    def transcribe_audio(self, audio_path):
        """Return the Google Speech Recognition transcript of `audio_path`.

        Any failure is printed and results in an empty string.
        """
        try:
            with sr.AudioFile(audio_path) as source:
                recording = self.recognizer.record(source)
                return self.recognizer.recognize_google(recording)
        except Exception as e:
            print(f"ASR Error for {audio_path}: {e}")
        return ""

# Linguistic Feature Extractor
class LinguisticFeatureExtractor:
    """Turn a transcript into BERT, readability, repetition and lexical features."""

    def __init__(self):
        # Initialize transformer models (model lives on `device`)
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
        self.bert_model = AutoModel.from_pretrained('bert-base-uncased').to(device)

    def extract_transformer_features(self, text):
        """Return the mean of BERT's last hidden state over the tokens of `text`.

        Input is truncated to 512 tokens. On any failure the error is printed
        and a zero vector of length 768 is returned.
        """
        try:
            inputs = self.tokenizer(text, return_tensors='pt', truncation=True,
                                  padding=True, max_length=512)
            inputs = {k: v.to(device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.bert_model(**inputs)
                embeddings = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()

            return embeddings
        except Exception as e:
            print(f"Error extracting transformer features: {e}")
            return np.zeros(768)

    def extract_psycholinguistic_features(self, text):
        """Return 10 float32 values: textstat readability scores plus counts.

        Blank text yields a zero vector of the same length and dtype.
        """
        if not text.strip():
            return np.zeros(10, dtype=np.float32)

        features = [
            textstat.flesch_reading_ease(text),
            textstat.flesch_kincaid_grade(text),
            textstat.gunning_fog(text),
            textstat.coleman_liau_index(text),
            textstat.automated_readability_index(text),
            textstat.avg_sentence_length(text),
            textstat.avg_syllables_per_word(text),
            textstat.sentence_count(text),
            len(text.split()),  # word count
            len(text)  # character count
        ]

        return np.array(features, dtype=np.float32)

    def extract_repetitiveness_features(self, text):
        """Return 5 float32 repetition/fluency ratios for `text`.

        Order: repetition ratio, filler ratio, pause ratio, lexical diversity,
        type-token ratio. Blank text yields a zero vector of the same length
        and dtype.
        """
        if not text.strip():
            return np.zeros(5, dtype=np.float32)

        words = text.lower().split()
        word_counts = Counter(words)

        # Repetitiveness metrics
        total_words = len(words)
        unique_words = len(set(words))
        repetition_ratio = 1 - (unique_words / max(total_words, 1))

        # Filler words. Multi-word fillers ('you know') can never appear as a
        # single token, so they are matched against consecutive tokens.
        filler_words = ['um', 'uh', 'er', 'ah', 'well', 'you know', 'like']
        filler_count = 0
        for filler in filler_words:
            phrase = filler.split()
            if len(phrase) == 1:
                filler_count += word_counts.get(filler, 0)
            else:
                span = len(phrase)
                filler_count += sum(
                    1 for start in range(total_words - span + 1)
                    if words[start:start + span] == phrase
                )
        filler_ratio = filler_count / max(total_words, 1)

        # Pause indicators (simplified)
        # NOTE(review): '...' is also counted by the '..' term, so an
        # ellipsis contributes twice - confirm whether that is intended.
        pause_indicators = text.count('...') + text.count('..') + text.count(' - ')
        pause_ratio = pause_indicators / max(len(text.split('.')), 1)

        return np.array([
            repetition_ratio,
            filler_ratio,
            pause_ratio,
            unique_words / max(total_words, 1),  # lexical diversity
            len(set(words)) / max(len(words), 1)  # type-token ratio
        ], dtype=np.float32)

    def extract_lexical_complexity_features(self, text):
        """Return 8 float32 lexical-complexity values for `text`.

        Order: mean word length, mean sentence length (words), mean syllables
        per word (heuristic), word count, sentence count, unique-word count,
        textstat difficult-word count, textstat polysyllable count. Blank
        text yields a zero vector of the same length and dtype.
        """
        if not text.strip():
            return np.zeros(8, dtype=np.float32)

        words = text.split()
        # Splitting on '.' leaves empty fragments (e.g. after a trailing
        # period); keep only fragments that contain text.
        sentences = [sent for sent in text.split('.') if sent.strip()]

        # Basic complexity metrics
        avg_word_length = np.mean([len(word) for word in words]) if words else 0
        # Guard the empty case so the mean of an empty list never yields NaN.
        avg_sentence_length = np.mean([len(sent.split()) for sent in sentences]) if sentences else 0

        # Syllable complexity (simplified)
        def count_syllables(word):
            """Estimate syllables by counting vowel-group starts (min. 1)."""
            word = word.lower().strip('.,!?";')
            count = 0
            vowels = 'aeiouy'
            if word and word[0] in vowels:
                count += 1
            for i in range(1, len(word)):
                if word[i] in vowels and word[i-1] not in vowels:
                    count += 1
            if word.endswith('e'):
                count -= 1
            return max(count, 1)

        syllable_counts = [count_syllables(word) for word in words]
        avg_syllables = np.mean(syllable_counts) if syllable_counts else 0

        return np.array([
            avg_word_length,
            avg_sentence_length,
            avg_syllables,
            len(words),  # total words
            len(sentences),  # total sentences
            len(set(words)),  # unique words
            textstat.difficult_words(text),
            textstat.polysyllabcount(text)
        ], dtype=np.float32)

    def extract_all_features(self, text):
        """Run every extractor on `text` and return the results keyed by group."""
        transformer_features = self.extract_transformer_features(text)
        psycholinguistic_features = self.extract_psycholinguistic_features(text)
        repetitiveness_features = self.extract_repetitiveness_features(text)
        lexical_complexity_features = self.extract_lexical_complexity_features(text)

        return {
            'transformer_features': transformer_features,
            'psycholinguistic_features': psycholinguistic_features,
            'repetitiveness_features': repetitiveness_features,
            'lexical_complexity_features': lexical_complexity_features
        }

# Graph Neural Network for Feature Visualization
class FeatureGNN(nn.Module):
    """Two stacked GCNConv layers with ReLU and dropout in between."""

    def __init__(self, input_dim, hidden_dim=64, output_dim=32):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x, edge_index, batch=None):
        """Return node embeddings, mean-pooled per graph when `batch` is given."""
        hidden = self.dropout(F.relu(self.conv1(x, edge_index)))
        node_out = self.conv2(hidden, edge_index)

        if batch is None:
            return node_out
        return global_mean_pool(node_out, batch)

# Vision Transformer with Graph-based Attention
class GraphViT(nn.Module):
    """Transformer fusion of acoustic and linguistic features with a 2-class head.

    Both modalities are projected to a shared 512-dimensional space,
    concatenated along the sequence axis, encoded by a transformer encoder,
    and fused with cross-attention (acoustic tokens query linguistic tokens).

    Args:
        acoustic_dim: Size of the last axis of the acoustic input.
        linguistic_dim: Size of the last axis of the linguistic input.
        num_heads: Attention heads for the encoder and the cross-attention.
        num_layers: Number of transformer encoder layers.
    """

    def __init__(self, acoustic_dim, linguistic_dim, num_heads=8, num_layers=6):
        super(GraphViT, self).__init__()
        self.acoustic_dim = acoustic_dim
        self.linguistic_dim = linguistic_dim
        self.d_model = 512

        # Projection layers
        self.acoustic_proj = nn.Linear(acoustic_dim, self.d_model)
        self.linguistic_proj = nn.Linear(linguistic_dim, self.d_model)

        # Positional encoding (learned, up to 1000 positions)
        self.pos_encoding = nn.Parameter(torch.randn(1000, self.d_model))

        # Transformer layers
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.d_model,
            nhead=num_heads,
            dim_feedforward=2048,
            dropout=0.1
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Graph attention
        self.graph_attention = nn.MultiheadAttention(self.d_model, num_heads)

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(self.d_model, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 2)  # AD vs CN
        )

    def forward(self, acoustic_features, linguistic_features):
        """Return class logits of shape (batch, 2).

        Each input may be (batch, seq, dim) or a flat (batch, dim) vector;
        a flat vector is treated as a sequence of one token.

        Raises:
            ValueError: If the combined sequence is longer than the
                positional-encoding table.
        """
        # Flat per-sample vectors become a single token so that the
        # sequence axis used below exists.
        if acoustic_features.dim() == 2:
            acoustic_features = acoustic_features.unsqueeze(1)
        if linguistic_features.dim() == 2:
            linguistic_features = linguistic_features.unsqueeze(1)

        # Project features to common dimension
        acoustic_proj = self.acoustic_proj(acoustic_features)
        linguistic_proj = self.linguistic_proj(linguistic_features)

        # Combine features along the sequence axis
        combined = torch.cat([acoustic_proj, linguistic_proj], dim=1)
        seq_len = combined.size(1)
        if seq_len > self.pos_encoding.size(0):
            raise ValueError(
                f"Combined sequence length {seq_len} exceeds the "
                f"{self.pos_encoding.size(0)} available positions"
            )

        # Add positional encoding
        combined = combined + self.pos_encoding[:seq_len]

        # Transformer encoding (layers expect sequence-first tensors)
        encoded = self.transformer(combined.transpose(0, 1)).transpose(0, 1)

        # Graph-based attention between acoustic and linguistic features
        acoustic_len = acoustic_proj.size(1)
        acoustic_encoded = encoded[:, :acoustic_len]
        linguistic_encoded = encoded[:, acoustic_len:]

        # Cross-modal attention
        attended, _ = self.graph_attention(
            acoustic_encoded.transpose(0, 1),
            linguistic_encoded.transpose(0, 1),
            linguistic_encoded.transpose(0, 1)
        )
        attended = attended.transpose(0, 1)

        # Global average pooling
        pooled = torch.mean(attended, dim=1)

        # Classification
        output = self.classifier(pooled)
        return output

# Dataset Class
class ADReSSoDataset(Dataset):
    """Audio dataset that extracts acoustic and linguistic features on access.

    Args:
        data_paths: Maps a label name to the directory holding its audio
            files; the label 'ad' becomes class 1, any other label class 0.
        config: Configuration object; stored but not otherwise used here.
        mode: Only 'diagnosis' loads samples; any other value leaves the
            dataset empty and prints a warning.
    """

    def __init__(self, data_paths, config, mode='train'):
        self.data_paths = data_paths
        self.config = config
        self.mode = mode
        self.samples = []

        # Initialize feature extractors
        self.acoustic_extractor = AcousticFeatureExtractor()
        self.asr_transcriber = ASRTranscriber()
        self.linguistic_extractor = LinguisticFeatureExtractor()

        # Load data samples
        self._load_samples()

    def _load_samples(self):
        """Collect audio file paths and labels into ``self.samples``."""
        if self.mode == 'diagnosis':
            for label, path in self.data_paths.items():
                if not os.path.isdir(path):
                    print(f"Warning: audio directory not found, skipping: {path}")
                    continue
                # os.listdir order is arbitrary; sort so that indices (and
                # any seeded split built on them) are reproducible.
                for file in sorted(os.listdir(path)):
                    if file.endswith(('.wav', '.mp3', '.flac')):
                        self.samples.append({
                            'path': os.path.join(path, file),
                            'label': 1 if label == 'ad' else 0,
                            'task': 'diagnosis'
                        })
        else:
            print(f"Warning: mode '{self.mode}' is not supported; no samples loaded")

        print(f"Loaded {len(self.samples)} samples")

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Extract and return all features for sample `idx`.

        Features are recomputed on every access. Returns ``None`` when
        acoustic extraction fails, so consumers must filter ``None`` items.
        """
        sample = self.samples[idx]

        # Extract acoustic features
        acoustic_features = self.acoustic_extractor.extract_all_features(sample['path'])
        if acoustic_features is None:
            return None

        # Transcribe audio to text
        transcript = self.asr_transcriber.transcribe_audio(sample['path'])

        # Extract linguistic features
        linguistic_features = self.linguistic_extractor.extract_all_features(transcript)

        return {
            'acoustic_features': acoustic_features,
            'linguistic_features': linguistic_features,
            'transcript': transcript,
            'label': sample['label'],
            'file_path': sample['path']
        }

# Feature Visualization and Graph Creation
class FeatureVisualizer:
    """Plot helpers for the feature groups used by the classifier."""

    def __init__(self):
        pass

    def create_feature_graph(self, acoustic_features, linguistic_features):
        """Build a fixed graph linking acoustic and linguistic feature groups.

        The node set and edge weights are hard-coded placeholders; the two
        arguments are accepted but not used to compute them.
        """
        G = nx.Graph()

        # Add acoustic feature nodes
        acoustic_nodes = ['log_mel', 'delta', 'delta_delta', 'wav2vec']
        for node in acoustic_nodes:
            G.add_node(node, type='acoustic')

        # Add linguistic feature nodes
        linguistic_nodes = ['transformer', 'psycholinguistic', 'repetitiveness', 'lexical']
        for node in linguistic_nodes:
            G.add_node(node, type='linguistic')

        # Add edges based on feature correlations (simplified)
        # In practice, you would compute actual correlations
        edges = [
            ('log_mel', 'transformer', {'weight': 0.3}),
            ('wav2vec', 'transformer', {'weight': 0.7}),
            ('delta', 'psycholinguistic', {'weight': 0.4}),
            ('repetitiveness', 'delta_delta', {'weight': 0.2}),
            ('lexical', 'log_mel', {'weight': 0.1})
        ]

        G.add_edges_from(edges)
        return G

    def visualize_feature_graph(self, G, title="Feature Interaction Graph"):
        """Draw `G` in a new figure and show it.

        Nodes whose 'type' attribute is 'acoustic' are light blue, all
        others light coral; edges are labelled with their 'weight'.
        """
        plt.figure(figsize=(12, 8))

        # Define colors for different node types
        node_colors = [
            'lightblue' if G.nodes[node]['type'] == 'acoustic' else 'lightcoral'
            for node in G.nodes()
        ]

        # Create layout
        pos = nx.spring_layout(G, k=2, iterations=50)

        # Draw the graph
        nx.draw(G, pos,
               node_color=node_colors,
               node_size=2000,
               with_labels=True,
               font_size=10,
               font_weight='bold',
               edge_color='gray',
               width=2)

        # Add edge labels
        edge_labels = nx.get_edge_attributes(G, 'weight')
        nx.draw_networkx_edge_labels(G, pos, edge_labels, font_size=8)

        plt.title(title, fontsize=16, fontweight='bold')
        plt.axis('off')
        plt.tight_layout()
        plt.show()

    def plot_feature_correlations(self, features_dict, title="Feature Correlations"):
        """Plot a heatmap of correlations between feature groups.

        Each ndarray value in `features_dict` is one group; non-array values
        are ignored. A group is represented by its first values (at most 10,
        truncated to the shortest usable group), and groups are correlated
        with each other across those positions. Correlating individual
        features is not possible here: a single sample gives one observation
        per feature, for which the correlation is undefined (NaN).

        Groups that are too short, constant or non-finite are skipped. When
        fewer than two usable groups remain, a message is printed and nothing
        is plotted.
        """
        max_values = 10  # Limit to first 10 values per group

        groups = {}
        for category, features in features_dict.items():
            if not isinstance(features, np.ndarray):
                continue
            values = features.astype(np.float64).ravel()[:max_values]
            if values.size >= 2:
                groups[category] = values

        if len(groups) < 2:
            print("Not enough feature groups to compute correlations")
            return

        # All groups must contribute the same number of observations.
        common_len = min(values.size for values in groups.values())
        rows = {}
        for category, values in groups.items():
            trimmed = values[:common_len]
            # A constant or non-finite row has no defined correlation.
            if np.isfinite(trimmed).all() and np.ptp(trimmed) > 0:
                rows[category] = trimmed

        if len(rows) < 2:
            print("Not enough non-constant feature groups to compute correlations")
            return

        feature_names = list(rows)
        correlation_matrix = np.corrcoef(np.vstack([rows[name] for name in feature_names]))

        plt.figure(figsize=(10, 8))
        sns.heatmap(correlation_matrix,
                   xticklabels=feature_names,
                   yticklabels=feature_names,
                   annot=True,
                   cmap='coolwarm',
                   center=0)
        plt.title(title)
        plt.tight_layout()
        plt.show()

# Training Pipeline
class ADClassificationTrainer:
    """Build the data loaders and train the multimodal classifier.

    Args:
        config: Object exposing ``diagnosis_paths`` (label -> directory).
    """

    def __init__(self, config):
        self.config = config
        self.device = device

    def prepare_data(self):
        """Return ``(train_loader, val_loader)`` from an 80/20 random split.

        Raises:
            ValueError: If no audio samples were found.
        """
        # Create dataset for diagnosis task
        diagnosis_paths = self.config.diagnosis_paths
        dataset = ADReSSoDataset(diagnosis_paths, self.config, mode='diagnosis')
        if len(dataset) == 0:
            raise ValueError(
                f"No audio samples found under: {list(diagnosis_paths.values())}"
            )

        # Split into train and validation
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = torch.utils.data.random_split(
            dataset, [train_size, val_size]
        )

        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True,
                                collate_fn=self.collate_fn)
        val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False,
                               collate_fn=self.collate_fn)

        return train_loader, val_loader

    @staticmethod
    def _fit_length(values, length):
        """Flatten `values` to float32 and truncate/zero-pad to `length`."""
        flat = np.asarray(values, dtype=np.float32).ravel()[:length]
        # Non-finite inputs would turn the loss into NaN; zero them out.
        flat = np.nan_to_num(flat, nan=0.0, posinf=0.0, neginf=0.0)
        if flat.size < length:
            flat = np.pad(flat, (0, length - flat.size))
        return flat

    def collate_fn(self, batch):
        """Turn a list of dataset items into fixed-size tensors on the device.

        ``None`` items are dropped; if none remain, ``None`` is returned.
        Every feature is flattened and truncated or zero-padded to a fixed
        length, so each row is 3768 acoustic / 791 linguistic values even
        for very short recordings.

        Returns:
            Dict with 'acoustic' (n, 3768) and 'linguistic' (n, 791) float
            tensors and 'labels' (n,) long tensor, or ``None``.
        """
        # Filter out None samples
        batch = [item for item in batch if item is not None]
        if not batch:
            return None

        fit = self._fit_length
        acoustic_features = []
        linguistic_features = []
        labels = []

        for item in batch:
            acoustic_src = item['acoustic_features']
            linguistic_src = item['linguistic_features']

            # Flatten and concatenate acoustic features
            acoustic = np.concatenate([
                fit(acoustic_src['log_mel_spec'], 1000),
                fit(acoustic_src['delta'], 1000),
                fit(acoustic_src['delta_delta'], 1000),
                fit(acoustic_src['wav2vec_embeddings'], 768)
            ])

            # Concatenate linguistic features
            linguistic = np.concatenate([
                fit(linguistic_src['transformer_features'], 768),
                fit(linguistic_src['psycholinguistic_features'], 10),
                fit(linguistic_src['repetitiveness_features'], 5),
                fit(linguistic_src['lexical_complexity_features'], 8)
            ])

            acoustic_features.append(acoustic)
            linguistic_features.append(linguistic)
            labels.append(item['label'])

        return {
            'acoustic': torch.from_numpy(np.stack(acoustic_features)).to(self.device),
            'linguistic': torch.from_numpy(np.stack(linguistic_features)).to(self.device),
            'labels': torch.tensor(labels, dtype=torch.long).to(self.device)
        }

    def train_model(self, train_loader, val_loader, epochs=50):
        """Train the multimodal model.

        The weights with the lowest validation loss are saved to
        'best_ad_model.pth'.

        Returns:
            ``(model, train_losses, val_losses)`` where the two lists hold
            the per-epoch average losses.
        """
        # Initialize model
        acoustic_dim = 3768  # log_mel + delta + delta_delta + wav2vec
        linguistic_dim = 791  # transformer + psycholinguistic + repetitiveness + lexical

        model = GraphViT(acoustic_dim, linguistic_dim).to(self.device)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)

        # Training loop
        train_losses = []
        val_losses = []
        best_val_loss = float('inf')

        for epoch in range(epochs):
            # Training phase
            model.train()
            train_loss = 0.0
            train_correct = 0
            train_total = 0
            train_batches = 0  # batches actually processed (None ones are skipped)

            for batch in train_loader:
                if batch is None:
                    continue

                optimizer.zero_grad()

                # Forward pass
                outputs = model(batch['acoustic'], batch['linguistic'])
                loss = criterion(outputs, batch['labels'])

                # Backward pass
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

                # Statistics
                train_loss += loss.item()
                train_batches += 1
                _, predicted = torch.max(outputs.data, 1)
                train_total += batch['labels'].size(0)
                train_correct += (predicted == batch['labels']).sum().item()

            # Validation phase
            model.eval()
            val_loss = 0.0
            val_correct = 0
            val_total = 0
            val_batches = 0

            with torch.no_grad():
                for batch in val_loader:
                    if batch is None:
                        continue

                    outputs = model(batch['acoustic'], batch['linguistic'])
                    loss = criterion(outputs, batch['labels'])

                    val_loss += loss.item()
                    val_batches += 1
                    _, predicted = torch.max(outputs.data, 1)
                    val_total += batch['labels'].size(0)
                    val_correct += (predicted == batch['labels']).sum().item()

            # Calculate averages over processed batches/samples only; the
            # max(..., 1) guards keep an all-skipped epoch from dividing by 0.
            avg_train_loss = train_loss / max(train_batches, 1)
            avg_val_loss = val_loss / max(val_batches, 1)
            train_acc = 100 * train_correct / max(train_total, 1)
            val_acc = 100 * val_correct / max(val_total, 1)

            train_losses.append(avg_train_loss)
            val_losses.append(avg_val_loss)

            # Without any validation batch the 0.0 "loss" is meaningless, so
            # it must not drive the scheduler or the checkpoint.
            if val_batches:
                # Learning rate scheduling
                scheduler.step(avg_val_loss)

                # Save best model
                if avg_val_loss < best_val_loss:
                    best_val_loss = avg_val_loss
                    torch.save(model.state_dict(), 'best_ad_model.pth')

            print(f'Epoch [{epoch+1}/{epochs}]')
            print(f'Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.2f}%')
            print(f'Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%')
            print('-' * 60)

        return model, train_losses, val_losses

# Main execution function
def main():
    """Run the whole pipeline: prepare data, train, then plot the results.

    Shows two figures after training: the per-epoch training/validation
    loss curves, and an example feature-interaction graph.
    """
    print("Initializing AD Classification System...")

    # Install requirements (uncomment for first run)
    # install_requirements()

    # Mount Google Drive (uncomment for Colab)
    # from google.colab import drive
    # drive.mount('/content/drive')

    # Initialize configuration
    config = DataConfig()

    # Initialize visualizer
    visualizer = FeatureVisualizer()

    # Initialize trainer
    trainer = ADClassificationTrainer(config)

    print("Preparing data...")
    train_loader, val_loader = trainer.prepare_data()

    print("Starting training...")
    model, train_losses, val_losses = trainer.train_model(train_loader, val_loader)

    # Plot training curves in their own figure
    plt.figure(figsize=(8, 4))
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.tight_layout()
    plt.show()

    # Create example feature graph. visualize_feature_graph opens and shows
    # its own figure, so it cannot be drawn into a subplot of the loss figure.
    example_acoustic = {'log_mel': np.random.randn(128, 100)}
    example_linguistic = {'transformer': np.random.randn(768)}
    G = visualizer.create_feature_graph(example_acoustic, example_linguistic)
    visualizer.visualize_feature_graph(G)

    print("Training completed! Model saved as 'best_ad_model.pth'")

# Example usage for testing individual components
def test_feature_extraction(audio_path=None):
    """Run every feature extractor on one audio file and plot the results.

    Args:
        audio_path: Audio file to process. Defaults to the sample path on
            Google Drive used by this notebook.

    Prints a message and returns without plotting when the file is missing
    or its acoustic features cannot be extracted.
    """
    # Example audio file path (replace with actual path)
    if audio_path is None:
        audio_path = "/content/drive/MyDrive/Voice/extracted/ADReSSo21/diagnosis/train/audio/ad/sample.wav"

    if not os.path.exists(audio_path):
        print(f"Audio file not found: {audio_path}")
        return

    # Initialize extractors only once the input is known to exist, since
    # constructing them loads the pretrained models.
    acoustic_extractor = AcousticFeatureExtractor()
    asr_transcriber = ASRTranscriber()
    linguistic_extractor = LinguisticFeatureExtractor()
    visualizer = FeatureVisualizer()

    print(f"Processing: {audio_path}")

    # Extract acoustic features (None signals an extraction failure)
    acoustic_features = acoustic_extractor.extract_all_features(audio_path)
    if acoustic_features is None:
        print(f"Acoustic feature extraction failed for: {audio_path}")
        return
    print(f"Acoustic features extracted: {list(acoustic_features.keys())}")

    # Transcribe audio
    transcript = asr_transcriber.transcribe_audio(audio_path)
    print(f"Transcript: {transcript[:100]}...")

    # Extract linguistic features
    linguistic_features = linguistic_extractor.extract_all_features(transcript)
    print(f"Linguistic features extracted: {list(linguistic_features.keys())}")

    # Visualize feature relationships
    G = visualizer.create_feature_graph(acoustic_features, linguistic_features)
    visualizer.visualize_feature_graph(G, "Sample Feature Interaction Graph")

    # Plot feature correlations
    all_features = {**acoustic_features, **linguistic_features}
    visualizer.plot_feature_correlations(all_features, "Sample Feature Correlations")

ModuleNotFoundError: No module named 'speech_recognition'