# Mental Health Classifier - Exploration Notebook

This notebook demonstrates the mental health classifier for Depression, Anxiety, and Suicide risk detection.

## Features
- Multi-head attention transformer architecture
- Clinical text preprocessing
- Attention visualization
- Performance analysis

In [None]:
import sys
import os
from pathlib import Path

# Make the project importable from the notebook directory.
# The project root must be on sys.path for the `from src.<module> import ...`
# statements used below to resolve; the `src` directory itself is kept on the
# path as well so the original behaviour is preserved.
project_root = Path('..').resolve()
for import_dir in (project_root, project_root / 'src'):
    # Guarded so that re-running the cell does not add duplicate entries.
    if str(import_dir) not in sys.path:
        sys.path.append(str(import_dir))

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Set style
# Start from matplotlib's 'default' style sheet, then select seaborn's 'husl'
# colour palette for subsequent plots.
# NOTE(review): the order is presumably significant (applying a style sheet
# after the palette may reset the colour cycle) - confirm before reordering.
plt.style.use('default')
sns.set_palette('husl')

In [None]:
# Import project modules
from src.models import MentalHealthClassifier, create_model
from src.data import DataProcessor, ClinicalTextPreprocessor, create_sample_data
from src.training import create_trainer
from src.utils import (
    load_config, setup_logging, set_random_seeds, get_device,
    print_model_summary, print_data_summary
)

## Configuration and Setup

In [None]:
# Tunable inputs for this setup cell
CONFIG_PATH = '../config/config.yaml'
SEED = 42

# Read the project configuration from disk
config = load_config(CONFIG_PATH)

# Seed the random number generators and pick the compute device
set_random_seeds(SEED)
device = get_device()

print(f'Using device: {device}')
print('Configuration loaded')

## Create Sample Data

Let's create some sample data to experiment with:

In [None]:
# Generate a synthetic dataset on disk (the data directory is created on demand)
sample_csv = '../data/sample_data.csv'
os.makedirs('../data', exist_ok=True)
create_sample_data(sample_csv, num_samples=300)

# Read the generated file back and summarise it
df = pd.read_csv(sample_csv)
print(f'Dataset shape: {df.shape}')
print('\nLabel distribution:')
print(df['label'].value_counts())

# Print the first two texts of every label, in order of first appearance
print('\nSample texts:')
for label in df['label'].unique():
    print(f'\n{label.upper()}:')
    label_texts = df.loc[df['label'] == label, 'text']
    for position, text in enumerate(label_texts.head(2), start=1):
        print(f'{position}. {text}')

## Data Processing

Let's explore the clinical text preprocessing:

In [None]:
# Build a preprocessor with contraction expansion and clinical-term
# normalisation switched on
preprocessor = ClinicalTextPreprocessor(
    expand_contractions=True,
    normalize_clinical_terms=True
)

# A short note containing clinical shorthand to run through the preprocessor
sample_text = "Pt c/o severe depression w/ SI and h/o MDD. R/O GAD."

print('Original text:')
print(sample_text)

# Show the result of each preprocessing entry point under its own heading
for heading, transform in (
    ('\nPreprocessed text:', preprocessor.preprocess),
    ('\nTokenized:', preprocessor.tokenize_text),
):
    print(heading)
    print(transform(sample_text))

## Model Architecture

Let's create and examine the model:

In [None]:
# Override the model settings with a small architecture for this example
small_model_settings = dict(
    vocab_size=1000,  # Will be updated after building vocab
    n_embd=128,
    num_heads=4,
    n_layer=3,
    max_seq_length=256,
)
model_config = config['model']  # same dict object, so config['model'] is updated in place
model_config.update(small_model_settings)

# Instantiate the model from the updated settings and print its summary
model = create_model(model_config)
print_model_summary(model, model_config)

## Data Preparation

In [None]:
# Split data
from sklearn.model_selection import train_test_split

# Two stratified splits with identical settings: first hold out 20% of the
# data as the test set, then 20% of the remainder as the validation set.
split_options = {'test_size': 0.2, 'random_state': 42}
train_val_df, test_df = train_test_split(df, stratify=df['label'], **split_options)
train_df, val_df = train_test_split(
    train_val_df, stratify=train_val_df['label'], **split_options
)

print(f'Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}')

# Data processor driven by the `data` section of the configuration
data_processor = DataProcessor(config['data'])

# Raw texts per split
train_texts, val_texts, test_texts = (
    split['text'].tolist() for split in (train_df, val_df, test_df)
)

# Encoded labels per split (encoded in train, val, test order)
train_labels, val_labels, test_labels = (
    data_processor.encode_labels(split['label'].tolist())
    for split in (train_df, val_df, test_df)
)

# The vocabulary is built from the training texts only
data_processor.build_vocabulary(train_texts)

# Recreate the model so its vocab size matches the vocabulary just built
vocab_size = len(data_processor.vocab)
config['model']['vocab_size'] = vocab_size
model = create_model(config['model'])

print(f'Vocabulary size: {vocab_size}')

## Training

Let's train a small model for demonstration:

In [None]:
# Wrap the three splits in data loaders (returned keyed by split name)
dataloaders = data_processor.create_dataloaders(
    train_texts, train_labels,
    val_texts, val_labels,
    test_texts, test_labels,
)

# Shorten training so the demo finishes quickly
# NOTE(review): batch_size is overridden here after the loaders were already
# built; confirm whether the loaders take their batch size from config['data']
# instead, in which case this value does not affect them.
demo_overrides = {
    'num_epochs': 3,
    'batch_size': 16,
    'learning_rate': 1e-3,
}
config['training'].update(demo_overrides)

# Class weights derived from the training labels, handed to the trainer
class_weights = data_processor.get_class_weights(train_labels)

# Assemble the trainer around the current model
trainer = create_trainer(
    model=model,
    config=config['training'],
    device=device,
    class_weights=class_weights,
)

print('Starting training...')
train_loader, val_loader = dataloaders['train'], dataloaders['val']
trainer.train(train_loader, val_loader)

## Evaluation and Visualization

In [None]:
# Evaluate on the held-out test set (plots are not saved to disk)
test_metrics = trainer.evaluate(dataloaders['test'], save_plots=False)

print('\nTest Results:')
for metric, value in test_metrics.items():
    # Only scalar numeric metrics are printed; other entries are skipped.
    # np.integer / np.floating are included because NumPy scalars such as
    # np.float32 or np.int64 do not subclass the built-in float / int and
    # would otherwise be silently dropped from the report.
    if isinstance(value, (int, float, np.integer, np.floating)):
        print(f'{metric}: {value:.4f}')

In [None]:
# Plot training history
# Delegates entirely to the trainer created earlier in the notebook.
# NOTE(review): presumably plots the curves recorded during trainer.train();
# the exact contents are defined in src.training - confirm there.
trainer.plot_training_history()

## Interactive Predictions

In [None]:
# Free-text inputs to run through the trained classifier
test_examples = [
    "I feel completely hopeless and nothing seems to matter anymore",
    "I'm constantly worried about everything and can't stop the racing thoughts",
    "I've been having thoughts about ending my life",
    "Patient reports feeling great and enjoying activities"
]

separator = '-' * 50

print('PREDICTION EXAMPLES')
print('=' * 50)

for text in test_examples:
    prediction, probabilities = trainer.predict_text(
        text, data_processor, return_probabilities=True
    )

    # Assemble the per-example report, then emit it in a single print
    report = [
        f'Text: {text}',
        f'Predicted: {prediction}',
        'Probabilities:',
    ]
    report.extend(
        f'  {class_name}: {score:.3f}'
        for class_name, score in probabilities.items()
    )
    report.append(separator)
    print('\n'.join(report))

## Attention Visualization

Let's visualize what the model is paying attention to:

In [None]:
# Get attention weights for a sample
sample_text = "I feel hopeless and have thoughts of ending my life"

# Preprocess and tokenize
preprocessed = data_processor.preprocessor.preprocess(sample_text)
tokens = data_processor.tokenizer(preprocessed)
# NOTE(review): assumes the vocab lookup handles out-of-vocabulary tokens
# (e.g. via a default index) - confirm against DataProcessor.
token_ids = [data_processor.vocab[token] for token in tokens]

# Truncate to the model's maximum sequence length if necessary
max_length = config['model']['max_seq_length']
if len(token_ids) > max_length:
    token_ids = token_ids[:max_length]
    tokens = tokens[:max_length]

# Create attention mask (1 = real token, 0 = padding) and pad up to max_length
attention_mask = [1] * len(token_ids)
pad_token_id = data_processor.vocab['<pad>']
padding_length = max_length - len(token_ids)
token_ids.extend([pad_token_id] * padding_length)
attention_mask.extend([0] * padding_length)

# Convert to tensors with a leading batch dimension of 1
input_ids = torch.tensor([token_ids], dtype=torch.long).to(device)
attention_mask_tensor = torch.tensor([attention_mask], dtype=torch.long).to(device)

# Get attention weights.
# This is pure inference: eval() puts train-time-only layers (such as dropout)
# into inference behaviour so the result does not depend on what mode earlier
# cells left the model in, and no_grad() avoids building an autograd graph.
model.eval()
with torch.no_grad():
    attention_weights = model.get_attention_weights(input_ids, attention_mask_tensor)

# Visualize attention for first layer, first head
if attention_weights:
    plt.figure(figsize=(12, 8))

    # Get attention for actual tokens (not padding).
    # NOTE(review): the indexing assumes one entry per layer, each laid out as
    # (batch, head, query, key) - confirm against get_attention_weights.
    seq_len = len(tokens)
    attn = attention_weights[0][0, 0, :seq_len, :seq_len].cpu().detach().numpy()

    plt.imshow(attn, cmap='Blues', aspect='auto')
    plt.colorbar()
    plt.title('Attention Weights - Layer 1, Head 1')
    plt.xlabel('Key Position')
    plt.ylabel('Query Position')

    # Add token labels
    if len(tokens) <= 15:  # Only for short sequences
        plt.xticks(range(len(tokens)), tokens, rotation=45)
        plt.yticks(range(len(tokens)), tokens)

    plt.tight_layout()
    plt.show()

    print(f'Sample text: {sample_text}')
    print(f'Tokens: {tokens}')
    print(f'Attention shape: {attention_weights[0].shape}')
else:
    print('No attention weights available')

## Model Analysis

Let's analyze model performance in more detail:

In [None]:
# Break test-set accuracy down by true class
from collections import defaultdict

class_predictions = defaultdict(list)  # true label -> predicted labels
class_correct = defaultdict(int)       # true label -> number of correct predictions
class_total = defaultdict(int)         # true label -> number of test samples

# Run the model over the test loader without gradient tracking
model.eval()
with torch.no_grad():
    for batch in dataloaders['test']:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids, attention_mask)
        predictions = torch.argmax(outputs['logits'], dim=-1)

        predicted_ids = predictions.cpu().numpy()
        true_ids = labels.cpu().numpy()
        for pred, label in zip(predicted_ids, true_ids):
            class_predictions[label].append(pred)
            class_total[label] += 1
            # A match adds 1, a miss adds 0
            class_correct[label] += int(pred == label)

# Report accuracy for every class that occurs in the test set
print('Per-class Performance:')
print('-' * 30)
for class_id in sorted(class_total.keys()):
    class_name = data_processor.label_names[class_id]
    n_correct, n_total = class_correct[class_id], class_total[class_id]
    accuracy = n_correct / n_total
    print(f'{class_name}: {accuracy:.3f} ({n_correct}/{n_total})')

## Next Steps

This notebook demonstrates the basic functionality of the mental health classifier. For production use, consider:

1. **Larger datasets**: Use real clinical datasets like MIMIC-III/IV
2. **Model scaling**: Increase model size and training epochs
3. **Clinical validation**: Validate with mental health professionals
4. **Ethical considerations**: Implement safeguards and bias detection
5. **Integration**: Connect with clinical decision support systems

**Important**: This is a research tool and should not be used for actual clinical diagnosis without proper validation and oversight.