## 1. Environment Setup

In [None]:
# Detect Google Colab by checking whether the `google.colab` module is
# already loaded in this interpreter.
import sys

IN_COLAB = 'google.colab' in sys.modules
print("Running on Google Colab" if IN_COLAB else "Running locally")

In [None]:
# Install required packages
# NOTE: the commands below are wrapped in a triple-quoted string, so running
# this cell installs nothing; it only evaluates a string literal. To actually
# install the dependencies, remove the surrounding quotes so that each `!pip`
# line is executed as a shell command.
"""!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 -q
!pip install torch-geometric -q
!pip install pandas numpy scikit-learn pyyaml tqdm matplotlib seaborn -q"""

In [None]:
# Resolve the project root. Locally it is the working directory; on Colab the
# project lives on Google Drive, which has to be mounted first.
if not IN_COLAB:
    PROJECT_PATH = '.'
else:
    from google.colab import drive

    drive.mount('/content/drive')
    # Adjust this to the Drive folder that holds the project
    PROJECT_PATH = '/content/drive/MyDrive/blg561e_project'

In [None]:
# Add project to Python path
import os
import sys

# Re-running this cell must not stack duplicate entries on sys.path, so drop
# an earlier copy (if any) before putting the project root back in front.
if PROJECT_PATH in sys.path:
    sys.path.remove(PROJECT_PATH)
sys.path.insert(0, PROJECT_PATH)

# Verify path: os.listdir raises here if PROJECT_PATH does not exist, which
# surfaces a wrong project folder before any package import is attempted.
print(f"Project path: {PROJECT_PATH}")
print(f"Contents: {os.listdir(PROJECT_PATH)}")

## 2. Import Libraries and Package Modules

In [None]:
# Standard libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader  # Use standard PyTorch DataLoader for custom data

# Seed the numpy and torch global RNGs so repeated runs draw the same numbers
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

if device == 'cuda':
    # Report which GPU (index 0) is in use and how much memory it has
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    gpu_properties = torch.cuda.get_device_properties(0)
    print(f"Memory: {gpu_properties.total_memory / 1e9:.2f} GB")

In [None]:
# Reload modules to pick up any changes (important when developing)
# Each project module is imported first (so that it is loaded) and then passed
# to importlib.reload, which re-executes its source. This lets edits made to
# the package since the kernel started take effect without a kernel restart.
import importlib
import token_malice_prediction.data.preprocessing
import token_malice_prediction.data.temporal_graph_builder
import token_malice_prediction.data.dataset
import token_malice_prediction.models.temporal_gnn
import token_malice_prediction.training.temporal_trainer
import token_malice_prediction.evaluation.metrics
import token_malice_prediction.utils.config

# NOTE(review): modules are reloaded in the order listed; if one module imports
# names from another, the dependency presumably has to be reloaded first --
# confirm the order matches the package's internal imports.
importlib.reload(token_malice_prediction.data.preprocessing)
importlib.reload(token_malice_prediction.data.temporal_graph_builder)
importlib.reload(token_malice_prediction.data.dataset)
importlib.reload(token_malice_prediction.models.temporal_gnn)
importlib.reload(token_malice_prediction.training.temporal_trainer)
importlib.reload(token_malice_prediction.evaluation.metrics)
importlib.reload(token_malice_prediction.utils.config)

# Import custom modules from the package
# These from-imports run after the reloads above, so the names bound in the
# notebook refer to the reloaded definitions rather than stale ones.
from token_malice_prediction.data.preprocessing import TokenPreprocessor
from token_malice_prediction.data.temporal_graph_builder import (
    TemporalGraphBuilder, 
    TemporalGraphData,
    build_temporal_graphs_from_processed_data
)
from token_malice_prediction.data.dataset import TokenGraphDataset, create_data_loaders
from token_malice_prediction.models.temporal_gnn import (
    HeteroGINEEvolveGCN,
    create_temporal_model
)
from token_malice_prediction.training.temporal_trainer import (
    TemporalTrainer,
    compute_metrics,
    print_model_summary
)
from token_malice_prediction.evaluation.metrics import compute_classification_metrics
from token_malice_prediction.utils.config import load_config, Config

print("All modules imported successfully!")

## 3. Configuration

In [None]:
# Single source of truth for every tunable value used by the cells below.
# Keys are grouped by pipeline stage; insertion order is the print order.
config = dict(
    # --- Data settings ---
    data_dir=os.path.join(PROJECT_PATH, 'data_solana'),
    batch_size=32,
    train_ratio=0.7,
    val_ratio=0.15,
    test_ratio=0.15,
    min_transactions=10,
    observation_days=30,        # Days to observe for labeling
    training_window_days=20,    # Days of data to use for training

    # --- Classification settings ---
    # classification_mode options:
    #   'peak_decline', 'sudden_drop', 'initial_decline', 'combined'
    classification_mode='combined',   # Recommended: detects rug pull patterns
    malice_threshold=0.90,            # 90% decline threshold
    sudden_drop_threshold=0.90,       # 90% drop within the time window
    sudden_drop_window_hours=24,      # 24-hour window to detect sudden drops

    # --- Model settings ---
    hidden_dim=128,
    num_gine_layers=3,
    num_temporal_layers=2,
    num_snapshots=6,
    dropout=0.3,
    num_classes=2,
    use_evolve_gcn=True,    # True = EvolveGCN-O, False = LSTM
    pooling='attention',    # one of 'attention', 'mean', 'max', 'concat'

    # --- Training settings ---
    # NOTE(review): 1e-1 is two orders of magnitude above AdamW's default
    # step size (1e-3) -- confirm this value is intentional.
    learning_rate=1e-1,
    weight_decay=0.01,
    num_epochs=100,
    gradient_clip=1.0,
    early_stopping_patience=15,
    use_class_weights=True,

    # --- Output ---
    output_dir=os.path.join(PROJECT_PATH, 'outputs'),
    seed=SEED,
    device=device,
)

# Make sure the output directory exists before any cell writes into it
os.makedirs(config['output_dir'], exist_ok=True)

print("Configuration:")
for setting_name, setting_value in config.items():
    print(f"  {setting_name}: {setting_value}")

## 4. Data Loading and Preprocessing

In [None]:
# Initialize preprocessor with improved classification.
# Every labeling-related setting is forwarded from `config`, including the
# sudden-drop parameters, which are passed regardless of the selected mode.
preprocessor = TokenPreprocessor(
    data_dir=config['data_dir'],
    malice_threshold=config['malice_threshold'],
    min_transactions=config['min_transactions'],
    observation_days=config['observation_days'],
    training_window_days=config['training_window_days'],
    sudden_drop_threshold=config['sudden_drop_threshold'],
    sudden_drop_window_hours=config['sudden_drop_window_hours'],
    classification_mode=config['classification_mode']
)

# Get list of CSV files
csv_files = preprocessor.get_csv_files()
print(f"Found {len(csv_files)} token CSV files")
print(f"Observation period: {config['observation_days']} days")
print(f"Training window: {config['training_window_days']} days")
print(f"Classification mode: {config['classification_mode']}")

# Report the sudden-drop parameters for every mode expected to use them.
# Previously they were printed only for 'sudden_drop', so the configured
# 'combined' mode never showed them.
# NOTE(review): 'combined' presumably includes the sudden-drop rule -- confirm
# against TokenPreprocessor.
if config['classification_mode'] in ('sudden_drop', 'combined'):
    print(f"  - Sudden drop threshold: {config['sudden_drop_threshold']*100:.0f}%")
    print(f"  - Detection window: {config['sudden_drop_window_hours']} hours")

In [None]:
# Run the preprocessor over every token file, keeping the usable ones
from tqdm.auto import tqdm

processed_data, skipped_count = [], 0

print("Processing token transaction data...")
for filepath in tqdm(csv_files, desc="Processing tokens"):
    result = preprocessor.process_single_token(filepath)
    if result is None:
        # A None result marks a token that is not kept; just count it
        skipped_count += 1
        continue
    df, label = result
    # Each token is identified by its file name without the extension
    processed_data.append((df, label, filepath.stem))

print(f"\nProcessed {len(processed_data)} tokens")
print(f"Skipped {skipped_count} tokens (insufficient data/duration)")

In [None]:
# Analyze label distribution
labels = [label for _, label, _ in processed_data]
label_counts = pd.Series(labels).value_counts()

# Look the two classes up explicitly: value_counts() omits classes that never
# occur, so its own max()/min() would report 1.00:1 when a class is missing
# and NaN when there are no labels at all.
benign_count = int(label_counts.get(0, 0))
malicious_count = int(label_counts.get(1, 0))
minority_count = min(benign_count, malicious_count)

print("\nLabel Distribution:")
print(f"  Benign (0): {benign_count}")
print(f"  Malicious (1): {malicious_count}")
if minority_count > 0:
    print(f"  Imbalance ratio: {max(benign_count, malicious_count) / minority_count:.2f}:1")
else:
    print("  Imbalance ratio: undefined (at least one class has no samples)")

# Visualize
plt.figure(figsize=(8, 5))
plt.bar(['Benign (0)', 'Malicious (1)'], [benign_count, malicious_count],
        color=['green', 'red'], alpha=0.7)
plt.ylabel('Count')
plt.title('Label Distribution')
plt.tight_layout()
plt.show()

## 5. Build Temporal Graphs

In [None]:
# Turn every processed token into a sequence of graph snapshots
print(f"Building temporal graphs with {config['num_snapshots']} snapshots...")

graph_build_kwargs = dict(
    processed_data=processed_data,
    num_snapshots=config['num_snapshots'],
    overlap_ratio=0.2,  # 20% overlap between snapshots
)
temporal_graphs = build_temporal_graphs_from_processed_data(**graph_build_kwargs)

print(f"Built {len(temporal_graphs)} temporal graphs")

In [None]:
# Diagnostic: look for extreme values, then sanitize the graph features
print("Checking feature statistics and cleaning graphs...")

def clean_graph_features(temporal_graph):
    """Make every snapshot's node and edge features finite.

    For each snapshot in ``temporal_graph.snapshots`` the ``x`` and
    ``edge_attr`` tensors are replaced by copies in which NaN becomes 0.0,
    +inf becomes 100.0 and -inf becomes -100.0. The snapshots are updated
    in place and the same ``temporal_graph`` object is returned.
    """
    replacements = dict(nan=0.0, posinf=100.0, neginf=-100.0)
    for snapshot in temporal_graph.snapshots:
        for attr_name in ("x", "edge_attr"):
            sanitized = torch.nan_to_num(getattr(snapshot, attr_name), **replacements)
            setattr(snapshot, attr_name, sanitized)
    return temporal_graph

# Find problematic graphs
def _has_non_finite(temporal_graph):
    """Return True if any snapshot's node or edge features contain NaN/Inf."""
    for snap in temporal_graph.snapshots:
        for features in (snap.x, snap.edge_attr):
            if not torch.isfinite(features).all():
                return True
    return False

problematic_indices = [
    i for i, (g, _) in enumerate(temporal_graphs) if _has_non_finite(g)
]

print(f"Found {len(problematic_indices)} graphs with NaN/Inf values")

# Clean all graphs
cleaned_temporal_graphs = [
    (clean_graph_features(g), name) for g, name in temporal_graphs
]

temporal_graphs = cleaned_temporal_graphs

# Verify cleaning worked.
# The range is taken over every snapshot of every graph (not only the first
# snapshot), and empty feature tensors are skipped because .max()/.min() raise
# on tensors with no elements.
node_feature_tensors = [
    snap.x
    for g, _ in temporal_graphs
    for snap in g.snapshots
    if snap.x.numel() > 0
]
if node_feature_tensors:
    all_max_x = max(x.max().item() for x in node_feature_tensors)
    all_min_x = min(x.min().item() for x in node_feature_tensors)
    print(f"After cleaning - Node feature range: [{all_min_x:.4f}, {all_max_x:.4f}]")
else:
    print("After cleaning - no node features available to summarize")

# Check for remaining issues (same scan as before cleaning)
has_issues = any(_has_non_finite(g) for g, _ in temporal_graphs)

if has_issues:
    print("WARNING: Still have NaN/Inf values after cleaning!")
else:
    print("All graphs cleaned successfully - no NaN/Inf values remain")

In [None]:
# Summarize graph sizes: node count per graph, and edge count summed over
# all of a graph's snapshots
num_nodes_list = []
num_edges_list = []
for temporal_graph, _ in temporal_graphs:
    num_nodes_list.append(temporal_graph.num_nodes)
    num_edges_list.append(
        sum(snapshot.edge_index.size(1) for snapshot in temporal_graph.snapshots)
    )

print("\nGraph Statistics:")
for quantity, values in (("Nodes", num_nodes_list), ("Edges", num_edges_list)):
    print(f"  {quantity} - Min: {min(values)}, Max: {max(values)}, "
          f"Mean: {np.mean(values):.1f}")

# Read the feature widths off the first snapshot of the first graph; the
# model is built from these two numbers
sample_graph = temporal_graphs[0][0]
sample_snapshot = sample_graph.snapshots[0]
node_dim = sample_snapshot.x.size(1)
edge_dim = sample_snapshot.edge_attr.size(1)

print("\nFeature Dimensions:")
print(f"  Node features: {node_dim}")
print(f"  Edge features: {edge_dim}")

## 6. Create Data Splits

In [None]:
from sklearn.model_selection import train_test_split

# Separate the graph objects from their names and read each graph's label
graphs = [graph for graph, _ in temporal_graphs]
graph_labels = [graph.label.item() for graph in graphs]

# Stage 1: carve the test set off the full data (stratified by label)
holdout_split = train_test_split(
    graphs,
    graph_labels,
    test_size=config['test_ratio'],
    stratify=graph_labels,
    random_state=config['seed'],
)
train_val_graphs, test_graphs, train_val_labels, test_labels = holdout_split

# Stage 2: split the remainder into train and validation. The validation
# share is re-expressed relative to what is left after removing the test set.
val_size = config['val_ratio'] / (config['train_ratio'] + config['val_ratio'])
inner_split = train_test_split(
    train_val_graphs,
    train_val_labels,
    test_size=val_size,
    stratify=train_val_labels,
    random_state=config['seed'],
)
train_graphs, val_graphs, train_labels, val_labels = inner_split

print("Data Splits:")
for split_tag, split_graphs in (
    ("Train:", train_graphs),
    ("Val:  ", val_graphs),
    ("Test: ", test_graphs),
):
    print(f"  {split_tag} {len(split_graphs)} ({len(split_graphs)/len(graphs)*100:.1f}%)")

for heading, split_labels in (
    ("\nTrain label distribution:", train_labels),
    ("Val label distribution:  ", val_labels),
    ("Test label distribution: ", test_labels),
):
    print(f"{heading} {dict(pd.Series(split_labels).value_counts())}")

In [None]:
# Collate function and loaders for the three splits
def temporal_collate_fn(batch):
    """Identity collate for temporal graphs.

    Returns ``batch`` unchanged, so each batch produced by the loaders is the
    list of TemporalGraphData objects itself rather than a stacked tensor.
    """
    return batch

# Settings common to all three loaders; only the training loader shuffles
shared_loader_kwargs = dict(
    batch_size=config['batch_size'],
    collate_fn=temporal_collate_fn,
)

train_loader = DataLoader(train_graphs, shuffle=True, **shared_loader_kwargs)
val_loader = DataLoader(val_graphs, shuffle=False, **shared_loader_kwargs)
test_loader = DataLoader(test_graphs, shuffle=False, **shared_loader_kwargs)

for split_name, loader in (
    ("Train", train_loader),
    ("Val", val_loader),
    ("Test", test_loader),
):
    print(f"{split_name} batches: {len(loader)}")

In [None]:
# Compute class weights for imbalanced data.
# Weight of class c = total / (num_classes * count_c), i.e. inversely
# proportional to how often the class occurs in the training split.
if config['use_class_weights']:
    num_classes = config['num_classes']
    # Align the counts to the full label range 0..num_classes-1. Indexing the
    # raw value_counts() by position raised KeyError (or produced a weight
    # vector shorter than the number of model outputs) whenever a class was
    # absent from the training split.
    class_counts = (
        pd.Series(train_labels)
        .value_counts()
        .reindex(range(num_classes), fill_value=0)
    )
    total = len(train_labels)
    class_weights = torch.tensor(
        [
            # A class with no training samples gets weight 0 instead of
            # triggering a division by zero.
            total / (num_classes * count) if count > 0 else 0.0
            for count in class_counts
        ],
        dtype=torch.float
    )
    print(f"Class weights: {class_weights.tolist()}")
else:
    class_weights = None

## 7. Model Initialization

In [None]:
# Build the temporal GNN. The feature widths come from the data; every other
# argument is read from `config` under a key with the same name as the
# keyword it is passed as.
model_config_keys = (
    'hidden_dim',
    'num_classes',
    'num_gine_layers',
    'num_temporal_layers',
    'num_snapshots',
    'dropout',
    'use_evolve_gcn',
    'pooling',
)
model = create_temporal_model(
    node_dim=node_dim,
    edge_dim=edge_dim,
    **{key: config[key] for key in model_config_keys}
)

# Print model summary
print_model_summary(model)

In [None]:
# AdamW optimizer driven by the configured learning rate and weight decay
optimizer_settings = dict(
    lr=config['learning_rate'],
    weight_decay=config['weight_decay'],
)
optimizer = torch.optim.AdamW(model.parameters(), **optimizer_settings)

# Halve the learning rate once the monitored quantity has stopped decreasing
# for 5 epochs
plateau_settings = dict(mode='min', factor=0.5, patience=5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, **plateau_settings)

print(f"Optimizer: AdamW (lr={config['learning_rate']}, weight_decay={config['weight_decay']})")
print("Scheduler: ReduceLROnPlateau (factor=0.5, patience=5)")

## 8. Training

In [None]:
# Bundle the model, optimizer, scheduler and loss settings into the trainer
trainer_settings = dict(
    model=model,
    optimizer=optimizer,
    device=config['device'],
    scheduler=scheduler,
    class_weights=class_weights,
    gradient_clip=config['gradient_clip'],
    use_temporal=True,  # Using temporal graphs
)
trainer = TemporalTrainer(**trainer_settings)

print("Trainer initialized!")

In [None]:
# Run training; the trainer is given the path for the best checkpoint and
# returns the per-epoch history used by the plots below
save_path = os.path.join(config['output_dir'], 'best_model.pt')

training_settings = dict(
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=config['num_epochs'],
    early_stopping_patience=config['early_stopping_patience'],
    save_path=save_path,
    verbose=True,
    log_interval=1,
)
history = trainer.train(**training_settings)

## 9. Training Visualization

In [None]:
# Three panels side by side: loss, accuracy, learning rate
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
loss_ax, acc_ax, lr_ax = axes

# Loss and accuracy share the same layout: train vs. validation per epoch
for panel, metric_key, metric_name in (
    (loss_ax, 'loss', 'Loss'),
    (acc_ax, 'acc', 'Accuracy'),
):
    panel.plot(history[f'train_{metric_key}'], label='Train', color='blue')
    panel.plot(history[f'val_{metric_key}'], label='Validation', color='orange')
    panel.set_xlabel('Epoch')
    panel.set_ylabel(metric_name)
    panel.set_title(f'Training and Validation {metric_name}')
    panel.legend()
    panel.grid(True, alpha=0.3)

# Learning rate on a log axis so successive reductions stay visible
lr_ax.plot(history['learning_rate'], color='green')
lr_ax.set_xlabel('Epoch')
lr_ax.set_ylabel('Learning Rate')
lr_ax.set_title('Learning Rate Schedule')
lr_ax.set_yscale('log')
lr_ax.grid(True, alpha=0.3)

plt.tight_layout()
history_figure_path = os.path.join(config['output_dir'], 'training_history.png')
plt.savefig(history_figure_path, dpi=150)
plt.show()

## 10. Model Evaluation

In [None]:
# Score the trained model on the held-out test split
print("Evaluating on test set...")
test_results = trainer.evaluate(test_loader, desc='Test', progress_bar=True)

print("\nTest Results:")
for caption, result_key in (("Loss", 'loss'), ("Accuracy", 'accuracy')):
    print(f"  {caption}: {test_results[result_key]:.4f}")

In [None]:
# Derive the full set of classification metrics from the test predictions
test_metrics = compute_classification_metrics(
    y_true=test_results['labels'],
    y_pred=test_results['predictions'],
    y_prob=test_results['probabilities']
)

separator = "=" * 50
print("\n" + separator)
print("DETAILED TEST METRICS")
print(separator)

# Metrics that must be present
for caption, metric_key in (
    ("Accuracy:    ", 'accuracy'),
    ("Precision:   ", 'precision'),
    ("Recall:      ", 'recall'),
    ("F1 Score:    ", 'f1'),
):
    print(f"{caption}{test_metrics[metric_key]:.4f}")

# Metrics that are reported only when the metrics dict contains them
for caption, metric_key in (
    ("AUROC:       ", 'auroc'),
    ("AUPRC:       ", 'auprc'),
):
    if metric_key in test_metrics:
        print(f"{caption}{test_metrics[metric_key]:.4f}")

print("\nConfusion Matrix:")
for caption, metric_key in (
    ("  True Negatives:  ", 'true_negatives'),
    ("  False Positives: ", 'false_positives'),
    ("  False Negatives: ", 'false_negatives'),
    ("  True Positives:  ", 'true_positives'),
):
    print(f"{caption}{test_metrics.get(metric_key, 'N/A')}")

In [None]:
# Confusion matrix, ROC curve and precision-recall curve in one figure
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc, precision_recall_curve

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
cm_ax, roc_ax, pr_ax = axes

# Panel 1: confusion matrix of the predicted classes
cm = confusion_matrix(test_results['labels'], test_results['predictions'])
disp = ConfusionMatrixDisplay(cm, display_labels=['Benign', 'Malicious'])
disp.plot(ax=cm_ax, cmap='Blues', colorbar=False)
cm_ax.set_title('Confusion Matrix')

# Panel 2: ROC curve with the chance diagonal as reference
fpr, tpr, _ = roc_curve(test_results['labels'], test_results['probabilities'])
roc_auc = auc(fpr, tpr)
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.3f})')
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
roc_ax.set(
    xlim=[0.0, 1.0],
    ylim=[0.0, 1.05],
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='ROC Curve',
)
roc_ax.legend(loc='lower right')
roc_ax.grid(True, alpha=0.3)

# Panel 3: precision-recall curve, with the area under it in the legend
precision, recall, _ = precision_recall_curve(test_results['labels'], test_results['probabilities'])
pr_auc = auc(recall, precision)
pr_ax.plot(recall, precision, color='green', lw=2, label=f'PR curve (AUC = {pr_auc:.3f})')
pr_ax.set(
    xlim=[0.0, 1.0],
    ylim=[0.0, 1.05],
    xlabel='Recall',
    ylabel='Precision',
    title='Precision-Recall Curve',
)
pr_ax.legend(loc='lower left')
pr_ax.grid(True, alpha=0.3)

plt.tight_layout()
evaluation_figure_path = os.path.join(config['output_dir'], 'evaluation_plots.png')
plt.savefig(evaluation_figure_path, dpi=150)
plt.show()

## 11. Analysis and Insights

In [None]:
# How the predicted probabilities are distributed per true class, and how
# well they are calibrated
from sklearn.calibration import calibration_curve

probs = np.array(test_results['probabilities'])
labels = np.array(test_results['labels'])

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
hist_ax, calib_ax = axes

# Left panel: one histogram per true class, with the 0.5 threshold marked
for class_value, class_name, class_color in (
    (0, 'Benign', 'green'),
    (1, 'Malicious', 'red'),
):
    hist_ax.hist(probs[labels == class_value], bins=30, alpha=0.7,
                 label=class_name, color=class_color)
hist_ax.axvline(x=0.5, color='black', linestyle='--', label='Threshold')
hist_ax.set(
    xlabel='Predicted Probability (Malicious)',
    ylabel='Count',
    title='Distribution of Predicted Probabilities',
)
hist_ax.legend()
hist_ax.grid(True, alpha=0.3)

# Right panel: calibration curve against the diagonal of perfect calibration
prob_true, prob_pred = calibration_curve(labels, probs, n_bins=10)
calib_ax.plot(prob_pred, prob_true, marker='o', label='Model')
calib_ax.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Perfectly Calibrated')
calib_ax.set(
    xlabel='Mean Predicted Probability',
    ylabel='Fraction of Positives',
    title='Calibration Plot',
)
calib_ax.legend()
calib_ax.grid(True, alpha=0.3)

plt.tight_layout()
analysis_figure_path = os.path.join(config['output_dir'], 'analysis_plots.png')
plt.savefig(analysis_figure_path, dpi=150)
plt.show()

In [None]:
# Recap of the model, the dataset and the headline numbers
banner = "=" * 50
print("\n" + banner)
print("FINAL SUMMARY")
print(banner)

print("\nModel: HeteroGINEEvolveGCN")
for caption, config_key in (
    ("Hidden dim", 'hidden_dim'),
    ("GINEConv layers", 'num_gine_layers'),
    ("Temporal layers", 'num_temporal_layers'),
    ("Snapshots", 'num_snapshots'),
):
    print(f"  - {caption}: {config[config_key]}")
temporal_encoder_name = 'EvolveGCN-O' if config['use_evolve_gcn'] else 'LSTM'
print(f"  - Temporal encoder: {temporal_encoder_name}")
print(f"  - Pooling: {config['pooling']}")

print("\nDataset:")
print(f"  - Total tokens: {len(graphs)}")
print(f"  - Train/Val/Test: {len(train_graphs)}/{len(val_graphs)}/{len(test_graphs)}")

# Best values are taken independently over all epochs in the history
print(f"\nBest Validation Loss: {min(history['val_loss']):.4f}")
print(f"Best Validation Acc:  {max(history['val_acc']):.4f}")

print("\nTest Performance:")
for caption, metric_key in (
    ("  - Accuracy:  ", 'accuracy'),
    ("  - Precision: ", 'precision'),
    ("  - Recall:    ", 'recall'),
    ("  - F1 Score:  ", 'f1'),
):
    print(f"{caption}{test_metrics[metric_key]:.4f}")
if 'auroc' in test_metrics:
    print(f"  - AUROC:     {test_metrics['auroc']:.4f}")

## 12. Save Results

In [None]:
import json

# Assemble the report section by section, then write it out as JSON

# Path objects are stringified up front; anything else that json cannot
# encode is handled by default=str at dump time
serializable_config = {}
for key, value in config.items():
    serializable_config[key] = str(value) if isinstance(value, Path) else value

training_summary = {
    'final_train_loss': history['train_loss'][-1],
    'final_train_acc': history['train_acc'][-1],
    'final_val_loss': history['val_loss'][-1],
    'final_val_acc': history['val_acc'][-1],
    'best_val_loss': min(history['val_loss']),
    'best_val_acc': max(history['val_acc']),
    'epochs_trained': len(history['train_loss']),
}

dataset_stats = {
    'total_tokens': len(graphs),
    'train_size': len(train_graphs),
    'val_size': len(val_graphs),
    'test_size': len(test_graphs),
}

results = {
    'config': serializable_config,
    'training_history': training_summary,
    'test_metrics': test_metrics,
    'dataset_stats': dataset_stats,
}

results_path = os.path.join(config['output_dir'], 'results.json')
with open(results_path, 'w') as f:
    json.dump(results, f, indent=2, default=str)

print(f"Results saved to {results_path}")

In [None]:
# Persist the per-epoch history as a CSV (one row per epoch, no index column)
history_path = os.path.join(config['output_dir'], 'training_history.csv')
history_df = pd.DataFrame(history)
history_df.to_csv(history_path, index=False)
print(f"Training history saved to {history_path}")

# List everything that now sits in the output directory
output_dir = config['output_dir']
print(f"\nAll outputs saved to: {output_dir}")
print(f"Files: {os.listdir(output_dir)}")

---

## Conclusion

This notebook demonstrates the complete pipeline for token malice detection using Temporal GNNs:

1. **Data Preprocessing**: Loading and filtering token transaction data
2. **Graph Construction**: Building temporal graphs with multiple snapshots
3. **Model Training**: Training the HeteroGINE-EvolveGCN model
4. **Evaluation**: Comprehensive metrics and visualizations

### Key Components:
- **GINEConv**: Captures spatial relationships in transaction graphs
- **LSTM/EvolveGCN-O**: Captures temporal dynamics across snapshots
- **Global Attention**: Provides interpretable graph-level readout

### Future Improvements:
- Hyperparameter tuning
- Ensemble methods
- Additional temporal features
- Cross-validation for more robust evaluation