# Economic Downturn Detector: Multiple Discriminant Analysis

This notebook applies Multiple Discriminant Analysis (MDA) to identify the most significant economic indicators for predicting recessions.

In [None]:
# Import notebook utilities
from notebook_utils import (
    # Setup functions
    setup_notebook, load_data, display_data_info, save_figure,
    
    # MDA helpers, imported here through notebook_utils
    # (presumably re-exported from the econ_downturn package — confirm)
    apply_mda, create_discriminant_time_series,
    plot_feature_importance, plot_mda_projection, plot_discriminant_time_series
)

# Import other libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from IPython.display import display
from sklearn.model_selection import train_test_split

# Set up the notebook environment.
# NOTE(review): what setup_notebook() configures is defined in notebook_utils
# and is not visible from this notebook.
setup_notebook()

## 1. Load the Processed Data

Let's load the processed datasets created in the feature engineering notebook.

In [None]:
# Resolve the directory that holds the processed data files
from econ_downturn import get_output_paths
output_paths = get_output_paths()
data_dir = output_paths['data_dir']


def _read_processed_csv(csv_path, loaded_label, missing_label):
    """Read a processed CSV, or fall back to an empty DataFrame.

    The file is read with its first column as the index and pandas is asked
    to parse that index as dates. A one-line status message is printed in
    both the found and the not-found case.
    """
    if not os.path.exists(csv_path):
        print(f"{missing_label} not found at {csv_path}")
        return pd.DataFrame()
    frame = pd.read_csv(csv_path, index_col=0, parse_dates=True)
    print(f"Loaded {loaded_label}, shape: {frame.shape}")
    return frame


# Dataset with engineered features
features_path = os.path.join(data_dir, 'data_with_features.csv')
data_features = _read_processed_csv(
    features_path, "dataset with features", "Dataset with features"
)

# Normalized dataset
normalized_path = os.path.join(data_dir, 'data_normalized.csv')
data_normalized = _read_processed_csv(
    normalized_path, "normalized dataset", "Normalized dataset"
)

# PCA dataset
pca_path = os.path.join(data_dir, 'data_pca.csv')
data_pca = _read_processed_csv(pca_path, "PCA dataset", "PCA dataset")

## 2. Prepare Data for MDA

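Let's prepare the data for Multiple Discriminant Analysis. For each dataset we separate the `recession` target from the feature columns and create a stratified 70/30 train/test split.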

In [None]:
# Function to prepare data for MDA
def prepare_data_for_mda(data):
    """Separate features from the target and build a train/test split.

    Parameters
    ----------
    data : pandas.DataFrame
        Table expected to contain a 'recession' column, used as the target.

    Returns
    -------
    tuple
        ``((X_train, X_test, y_train, y_test), (X, y))`` where ``X`` is every
        column except 'recession' and ``y`` is the 'recession' column.
        ``(None, None)`` when ``data`` is empty or lacks a 'recession' column.
    """
    usable = not data.empty and 'recession' in data.columns
    if not usable:
        print("Data is empty or does not contain the recession indicator.")
        return None, None

    target = data['recession']
    predictors = data.drop(columns=['recession'])

    # 70/30 split with a fixed seed, stratified on the target.
    # NOTE(review): train_test_split shuffles rows by default, so the split
    # ignores the ordering of the index — confirm this is acceptable for
    # date-indexed data with lagged features.
    partitions = train_test_split(
        predictors, target, test_size=0.3, random_state=42, stratify=target
    )

    return tuple(partitions), (predictors, target)

# Build the MDA inputs for every dataset that was loaded successfully
datasets = {}

_candidate_frames = (
    ('features', data_features, "Prepared dataset with features for MDA"),
    ('normalized', data_normalized, "Prepared normalized dataset for MDA"),
    ('pca', data_pca, "Prepared PCA dataset for MDA"),
)

for _key, _frame, _message in _candidate_frames:
    # Skip frames that failed to load or that carry no target column
    if _frame.empty or 'recession' not in _frame.columns:
        continue
    datasets[_key] = prepare_data_for_mda(_frame)
    print(_message)

## 3. Apply Multiple Discriminant Analysis

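Let's apply MDA to identify the most significant predictors of recessions. For each dataset we report the accuracy, confusion matrix, classification report, cross-validation scores, and the most important features.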

In [None]:
# Function to apply MDA and evaluate the model
def apply_mda_and_evaluate(dataset_name, split_data, full_data):
    """Run MDA on one dataset, print its metrics and plot feature importances.

    Parameters
    ----------
    dataset_name : str
        Label used in printed headings, the plot title and the saved file name.
    split_data : tuple or None
        ``(X_train, X_test, y_train, y_test)``.
    full_data : tuple or None
        ``(X_full, y_full)``.

    Returns
    -------
    dict or None
        Whatever ``apply_mda`` returns, or ``None`` when either input is None.
        The keys 'accuracy', 'conf_matrix', 'class_report', 'cv_scores' and
        'feature_importance' are read from it here.
    """
    if split_data is None or full_data is None:
        print(f"No data available for {dataset_name}")
        return None

    train_X, test_X, train_y, test_y = split_data
    all_X, all_y = full_data

    # Fit and score the model through the package helper
    outcome = apply_mda(train_X, train_y, test_X, test_y, all_X, all_y)

    # Text summary of the evaluation
    print(f"\nResults for {dataset_name} dataset:")
    print(f"Accuracy: {outcome['accuracy']:.4f}")
    print("\nConfusion Matrix:")
    print(outcome['conf_matrix'])
    print("\nClassification Report:")
    print(outcome['class_report'])
    print(f"\nCross-Validation Scores: {outcome['cv_scores']}")
    print(f"Mean CV Score: {outcome['cv_scores'].mean():.4f}")

    importance = outcome['feature_importance']
    if importance is None:
        # Nothing to tabulate or plot
        return outcome

    print("\nTop 10 Most Important Features:")
    display(importance.head(10))

    # Bar chart of the 15 leading features, shown first and then saved
    fig = plot_feature_importance(importance, top_n=15)
    plt.title(f'Top 15 Feature Importances - {dataset_name} Dataset', fontsize=16)
    plt.tight_layout()
    plt.show()
    save_figure(fig, f"feature_importance_{dataset_name}.png")

    return outcome

# Run the MDA evaluation once per prepared dataset, keyed by dataset name
mda_results = {}

for name, prepared in datasets.items():
    split_data, full_data = prepared
    mda_results[name] = apply_mda_and_evaluate(name, split_data, full_data)

## 4. Visualize MDA Results

Let's visualize the results of the MDA to better understand the separation between recessionary and non-recessionary periods.

In [None]:
# Function to visualize MDA results
def visualize_mda_results(dataset_name, mda_results, split_data, full_data):
    """Plot and save the MDA projection and the discriminant time series.

    Parameters
    ----------
    dataset_name : str
        Label used in plot titles and in the saved file names.
    mda_results : dict or None
        Result mapping for this dataset; its 'model' entry is read here.
    split_data : tuple or None
        ``(X_train, X_test, y_train, y_test)``.
    full_data : tuple or None
        ``(X_full, y_full)``.

    Returns
    -------
    None
        A message is printed and nothing is drawn if any input is None.
    """
    if any(item is None for item in (mda_results, split_data, full_data)):
        print(f"No data available for {dataset_name}")
        return

    def _title_show_save(figure, title, filename):
        # Shared finishing steps: title, layout, display, then write to disk
        plt.title(title, fontsize=16)
        plt.tight_layout()
        plt.show()
        save_figure(figure, filename)

    train_X, test_X, train_y, test_y = split_data
    all_X, all_y = full_data

    # Projection of the train and test samples
    projection_fig = plot_mda_projection(mda_results, train_X, train_y, test_X, test_y)
    _title_show_save(
        projection_fig,
        f'MDA Projection - {dataset_name} Dataset',
        f"mda_projection_{dataset_name}.png",
    )

    # Discriminant values computed over the full dataset
    discriminant_df = create_discriminant_time_series(mda_results['model'], all_X, all_y)
    series_fig = plot_discriminant_time_series(discriminant_df)
    _title_show_save(
        series_fig,
        f'Discriminant Function Over Time - {dataset_name} Dataset',
        f"discriminant_time_series_{dataset_name}.png",
    )

# Draw the plots for every dataset that produced MDA results
for name, results in mda_results.items():
    if results is None:
        continue
    prepared = datasets[name]
    visualize_mda_results(name, results, prepared[0], prepared[1])

## 5. Identify Key Recession Predictors

Let's identify the most significant economic indicators for predicting recessions based on the MDA results.

In [None]:
# Function to identify key recession predictors
def identify_key_predictors(dataset_name, mda_results, top_n=10):
    """Print the leading features of an MDA run, grouped by feature type.

    Parameters
    ----------
    dataset_name : str
        Label used in the printed headings.
    mda_results : dict or None
        Result mapping for this dataset. Its 'feature_importance' entry must
        be None or a DataFrame with a 'Feature' column of feature names.
        NOTE(review): the first rows are treated as the most important ones,
        which assumes the table is already sorted by importance — confirm
        against apply_mda.
    top_n : int, default 10
        Number of leading rows of the importance table to report. The default
        keeps the original "top 10" behaviour.

    Returns
    -------
    None
        Output is printed/displayed only. A message is printed and nothing
        else happens when no feature importance is available.
    """
    if mda_results is None or mda_results['feature_importance'] is None:
        print(f"No feature importance available for {dataset_name}")
        return

    feature_importance = mda_results['feature_importance']

    print(f"\nKey Recession Predictors from {dataset_name} Dataset:")

    # Leading rows of the importance table
    top_features = feature_importance.head(top_n)
    display(top_features)

    # Group feature names by the naming suffix used for derived features.
    # '_lag' is checked first, so a name containing both '_lag' and a
    # rate-of-change marker is listed under 'Lag'.
    categories = {
        'Original': [],
        'Lag': [],
        'Rate of Change': [],
    }

    for feature in top_features['Feature']:
        if '_lag' in feature:
            bucket = 'Lag'
        elif '_pct_change' in feature or '_roc' in feature:
            bucket = 'Rate of Change'
        else:
            bucket = 'Original'
        categories[bucket].append(feature)

    print("\nFeatures by Category:")
    for category, features in categories.items():
        print(f"\n{category} Features:")
        for feature in features:
            print(f"- {feature}")

# Report the key predictors for every dataset that produced MDA results
for name, results in mda_results.items():
    if results is None:
        continue
    identify_key_predictors(name, results)

## 6. Conclusion

Based on the Multiple Discriminant Analysis, we have identified the most significant economic indicators for predicting recessions. These indicators can be used to develop an early warning system for potential economic downturns.

Key findings:
1. The most important predictors of recessions include [to be filled based on actual results]
2. The MDA model achieved an accuracy of [to be filled based on actual results] in classifying recessionary and non-recessionary periods
3. Lag variables and rates of change provide valuable information for predicting recessions

Next steps:
1. Develop a real-time monitoring system for these key indicators
2. Create a recession probability index based on the discriminant function
3. Explore additional modeling techniques to improve prediction accuracy