In [1]:
from data import get_recessions, RecessionDatasetBuilder

# Assemble the modelling frame. Each features_config value appears to be the
# number of lagged columns emitted for that series (e.g. "Inflation": 2 lines
# up with the "(t-1)" and "(t-0)" columns of the resulting frame).
# NOTE(review): the meaning of window=6 is defined inside
# RecessionDatasetBuilder and is not visible from this notebook -- confirm.
builder = RecessionDatasetBuilder()
data = builder.create_data(
    features_config={
        "Real GDP": 3,
        "Unemployment Rate": 3,
        "Inflation": 2,
    },
    window=6,
)

# Recession data requested from the builder's own start date, so it is aligned
# with the beginning of the dataset built above.
recessions = get_recessions(builder.start_date)

In [2]:
# Display the assembled dataset: lagged feature columns plus the "Recession" label.
data

Unnamed: 0,Real GDP (t-2),Real GDP (t-1),Real GDP (t-0),Unemployment Rate (t-2),Unemployment Rate (t-1),Unemployment Rate (t-0),Inflation (t-1),Inflation (t-0),Recession
1968-02-01,0.946076,0.753853,2.039271,-0.1,-0.1,0.1,3.651861,3.673819,0
1968-03-01,0.946076,0.753853,2.039271,-0.1,0.1,-0.1,3.673819,4.142164,0
1968-04-01,0.753853,2.039271,1.670373,0.1,-0.1,-0.2,4.142164,4.155828,0
1968-05-01,0.753853,2.039271,1.670373,-0.1,-0.2,0.0,4.155828,4.088245,0
1968-06-01,0.753853,2.039271,1.670373,-0.2,0.0,0.2,4.088245,4.545569,0
...,...,...,...,...,...,...,...,...,...
2024-08-01,0.404802,0.738980,0.759510,0.1,0.1,0.0,4.179707,4.112096,0
2024-09-01,0.404802,0.738980,0.759510,0.1,0.0,-0.1,4.112096,4.012724,0
2024-10-01,0.738980,0.759510,0.607065,0.0,-0.1,0.0,4.012724,3.963395,0
2024-11-01,0.738980,0.759510,0.607065,-0.1,0.0,0.1,3.963395,3.876177,0


In [3]:
from sklearn.utils import compute_sample_weight

# Separate the "Recession" label from the feature columns.
y = data["Recession"]
X = data.drop(columns="Recession")

# One weight per row using scikit-learn's "balanced" heuristic, which weights
# each class inversely to its frequency in y.
# NOTE(review): sample_weights is not passed to any fit call in the cells
# visible here -- confirm it is consumed elsewhere or remove it.
sample_weights = compute_sample_weight("balanced", y)

In [4]:
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.pipeline import Pipeline
import pandas as pd

def tune_model(
    X: pd.DataFrame,
    y: pd.Series,
    model: Pipeline,
    param_grid: dict,
    n_splits: int = 5,
    scoring: str = "average_precision",
) -> Pipeline:
    """Grid-search ``model`` with time-ordered cross-validation.

    Folds come from ``TimeSeriesSplit``, so every validation fold lies strictly
    after the rows used to train on it; rows of ``X``/``y`` must therefore be
    in chronological order.

    Args:
        X: Feature matrix, one row per observation, sorted by time.
        y: Target labels aligned with ``X``.
        model: Pipeline (or any estimator) to tune; it is cloned by the
            search, not fitted in place.
        param_grid: Mapping of parameter names (``step__param`` for pipelines)
            to the list of values to try.
        n_splits: Number of ``TimeSeriesSplit`` folds. Defaults to 5, the
            value that was previously hard-coded.
        scoring: Metric used to rank parameter combinations. Defaults to
            ``"average_precision"``, the value that was previously hard-coded.

    Returns:
        The best-scoring estimator, refit on all of ``X``/``y``
        (``GridSearchCV``'s default ``refit=True``).

    Raises:
        ValueError: Propagated from scikit-learn, e.g. when ``n_splits`` is
            too large for the number of rows or a grid key is not a valid
            parameter of ``model``.
    """
    # Expanding-window splitter: preserves temporal order and avoids training
    # on observations that come after the validation fold.
    time_folds = TimeSeriesSplit(n_splits=n_splits)

    search = GridSearchCV(
        model,
        param_grid=param_grid,
        scoring=scoring,
        cv=time_folds,
    )
    search.fit(X, y)

    return search.best_estimator_

In [12]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# L1-penalised logistic regression on standardised features. liblinear is
# chosen as the solver alongside the "l1" penalty, and class_weight="balanced"
# reweights the classes inside the estimator itself.
lr = make_pipeline(
    StandardScaler(),
    LogisticRegression(
        solver="liblinear",
        penalty="l1",
        class_weight="balanced",
        random_state=42,
    ),
)

# RBF-kernel support vector classifier on standardised features.
# probability=True enables predict_proba on the fitted SVC.
# NOTE(review): unlike the logistic-regression pipeline, no class_weight is
# set here -- confirm the asymmetry is intentional.
rbf_svm = make_pipeline(
    StandardScaler(),
    SVC(
        random_state=42,
        probability=True,
        kernel="rbf",
    ),
)

In [13]:
from model import ModelSelector

# Candidates are (display name, pipeline, hyper-parameter grid) triples.
# Grid keys are prefixed with the step names make_pipeline derives from the
# estimator class names ("logisticregression", "svc").
# NOTE(review): the logistic-regression grid varies only the solver tolerance
# and iteration cap; regularisation strength (C) stays at its default.
# NOTE(review): svc__max_iter caps the SVC solver's iterations, so small values
# may stop before convergence -- confirm this is meant to be tuned.
selector = ModelSelector([
    ("Logistic Regression", lr, {
        "logisticregression__tol": [1e-5, 1e-4, 1e-3],
        "logisticregression__max_iter": [100, 500, 1000],
    }),
    ("RBF SVM", rbf_svm, {
        "svc__C": [0.001, 0.01, 0.1, 1],
        "svc__max_iter": [100, 500, 1000],
    }),
])
selector.fit(X, y)



In [14]:
# Tabulate the evaluation metrics for each fitted candidate, one column per model.
# NOTE(review): this passes the same X, y that were given to selector.fit;
# whether compare_models scores on held-out folds is not visible here -- confirm.
selector.compare_models(X, y)



Unnamed: 0,Logistic Regression,RBF SVM
Average Precision,0.222834,0.266827
Weighted Average Precision,0.622147,0.660281
ROC AUC,0.560734,0.592412
Accuracy,0.826549,0.630088
Weighted Accuracy,0.498805,0.54499
Precision,0.226087,0.120085
Weighted Precision,0.338865,0.544057
Recall,0.114286,0.457143
Weighted Recall,0.114286,0.457143
