In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Step 1: Load and preprocess the data
df = pd.read_csv('archive/US_Recession.csv')

# Drop the 'Unnamed: 0' column
df.drop(columns=['Unnamed: 0'], inplace=True)

# Handle missing values (if any)
df.dropna(inplace=True)

# Separate predictors (X) and target variable (y)
X = df.drop(columns=['Recession'])  # Predictor variables
y = df['Recession']  # Target variable

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 2: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Step 3: Train the Logistic Regression Model
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

# Step 4: Extract the Coefficients (Beta Values)
intercept = logistic_model.intercept_[0]  # Intercept (beta_0)
coefficients = logistic_model.coef_[0]    # Coefficients (beta_1, beta_2, ..., beta_n)

# Combine intercept and coefficients with feature names
feature_names = ['Intercept'] + list(df.drop(columns=['Recession']).columns)
beta_values = [intercept] + list(coefficients)

# Create a DataFrame to display the beta values
beta_df = pd.DataFrame(beta_values, index=feature_names, columns=['Beta'])
print(beta_df)


                   Beta
Intercept     -2.624680
Price_x       -1.592147
INDPRO        -0.418250
CPI            0.206246
3 Mo          -0.230349
4 Mo          -0.121164
6 Mo          -0.293235
1 Yr          -0.333512
2 Yr          -0.290137
3 Yr          -0.248611
5 Yr          -0.106690
7 Yr           0.077335
10 Yr          0.397243
20 Yr         -0.365976
30 Yr          0.247132
GDP           -0.235949
Rate           1.119117
BBK_Index      0.874357
Housing_Index  1.723402
