# Supervised data preparation for ML approaches to recession prediction
- Dataframe index: Date (Monthly from 1971-01 to 2023-02)
- Features: values from each econ index
- Labels: recession within M months (M=0, 6, 12)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

## 0. Try with only index 5

In [24]:
# Read the index-5 (NFCICREDIT) series from disk and preview its first rows.
df = pd.read_csv(
    filepath_or_buffer='../../Datasets/index-5-NFCICREDIT.csv',  # Dataset file
)
df.head()

Unnamed: 0,DATE,NFCICREDIT
0,1971-01-08,-1.90407
1,1971-01-15,-1.90886
2,1971-01-22,-1.91133
3,1971-01-29,-1.91241
4,1971-02-05,-1.91255


In [25]:
# Parse the DATE strings into timestamps, then use them as the row index.
# A Series (not a column name) is passed to set_index, so the DATE column
# is kept as a regular column alongside the new index.
df = df.assign(DATE=pd.to_datetime(df['DATE']))
df = df.set_index(df['DATE'], drop=True)
df.head()

Unnamed: 0_level_0,DATE,NFCICREDIT
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1
1971-01-08,1971-01-08,-1.90407
1971-01-15,1971-01-15,-1.90886
1971-01-22,1971-01-22,-1.91133
1971-01-29,1971-01-29,-1.91241
1971-02-05,1971-02-05,-1.91255


In [31]:
THRES_MONTH = 6 # we will consider recession within THRES_MONTH months
# (start, end) date strings of each recession period used to build the labels.
recession_periods = [('1953-07-01', '1954-05-01'), ('1957-08-01', '1958-04-01'), ('1960-04-01', '1961-02-01'), ('1969-12-01', '1970-11-01'),
                     ('1973-11-01', '1975-03-01'), ('1980-01-01', '1980-07-01'), ('1981-07-01', '1982-11-01'), ('1990-07-01', '1991-03-01'),
                     ('2001-03-01', '2001-11-01'), ('2007-12-01', '2009-06-01'), ('2020-02-01', '2020-04-01')]

def is_recession(date, thres_month=None):
    """Return 1 if `date` falls in, or shortly before, a listed recession, else 0.

    A date is labelled 1 when, for any (start, end) pair in
    `recession_periods`, it lies in the closed interval
    [start - thres_month months, end].

    Parameters
    ----------
    date : timestamp-like
        Date to classify; anything accepted by `pd.Timestamp`.
    thres_month : int, optional
        Number of months before a recession start that still count as
        positive. When omitted, the module-level `THRES_MONTH` is read at
        call time, so existing code that rebinds that global keeps working.

    Returns
    -------
    int
        1 or 0.
    """
    if thres_month is None:
        thres_month = THRES_MONTH
    date = pd.Timestamp(date)
    lead = pd.DateOffset(months=-thres_month)
    for start, end in recession_periods:
        # NOTE(review): the upper bound is the literal end date in
        # recession_periods (the 1st of a month), so later dates within
        # that same month are labelled 0 -- confirm this is intended.
        if pd.to_datetime(start) + lead <= date <= pd.to_datetime(end):
            return 1
    return 0

# Add one label column per look-ahead window. is_recession reads the global
# THRES_MONTH, so the loop rebinds it before each pass (it ends at 12).
for THRES_MONTH, label_col in (
    (0, 'Recession'),
    (6, 'Recession_within_6mo'),
    (12, 'Recession_within_12mo'),
):
    df[label_col] = df['DATE'].map(is_recession)

# Spot-check the rows around the run-up to the 1973-11 recession start.
df.loc['1973-04-01':'1974-01-01']

Unnamed: 0_level_0,DATE,NFCICREDIT,Recession_within_6mo,Recession_within_12mo,Recession
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1973-04-06,1973-04-06,-2.52859,0,1,0
1973-04-13,1973-04-13,-2.51344,0,1,0
1973-04-20,1973-04-20,-2.49494,0,1,0
1973-04-27,1973-04-27,-2.47244,0,1,0
1973-05-04,1973-05-04,-2.44525,1,1,0
1973-05-11,1973-05-11,-2.41299,1,1,0
1973-05-18,1973-05-18,-2.3742,1,1,0
1973-05-25,1973-05-25,-2.32808,1,1,0
1973-06-01,1973-06-01,-2.27374,1,1,0
1973-06-08,1973-06-08,-2.21059,1,1,0


## 1. Date-Recession Data
- Dataframe index: Date (monthly, month-end, from 1952-10 to 2023-02)
- Labels: recession within M months (M=0, 6, 12)

In [51]:
# Month-end calendar from 1952-10-31 through 2023-02-28. The explicit
# MonthEnd offset replaces the 'M' alias, which is deprecated in recent pandas.
df_rec = pd.DataFrame(
    {'DATE': pd.date_range(start='1952-10-01', end='2023-03-01', freq=pd.offsets.MonthEnd())}
)
# Passing the Series (not the column name) keeps DATE as a column as well as
# the index; the labeling cell maps over that column.
df_rec = df_rec.set_index(df_rec['DATE'])
df_rec.head()

Unnamed: 0_level_0,DATE
DATE,Unnamed: 1_level_1
1952-10-31,1952-10-31
1952-11-30,1952-11-30
1952-12-31,1952-12-31
1953-01-31,1953-01-31
1953-02-28,1953-02-28


In [52]:
# Add one label column per look-ahead window. is_recession reads the global
# THRES_MONTH, so the loop rebinds it before each pass (it ends at 12).
for THRES_MONTH, label_col in (
    (0, 'Recession'),
    (6, 'Recession_within_6mo'),
    (12, 'Recession_within_12mo'),
):
    df_rec[label_col] = df_rec['DATE'].map(is_recession)

# Spot-check the months around the run-up to the 1973-11 recession start.
df_rec.loc['1973-04-01':'1974-01-01']

Unnamed: 0_level_0,DATE,Recession,Recession_within_6mo,Recession_within_12mo
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1973-04-30,1973-04-30,0,0,1
1973-05-31,1973-05-31,0,1,1
1973-06-30,1973-06-30,0,1,1
1973-07-31,1973-07-31,0,1,1
1973-08-31,1973-08-31,0,1,1
1973-09-30,1973-09-30,0,1,1
1973-10-31,1973-10-31,0,1,1
1973-11-30,1973-11-30,1,1,1
1973-12-31,1973-12-31,1,1,1


In [53]:
# The dates already live in the index, so drop the duplicate DATE column.
# errors='ignore' lets this cell be re-run after the column is already gone
# instead of raising KeyError.
df_rec = df_rec.drop(columns=['DATE'], errors='ignore')
df_rec.head()

Unnamed: 0_level_0,Recession,Recession_within_6mo,Recession_within_12mo
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1952-10-31,0,0,0
1952-11-30,0,0,0
1952-12-31,0,0,0
1953-01-31,0,0,0
1953-02-28,0,0,0


In [54]:
# Write the monthly recession-label table (DATE index plus label columns) to disk.
df_rec.to_csv(path_or_buf='../../Datasets/date-recession-monthly.csv')

## 2. Date-Recession with feature values (econ indices)
- Dataframe index: Date (Monthly from 1971-01 to 2023-02)
- Features: values from each econ index
    - for now exclude index 2 (New Orders) because it has too little data (starts from 1992-02)
- Labels: recession within M months (M=0, 6, 12)