# Model Training

Estimates the SVM models and saves the models and necessary files (e.g. scalers) for productionalization

**Important** - the scikit-learn version used here must match the version that will be used in production. Therefore, pin the installed scikit-learn version both here and in production

`pip install scikit-learn==0.21.3`


In [25]:
import path
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import pandas as pd 
import json
from datetime import datetime, timedelta
import pickle

In [92]:
# Load the final processed dataset used for model training
final_data_csv = path.dir_path + "data/processed_data/final_data.csv"
full_df = pd.read_csv(final_data_csv)
 

In [48]:
# Specify the cross-validation and prediction (test) date windows.
# 'pred_end' is the first day of the current month, so the prediction window
# extends each time this cell is re-run in a later month.
now = datetime.now()
year = now.year
month = now.strftime('%m')  # zero-padded so the date string sorts correctly
most_recent_date = f'{year}-{month}-01'

testing_dates = {
    'cv_start': '2010-10-01',
    'cv_end': '2021-07-01',
    'pred_start': '2021-08-01',
    'pred_end': most_recent_date,
}

# Order rows chronologically and renumber them 0..n-1 so that index labels
# can be used as row positions further down.
full_df = full_df.sort_values(['Dates'], ascending=True).reset_index(drop=True)

# Rows whose date falls inside the prediction window (both ends inclusive).
# NOTE(review): 'Dates' is compared against 'YYYY-MM-DD' strings, so this
# assumes the column holds ISO-formatted date strings - confirm.
date_condition = full_df['Dates'].between(testing_dates['pred_start'],
                                          testing_dates['pred_end'])
pred_indices = list(full_df[date_condition].index)

# Input features fed to the models
feature_names = [
    'Payrolls_3mo_vs_12mo',
    'Real_Fed_Funds_Rate_12mo_chg',
    'CPI_3mo_pct_chg_annualized',
    '10Y_Treasury_Rate_12mo_chg',
    '3M_10Y_Treasury_Spread',
    'S&P_500_12mo_chg',
]

# Target columns available in the dataset
output_names = [
    'Recession',
    'Recession_within_6mo',
    'Recession_within_12mo',
    'Recession_within_24mo',
]



#### SVM Hyperparameters for Gamma and C

In [82]:
# SVC regularisation parameter C, keyed by forecast horizon in months
C = {24: 0.075, 12: 0.5, 6: 2.5}

# RBF kernel coefficient gamma; the same value is used for every horizon
gamma = {horizon: 0.041666666666666005 for horizon in C}


#### Save the model state for productionalization

In [90]:

# Train one RBF-kernel SVM per forecast horizon (in months) on every row that
# precedes the prediction window, then pickle the fitted scaler and model so
# they can be loaded in production.
models = [6, 12, 24]

# Fail with a clear message rather than an IndexError on pred_indices[0].
if not pred_indices:
    raise ValueError(
        "No rows fall inside the prediction window; cannot determine where "
        "the training data ends."
    )

# .loc slicing is label-based and inclusive, so stop one label before the
# first prediction row (full_df was re-indexed 0..n-1 after sorting by date).
last_training_idx = pred_indices[0] - 1

# The feature matrix is identical for every horizon, so slice it once.
training_x = full_df.loc[:last_training_idx, feature_names]

for model in models:
    output_name = f'Recession_within_{model}mo'
    optimal_C = C[model]
    optimal_gamma = gamma[model]

    training_y = full_df.loc[:last_training_idx, output_name]

    # Fit a fresh scaler per horizon so every saved model has a matching
    # scaler file next to it.
    scaler = StandardScaler()
    training_x_scaled = scaler.fit_transform(training_x)

    # probability=True enables predict_proba on the saved model;
    # random_state is fixed so re-running reproduces the same model.
    svm = SVC(C=optimal_C, kernel='rbf', gamma=optimal_gamma,
              probability=True, tol=1e-3, random_state=123,
              class_weight='balanced')
    svm.fit(X=training_x_scaled, y=training_y)

    # Persist the fitted objects; they must be unpickled with the same
    # scikit-learn version that created them.
    with open(f"./models/scaler{model}.pickle", 'wb') as s:
        pickle.dump(scaler, s)
    with open(f"./models/model{model}.pickle", 'wb') as m:
        pickle.dump(svm, m)