# Load Data

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
import itertools

In [2]:
idx = pd.read_csv('res_10.csv')


In [3]:
idx['ID'].unique()

array(['LUACTRUU Index', 'SPX Index', 'BCOM Index', 'LBUSTRUU Index',
       'LF98TRUU Index', 'LUATTRUU Index', 'LUMSTRUU Index',
       'LBEATREU Index', 'LP01TREU Index', 'LG30TRUU Index',
       'EMUSTRUU Index', 'LGTRTRUU Index', 'LGDRTRUU Index',
       'LUGCTRUU Index', 'LP06TREU Index', 'LF94TRUU Index',
       'LACHTRUU Index', 'LD08TRUU Index', 'LC07TRUU Index',
       'USYC2Y10 Index', 'LECPTREU Index'], dtype=object)

In [4]:
def return_correlation_ts(df,idx_1, idx_2, corr_window=30, ma_window=30):
    combined = pd.DataFrame()
    combined['DATE'] = df[df['ID'] == idx_1]['DATE']
    combined[idx_1] = df[df['ID'] == idx_1]['PX_LAST'].values
    combined[idx_2] = df[df['ID'] == idx_2]['PX_LAST'].values
    combined = combined[combined[idx_1].notna()]
    combined = combined[combined[idx_2].notna()]
    combined[f'corr_{idx_1}_{idx_2}'] = combined[idx_1].rolling(corr_window).corr(combined[idx_2])
    combined[f'MA_{ma_window}'] = combined[f'corr_{idx_1}_{idx_2}'].rolling(ma_window).mean()
    combined.index = pd.to_datetime(combined['DATE'].values)
    return combined

corr = return_correlation_ts(idx,'SPX Index', 'LUACTRUU Index')

In [5]:
def split_ts(ts, training_fold=2/3):
    train = int(ts.dropna().shape[0]*training_fold)
    y_train = ts.iloc[0:train].dropna()
    y_test = ts.iloc[train+1:-1].dropna()
    y_train = y_train.asfreq('D', method='bfill')
    y_test = y_test.asfreq('D', method='bfill')

    return y_train, y_test


In [6]:
y_train, y_test = split_ts(corr['MA_30'])


# DC Regressor Class

In [15]:
from sktime.forecasting.compose import make_reduction
from evaluation_registry import DCEvaluator

class RegressorDC():
    def __init__(self, regressor, y_train, y_test):
        self.regressor = regressor
        self.y_train = y_train
        self.y_test = y_test
    def fit_predict(self):
        """Returns predictions in pd.Series of float        
        """
        initial_window=int(len(self.y_train)*0.8)
        # fh = ForecastingHorizon(y_test.index, is_relative=False)
        
        forecaster = make_reduction(self.regressor, window_length=15, strategy="recursive")
        # cv = SlidingWindowSplitter(initial_window=initial_window, window_length=20)
        # param_grid = {"window_length": [7, 12, 15]}
        # forecaster = ForecastingGridSearchCV(forecaster, strategy="refit", cv=cv, param_grid=param_grid)

        for i in range(len(self.y_test)):
        
            date_index = pd.to_datetime([])
            y_pred = pd.Series(index=date_index)
            for i in range(len(self.y_test)):
                comb_y = pd.concat([self.y_train, self.y_test[0:i]])
                comb_y = comb_y.asfreq('D', method='bfill')
                forecaster.fit(y=comb_y, X=None, fh=[1])
                pred = forecaster.predict()
                y_pred[pred.index[0]]=pred.values[0]
            
            self.y_pred = y_pred
            return y_pred
        
    def convert_to_dc(self,y1,y2):
        """Converts y2 from pandas series of float to pandas series of int where 1 indicates 
        increase compared to the previous value in the series and 0 indicates a decrease
        Parameters
        ----------

        y1 : pandas series of float  
        y2 : pandas series of float

        Returns
        -------
            pandas series of int
        """
        concatenated = pd.concat([y1[-2:-1],y2])
        true_dc = concatenated.shift(-1) > concatenated
        true_dc = true_dc[0:-1]
        true_dc[true_dc==True] =1
        true_dc[true_dc==False] =0

        return true_dc.astype(int)
    
    def evaluate(self):

        evaluator = DCEvaluator()
        true_dc = self.convert_to_dc(y1=self.y_train, y2=self.y_test)
        pred_dc = self.convert_to_dc(y1=self.y_train, y2=self.y_pred)
        accuracy, f1,fpr, tpr, area_under_the_curve = evaluator.evaluate(y_train, true_dc, pred_dc)
        return accuracy, f1,fpr, tpr, area_under_the_curve
        
    


# Fit

In [13]:
from sklearn.ensemble import RandomForestRegressor
from sklearn import linear_model



In [16]:
regressor_lasso = linear_model.Lasso()
regressor_dc_lasso  = RegressorDC(regressor=regressor_lasso, y_train=y_train, y_test=y_test)
lasso_pred  = regressor_dc_lasso.fit_predict()


In [None]:
regressor_rf = RandomForestRegressor(n_jobs=12)

regressor_dc_rf  = RegressorDC(regressor=regressor_rf, y_train=y_train, y_test=y_test)

rf_pred = regressor_dc_rf.fit_predict()

# Evaluate

In [18]:
# accuracy_rf, f1_rf,fpr_rf, tpr_rf, area_under_the_curve_rf = regressor_dc_rf.evaluate()
accuracy_lasso, f1_rf,fpr_lasso, tpr_lasso, area_under_the_curve_lasso = regressor_dc_lasso.evaluate()

In [19]:
accuracy_lasso

0.3107789142407553