In [None]:
import numpy as np
import pandas as pd
from glob import glob

from matplotlib import pyplot as plt

from parse_input_file import parse_file
from create_feature_dfs import compose_signals_df, create_initial_input, create_df_only_outs

from statsmodels.stats.stattools import durbin_watson
from scipy.stats import jarque_bera

from ARMA import ARMA
from metrics import akkake_criteria, rmse, determination_coef, rsse, adjusted_det_coef, log_likelihood, mean_dependent_var, std_dependent_var, shwarz_criteria   

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

In [None]:
# Input files: a dedicated example for the Durbin-Watson test and the RTS
# series used for the descriptive statistics and ARMA experiments below.
dw_example_path = 'data/example_for_DW.txt'
rts_series_path = 'data/rts1.txt'

features_df_dorbin = create_df_only_outs(dw_example_path)
features_df_time_series = create_df_only_outs(rts_series_path)

In [None]:
# Build one feature column per file matched by the pattern; columns are named
# x_0, x_1, ... in the order glob() yields the paths.
# NOTE(review): glob() does not guarantee any ordering, so which file becomes
# which x_i may differ between machines — check against the file listing cell.
rts_feature_paths = glob('data/RTS*')
feature_columns = {}
for position, path in enumerate(rts_feature_paths):
    feature_columns['x_{}'.format(position)] = create_df_only_outs(path)
feature_df = pd.DataFrame(feature_columns)

# Target column, loaded from the same file as features_df_time_series.
feature_df['y'] = create_df_only_outs('data/rts1.txt')

In [None]:
# Preview the first rows of the assembled feature/target frame.
feature_df.head()

# Durbin-Watson statistic

In [None]:
# Durbin-Watson statistic computed directly on the example data set.
dw_statistic = durbin_watson(features_df_dorbin)
print("Durbin Watsons statistic: {}".format(dw_statistic))

# Time Series stats

In [None]:
# Histogram of the time-series values, split into 20 bins.
features_df_time_series.hist(bins=20)

In [None]:
# Run the Jarque-Bera normality test once and reuse both outputs (test
# statistic and p-value) instead of evaluating it twice on the same data.
jb_statistic, jb_p_value = jarque_bera(features_df_time_series)

# Descriptive statistics of the series; the Series is the cell's displayed output.
pd.Series(data=[features_df_time_series.mean(),
                 features_df_time_series.median(),
                 features_df_time_series.max(),
                 features_df_time_series.min(),
                 features_df_time_series.std(),
                 features_df_time_series.skew(),
                 features_df_time_series.kurtosis(),
                 jb_statistic,
                 jb_p_value],
          index=['mean',
                 'median',
                 'max',
                 'min',
                 'std',
                 'skewness',
                 'kurtosis',
                 'jarque_bera',
                 'jarque_bera_prob'])

# ARMA

In [None]:
def get_stats(y_true, y_pred, model_params):
    """Print a short table of fit-quality metrics for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Observed values. Must support ``y_true - y_pred`` (the residuals are
        passed to ``durbin_watson``).
    y_pred : array-like
        Predicted values.
    model_params
        Forwarded unchanged to ``akkake_criteria`` and ``rmse``. Callers in
        this notebook pass the number of fitted model coefficients.

    Returns
    -------
    None
        The metrics are printed as a ``pandas.Series``; nothing is returned.
    """
    # Filled in one entry at a time so the metrics are evaluated in the same
    # order in which they appear in the printed table.
    metrics = {}
    metrics['akaike'] = akkake_criteria(y_true, y_pred, model_params)
    metrics['rmse'] = rmse(y_true, y_pred, model_params)
    metrics['durbin watson'] = durbin_watson(y_true - y_pred)
    metrics['determination coef'] = determination_coef(y_true, y_pred)
    print(pd.Series(metrics))

In [None]:
# ARMA with on_residuals=True and the constructor's default moving-average
# settings (presumably a simple window of 5, going by the variable name —
# confirm in ARMA).
arma_ma_simple_5 = ARMA(on_residuals=True)
# fit_predict returns (predictions, targets) — note the order.
pred, target = arma_ma_simple_5.fit_predict(features_df_time_series)

n_arma_coefs = len(arma_ma_simple_5.arma_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_arma_coefs)

In [None]:
# Extended metric table for the most recent `pred` / `target` pair.
# The coefficient count and residuals are computed once and reused.
arma_coef_count = len(arma_ma_simple_5.arma_coefs)
arma_residuals = target - pred

pd.Series({
    'akaike': akkake_criteria(target, pred, arma_coef_count),
    'rmse': rmse(target, pred, arma_coef_count),
    'durbin watson': durbin_watson(arma_residuals),
    'determination coef': determination_coef(target, pred),
    'schwarz criteria': shwarz_criteria(target, pred, arma_coef_count),
    'rsse': rsse(target, pred),
    'adjusted det coef': adjusted_det_coef(target, pred, arma_coef_count),
    'log likelihood': log_likelihood(target, pred),
    'mean dependent var': mean_dependent_var(target, pred),
    'std dependent var': std_dependent_var(target, pred),
})

In [None]:
# Show the `arma_coefs` of the model built with ARMA(on_residuals=True).
arma_ma_simple_5.arma_coefs

In [None]:
# Re-bind pred/target to the model's stored `ar_results` — presumably the
# output of the initial autoregressive stage; confirm in ARMA.
# NOTE(review): this overwrites the ARMA pred/target from the cells above,
# so re-running those cells out of order will report different numbers.
pred, target = arma_ma_simple_5.ar_results

n_lin_reg_coefs = len(arma_ma_simple_5.initial_lin_reg_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_lin_reg_coefs)

In [None]:
# Show the model's `initial_lin_reg_coefs` — presumably the coefficients of
# the initial linear-regression (AR) stage; confirm in ARMA.
arma_ma_simple_5.initial_lin_reg_coefs

In [None]:
# ARMA with on_residuals=True and a moving-average window of 10.
# NOTE(review): the name `arma_ma_simple_5` is reused although the window
# is no longer the default.
arma_ma_simple_5 = ARMA(on_residuals=True, ma_window_size=10)
pred, target = arma_ma_simple_5.fit_predict(features_df_time_series)

n_arma_coefs = len(arma_ma_simple_5.arma_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_arma_coefs)

In [None]:
# Show the `arma_coefs` of ARMA(on_residuals=True, ma_window_size=10).
arma_ma_simple_5.arma_coefs

In [None]:
# ARMA with on_residuals=True and ma_type='exp' (default window size).
# NOTE(review): the name `arma_ma_simple_5` is reused although the
# moving-average type is no longer the default.
arma_ma_simple_5 = ARMA(on_residuals=True, ma_type='exp')
pred, target = arma_ma_simple_5.fit_predict(features_df_time_series)

n_arma_coefs = len(arma_ma_simple_5.arma_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_arma_coefs)

In [None]:
# Show the `arma_coefs` of ARMA(on_residuals=True, ma_type='exp').
arma_ma_simple_5.arma_coefs

In [None]:
# ARMA with on_residuals=True, ma_type='exp' and a window of 10.
# NOTE(review): the name `arma_ma_simple_5` no longer describes this setup.
arma_ma_simple_5 = ARMA(on_residuals=True, ma_type='exp', ma_window_size=10)
pred, target = arma_ma_simple_5.fit_predict(features_df_time_series)

n_arma_coefs = len(arma_ma_simple_5.arma_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_arma_coefs)

In [None]:
# Show the `arma_coefs` of ARMA(on_residuals=True, ma_type='exp', ma_window_size=10).
arma_ma_simple_5.arma_coefs

In [None]:
# Same experiment as the first ARMA cell, but with on_residuals=False
# (default moving-average settings).
arma_ma_simple_5 = ARMA(on_residuals=False)
pred, target = arma_ma_simple_5.fit_predict(features_df_time_series)

n_arma_coefs = len(arma_ma_simple_5.arma_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_arma_coefs)

In [None]:
# Show the `arma_coefs` of ARMA(on_residuals=False).
arma_ma_simple_5.arma_coefs

In [None]:
# ARMA with on_residuals=False and a moving-average window of 10.
# NOTE(review): the name `arma_ma_simple_5` is reused although the window
# is no longer the default.
arma_ma_simple_5 = ARMA(on_residuals=False, ma_window_size=10)
pred, target = arma_ma_simple_5.fit_predict(features_df_time_series)

n_arma_coefs = len(arma_ma_simple_5.arma_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_arma_coefs)

In [None]:
# Show the `arma_coefs` of ARMA(on_residuals=False, ma_window_size=10).
arma_ma_simple_5.arma_coefs

In [None]:
# ARMA with on_residuals=False and ma_type='exp' (default window size).
# NOTE(review): the name `arma_ma_simple_5` is reused although the
# moving-average type is no longer the default.
arma_ma_simple_5 = ARMA(on_residuals=False, ma_type='exp')
pred, target = arma_ma_simple_5.fit_predict(features_df_time_series)

n_arma_coefs = len(arma_ma_simple_5.arma_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_arma_coefs)

In [None]:
# Show the `arma_coefs` of ARMA(on_residuals=False, ma_type='exp').
arma_ma_simple_5.arma_coefs

In [None]:
# ARMA with on_residuals=False, ma_type='exp' and a window of 10.
# NOTE(review): the name `arma_ma_simple_5` no longer describes this setup.
arma_ma_simple_5 = ARMA(on_residuals=False, ma_type='exp', ma_window_size=10)
pred, target = arma_ma_simple_5.fit_predict(features_df_time_series)

n_arma_coefs = len(arma_ma_simple_5.arma_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_arma_coefs)

In [None]:
# Show the `arma_coefs` of ARMA(on_residuals=False, ma_type='exp', ma_window_size=10).
arma_ma_simple_5.arma_coefs

# Multiple regression

In [None]:
# Pairwise correlation matrix of all columns (the x_i features and the target y).
feature_df.corr()

In [None]:
# Standardise every column — the x_i features and the target 'y' alike —
# and rebind `feature_df` to the scaled frame, so all later cells work on
# standardised data. The frame is rebuilt from the scaler's output using only
# the original column labels.
original_columns = feature_df.columns
scaled_feature_df = StandardScaler().fit_transform(feature_df)
feature_df = pd.DataFrame(data=scaled_feature_df, columns=original_columns)

In [None]:
# Split the scaled frame into design matrix and response once, then fit and
# predict in-sample on the same data.
regressors = feature_df.drop(columns='y')
response = feature_df['y']

linear_reg = LinearRegression()
linear_reg.fit(regressors, response)
pred = linear_reg.predict(regressors)

# NOTE(review): the parameter count is hard-coded as 7; it presumably should
# track the number of fitted coefficients (which depends on how many files
# the glob matched) — confirm.
get_stats(y_true=response, y_pred=pred, model_params=7)

In [None]:
# Extended metric table for the multiple-regression fit.
# NOTE(review): 7 is the hard-coded parameter count used throughout this
# section — confirm it matches the fitted model.
n_model_params = 7
y_scaled = feature_df['y']

pd.Series({
    'akaike': akkake_criteria(y_scaled, pred, n_model_params),
    'rmse': rmse(y_scaled, pred, n_model_params),
    'durbin watson': durbin_watson(y_scaled - pred),
    'determination coef': determination_coef(y_scaled, pred),
    'schwarz criteria': shwarz_criteria(y_scaled, pred, n_model_params),
    'rsse': rsse(y_scaled, pred),
    'adjusted det coef': adjusted_det_coef(y_scaled, pred, n_model_params),
    'log likelihood': log_likelihood(y_scaled, pred),
    'mean dependent var': mean_dependent_var(y_scaled, pred),
    'std dependent var': std_dependent_var(y_scaled, pred),
})

In [None]:
# Fitted intercept and coefficients of the linear regression; coefficients
# follow the column order of the design matrix (feature_df without 'y').
linear_reg.intercept_, linear_reg.coef_

In [None]:
# List the files matched by the feature pattern, to relate x_0, x_1, ... back
# to file names.
# NOTE(review): this assumes glob() yields the same order as when feature_df
# was built; glob() itself makes no ordering guarantee — verify.
glob('data/RTS*')

# Multiple ARMA

In [None]:
# ARMA on the scaled target 'y', with the remaining scaled columns passed as a
# second argument to fit_predict (presumably exogenous regressors — confirm
# in ARMA).
exogenous_df = feature_df.drop(columns='y')

arma_ma_simple_5 = ARMA(on_residuals=True, ma_window_size=5)
pred, target = arma_ma_simple_5.fit_predict(feature_df['y'], exogenous_df)

n_arma_coefs = len(arma_ma_simple_5.arma_coefs)
get_stats(y_true=target, y_pred=pred, model_params=n_arma_coefs)

In [None]:
# Extended metric table for the multiple-ARMA fit from the cell above.
# The coefficient count and residuals are computed once and reused.
multi_arma_coef_count = len(arma_ma_simple_5.arma_coefs)
multi_arma_residuals = target - pred

pd.Series({
    'akaike': akkake_criteria(target, pred, multi_arma_coef_count),
    'rmse': rmse(target, pred, multi_arma_coef_count),
    'durbin watson': durbin_watson(multi_arma_residuals),
    'determination coef': determination_coef(target, pred),
    'schwarz criteria': shwarz_criteria(target, pred, multi_arma_coef_count),
    'rsse': rsse(target, pred),
    'adjusted det coef': adjusted_det_coef(target, pred, multi_arma_coef_count),
    'log likelihood': log_likelihood(target, pred),
    'mean dependent var': mean_dependent_var(target, pred),
    'std dependent var': std_dependent_var(target, pred),
})

In [None]:
# Show the `arma_coefs` of the multiple-ARMA model (on_residuals=True, ma_window_size=5).
arma_ma_simple_5.arma_coefs