In [1]:
import numpy as np
import pandas as pd
from preprocessing.wrangling import get_indi_df, get_labels, slide_and_flatten
from preprocessing.extract_features import get_all_ta_features, get_wavelet_coeffs
from evaluation.eval import all_stock_estimator_test, all_stock_estimator_test_cls
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score, accuracy_score, f1_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, HistGradientBoostingRegressor, StackingRegressor
from sklearn.svm import SVR, SVC
from numpy.lib.stride_tricks import sliding_window_view
from xgboost import XGBRegressor, XGBClassifier
from sklearn.pipeline import make_pipeline
import os

In [2]:
class PersistanceModel:
    """Naive persistence baseline: the prediction is one input column, echoed back.

    Nothing is learned in ``fit``; ``predict`` returns the column of ``X``
    selected by ``persist_colname`` unchanged. The class implements the small
    duck-typed estimator surface used in this notebook (``fit``, ``predict``,
    ``get_params``, ``set_params``).

    Parameters
    ----------
    persist_colname : int or column label, default=0
        Column to echo. An integer is a positional column index. Any other
        value is treated as a column label and is resolved against
        ``X.columns``, which requires ``X`` to be a pandas DataFrame at
        predict time.
    """

    # Declared on the class so every instance carries it, including instances
    # re-created from get_params(); it does not have to be patched on afterwards.
    _estimator_type = "regressor"

    def __init__(self, persist_colname=0):
        self.persist_colname = persist_colname

    def __repr__(self):
        return "PersistanceModel(persist_colname={})".format(self.persist_colname)

    def fit(self, X, y):
        """Validate the training data; there is nothing to learn.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            Raised by ``check_X_y`` when the input is invalid, including when
            ``X`` and ``y`` have a different number of samples.
        """
        # check_X_y already enforces consistent lengths, so no extra shape
        # comparison is needed here.
        check_X_y(X, y)
        return self

    def predict(self, X):
        """Return the persisted column of ``X``.

        Parameters
        ----------
        X : array-like or pandas.DataFrame of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
        """
        # check_is_fitted(self)
        column = self.persist_colname
        # check_array() converts DataFrames to plain ndarrays and drops the
        # column labels, so a label has to be translated into a position
        # *before* validation. Integer values stay positional, as before.
        if isinstance(X, pd.DataFrame) and not isinstance(column, (int, np.integer)):
            column = X.columns.get_loc(column)
        X = check_array(X)
        return X[:, column]

    def get_params(self, deep=True):
        """Return the constructor parameters as a dict (``deep`` is accepted but unused)."""
        return {"persist_colname": self.persist_colname}

    def set_params(self, **parameters):
        """Set each given parameter as an attribute and return ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

In [3]:
# Smoke check: echoing column 0 of a 2x2 input should give the first element of each row.
PersistanceModel(persist_colname=0).predict([[1, 2], [3, 4]])

array([1, 3])

In [4]:
from sklearn.utils.estimator_checks import check_estimator

# Run scikit-learn's estimator checks against the persistence baseline.
check_estimator(PersistanceModel(persist_colname=0))

In [5]:
# --- Input locations: stock lists and per-stock OHLCV files ------------------
list_dir = 'data_collection/stocks_list'
list_prefix = "ind_nifty"
list_suffix = "list.csv"
save_dir = 'data_collection/ohlcv_data'
save_prefix = "ohlcv_"
save_suffix = ".csv"

# --- Output locations for this experiment ------------------------------------
# results_file = 'results/all_stock_ta_svr.csv'
results_file = 'results/stacking_regressor_new2.csv'
save_preds_stack = 'preds/reg/stacking_regressor_new2.csv'

# One seed shared by every stochastic estimator below, so a fresh
# "Restart & Run All" builds the same models instead of reseeding each run.
RANDOM_STATE = 42

# rfg_estimator = make_pipeline((RobustScaler()),
#                     (RandomForestRegressor()))

# --- Base learners -----------------------------------------------------------
persistence_model = PersistanceModel(persist_colname=0)
# Mark the hand-written baseline as a regressor for scikit-learn.
persistence_model._estimator_type = "regressor"

# svr_estimator = make_pipeline(SVR())
hgbr = make_pipeline(
    HistGradientBoostingRegressor(max_iter=50, random_state=RANDOM_STATE)
)
abr = make_pipeline(
    RobustScaler(),
    AdaBoostRegressor(n_estimators=50, random_state=RANDOM_STATE),
)
all_estimators = [('pers', persistence_model), ('hgbr', hgbr), ('abr', abr)]

# --- Stacked model: a small random forest combines the base predictions ------
final_estimator = RandomForestRegressor(n_estimators=10, random_state=RANDOM_STATE)
stacking_regressor = StackingRegressor(
    estimators=all_estimators,
    final_estimator=final_estimator,
)

# --- Run the evaluation ------------------------------------------------------
# NOTE(review): the meaning of n_tr / n_ts / len_window / post_process /
# skip_till is defined in evaluation.eval, not here; confirm there before tuning.
res1 = all_stock_estimator_test(
    list_dir=list_dir,
    list_prefix=list_prefix,
    list_suffix=list_suffix,
    save_dir=save_dir,
    save_prefix=save_prefix,
    save_suffix=save_suffix,
    results_file=results_file,
    estimator=stacking_regressor,
    n_tr=300,
    n_ts=1,
    scorers=[mean_squared_error, mean_absolute_percentage_error, r2_score],
    len_window=10,
    post_process=True,
    keep_features={'ohlcv'},
    passed_comment='stacking_ohlcv',
    start_date="2017-01-01",
    end_date=None,
    target_column='Close',
    save_preds=save_preds_stack,
    skip_till='RAJESHEXPO.NS',
)


# all_stock_estimator_test_cls(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=baseline_svc,
#  n_tr=300, n_ts=1, scorers=[ accuracy_score, f1_score, precision_score, recall_score], len_window=30, 
#  keep_features={'ohlcv'}, passed_comment='baseline_svc_ohlcv_', start_date="2017-01-01", end_date=None, save_preds=save_preds_svc)

# all_stock_estimator_test_cls(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=baseline_svc,
#  n_tr=300, n_ts=1, scorers=[ accuracy_score, f1_score, precision_score, recall_score], len_window=30, 
#  keep_features={'ta'}, passed_comment='baseline_ta_', start_date="2017-01-01", end_date=None, save_preds=save_preds_svc_ta)

# all_stock_estimator_test_cls(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=baseline_svc,
#  n_tr=300, n_ts=1, scorers=[ accuracy_score, f1_score, precision_score, recall_score], len_window=30, skip_till="JINDALSTEL.NS",
#  keep_features={'ohlcv', 'ta'}, passed_comment='baseline_ta_ohlcv_', start_date="2017-01-01", end_date=None, save_preds=save_preds_svc_ta_ohlcv)

# all_stock_estimator_test_cls(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=baseline_svc,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=30, 
#  keep_features={'ta'}, comment='ta_', start_date="2017-01-01", end_date=None)

# all_stock_estimator_test_cls(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=baseline_xgb,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=30, 
#  keep_features={'ta'}, comment='ta_', start_date="2017-01-01", end_date=None)


# ## RUN BASELINE_C FROM TCS.NS
# res1 = all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=baseline_svr,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=30, 
#  keep_features={'c'}, passed_comment='baseline_c_', start_date="2015-01-01", end_date=None, target_column='Close', skip_till="TCS.NS")

# res2 = all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=baseline_svr,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=30, 
#  keep_features={'c'}, passed_comment='baseline_c_', start_date="2015-01-01", end_date=None, target_column='Close')

# res3 = all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=baseline_svr,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=30, 
#  keep_features={'c'}, passed_comment='baseline_c_', start_date="2017-01-01", end_date=None)

# res4 = all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=svr_estimator,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=60, 
#  keep_features={'ta'}, comment='ohlcv_', start_date="2017-01-01", end_date=None)

# res5 = all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=svr_estimator,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=120, 
#  keep_features={'ohlcv'}, comment='ohlcv_', start_date="2017-01-01", end_date=None)

# res6 = all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=svr_estimator,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=120, 
#  keep_features={'ta'}, comment='ta_', start_date="2017-01-01", end_date=None)

#  all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=svr_estimator,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=180, 
#  keep_features={'ta'}, comment='ta_', start_date="2015-01-01", end_date=None)

#   all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=svr_estimator,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=240, 
#  keep_features={'ta'}, comment='ta_', start_date="2015-01-01", end_date=None)

#   all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=svr_estimator,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=300, 
#  keep_features={'ta'}, comment='ta_', start_date="2015-01-01", end_date=None)

#   all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=svr_estimator,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=450, 
#  keep_features={'ta'}, comment='ta_', start_date="2015-01-01", end_date=None)

#   all_stock_estimator_test(list_dir=list_dir, list_prefix=list_prefix, list_suffix=list_suffix, save_dir=save_dir, save_prefix=save_prefix, save_suffix=save_suffix, results_file=results_file, estimator=svr_estimator,
#  n_tr=300, n_ts=1, scorers=[mean_squared_error,mean_absolute_percentage_error, r2_score], len_window=600, 
#  keep_features={'ta'}, comment='ta_', start_date="2015-01-01", end_date=None)

Starting  AMARAJABAT.NS
Starting Reg
Result obtained for AMARAJABAT.NS
Starting  ASHOKLEY.NS
Starting Reg
Result obtained for ASHOKLEY.NS
Starting  BAJAJ-AUTO.NS
Starting Reg
Result obtained for BAJAJ-AUTO.NS
Starting  BALKRISIND.NS
Starting Reg
Result obtained for BALKRISIND.NS
Starting  BHARATFORG.NS
Starting Reg
Result obtained for BHARATFORG.NS
Starting  BOSCHLTD.NS
Starting Reg
Result obtained for BOSCHLTD.NS
Starting  EICHERMOT.NS
Starting Reg
Result obtained for EICHERMOT.NS
Starting  EXIDEIND.NS
Starting Reg
Result obtained for EXIDEIND.NS
Starting  HEROMOTOCO.NS
Starting Reg
Result obtained for HEROMOTOCO.NS
Starting  MRF.NS
Starting Reg
Result obtained for MRF.NS
Starting  M&M.NS
Starting Reg
Result obtained for M&M.NS
Starting  MARUTI.NS
Starting Reg
Result obtained for MARUTI.NS
Starting  TVSMOTOR.NS
Starting Reg
Result obtained for TVSMOTOR.NS
Starting  TATAMOTORS.NS
Starting Reg
Result obtained for TATAMOTORS.NS
Starting  TIINDIA.NS
Starting Reg
Result obtained for TIINDI

KeyboardInterrupt: 