# In [1]:
# Data manipulation and analysis
import pandas as pd
import numpy as np

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Suppress warnings for cleaner output.
# NOTE: the "ignore" action is installed with no category or module filter,
# so it applies to every warning issued through the `warnings` module from
# this point on, not just cosmetic ones.
import warnings
warnings.filterwarnings("ignore")

# Custom modules and functions
import capstone.portfolio.optimize as opt
from capstone.portfolio.prune import prune_recommended_portfolios
from capstone.model_selection import overunder_error, ts_cross_val_score, arimax_cross_val_score
from capstone.utils import read_file, get_sectors, load_models, set_plot_style

# SARIMAX model from statsmodels
from statsmodels.tsa.statespace.sarimax import SARIMAX

# PCA & Pipeline
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Progress bar for loops
from tqdm.auto import tqdm

# Call the project's plot-style helper (imported from capstone.utils) once,
# right after the imports. Presumably this sets matplotlib/seaborn defaults;
# confirm in capstone.utils.
set_plot_style()

# In [None]:
class DataLoader:
    """Collect the project's input tables and models in a single dict.

    Loaded objects are kept in ``self.data``, which starts out empty and is
    filled in by :meth:`load_all`.
    """

    def __init__(self):
        self.data = {}

    def load_all(self):
        """Load every input and return the populated ``self.data`` dict.

        Four files are read through :meth:`_load_file` and stored under the
        keys ``"master_df"``, ``"stock_returns"``, ``"stocks_by_sector"`` and
        ``"sectors"``. The result of ``load_models()`` is stored under
        ``"models"``.
        """
        # (dict key, file name) pairs. Only the returns table is stored under
        # a key that differs from the name of the file it is read from.
        file_sources = (
            ("master_df", "master_df"),
            ("stock_returns", "snp_log_returns"),
            ("stocks_by_sector", "stocks_by_sector"),
            ("sectors", "sectors"),
        )
        for key, file_name in file_sources:
            self.data[key] = self._load_file(file_name)

        self.data["models"] = load_models()
        return self.data

    def _load_file(self, name):
        """Read ``name`` with ``read_file``, passing ``index_col=0``."""
        return read_file(name, index_col=0)

class StockRecommender:

    def __init__(self):
        """Load all inputs through ``DataLoader`` and keep them on the instance."""
        loader = DataLoader()
        self._data_loader = loader
        loaded = self._data = loader.load_all()

        # Direct handles on the individual entries of the loaded dict.
        self._models = loaded["models"]
        self._df = loaded["master_df"]
        self._stock_rets = loaded["stock_returns"]
        self._stocks_by_sec = loaded["stocks_by_sector"]
        self._sectors = loaded["sectors"]

        # Initialised to None here.
        self._recommended_stocks = None

    def _extract_features(self, df, sectors):
        X_all = df[sectors]
        y_all = df[df.columns[~df.columns.isin(sectors)]]
        return X_all, y_all