In [1]:
import yfinance as yf
import numpy as np
import pandas as pd
from scipy.signal import argrelextrema
import talib
from abc import ABC, abstractmethod
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
class FeatureBase(ABC):
    """
    Common interface implemented by every feature class.

    Subclasses must override `compute`; the base class cannot be
    instantiated because `compute` is abstract.
    """

    @abstractmethod
    def compute(self, data=None, *args, **kwargs):
        """
        Compute the feature for the given data.

        Parameters:
        - data: Input handed to the feature (defaults to None).
        - *args, **kwargs: Feature-specific options.

        Returns:
        - None in this base implementation; subclasses define the result.
        """
        return None


# TODO: the trend calculation methods have known issues; add a method that can split the data by date
class IndicatorTrend(FeatureBase):
    """
    Indicator that labels every row with a binary trend class.

    The label is stored in a 'Trend' column: 0 marks a rising stretch and
    1 marks a falling stretch. Rows before the first labelled stretch stay
    NaN; unlabelled rows after it are forward-filled.

    The input frame is never modified: every method returns a labelled copy.
    """

    def compute(self, data, *args, **kwargs):
        """
        Compute the trend for the given data using the specified method.

        Parameters:
        - data: DataFrame containing a 'Close' column.
        - method: Method for trend calculation ('MA' or 'LocalExtrema').
          Defaults to 'MA'.
        - ma_days: Number of days for moving average (default 20).
        - oder_days: Number of days for order (default 20). The keyword is
          spelled 'oder_days' for compatibility with existing callers.
        - trend_days: Number of days to determine the trend (default 5).

        Any other keyword argument is ignored.

        Returns:
        - Copy of `data` with an added 'Trend' column.

        Raises:
        - ValueError: If `method` is not a supported method name.
        """
        method = kwargs.get('method', 'MA')
        ma_days = kwargs.get('ma_days', 20)
        oder_days = kwargs.get('oder_days', 20)
        trend_days = kwargs.get('trend_days', 5)

        if method == 'MA':
            return self.calculate_trend_MA(data, ma_days=ma_days, trend_days=trend_days)
        elif method == 'LocalExtrema':
            return self.calculate_trend_LocalExtrema(data, oder_days=oder_days)
        else:
            raise ValueError(f"Invalid trend calculation method: {method}")

    @staticmethod
    def _attach_trend(data, trend):
        """Return a copy of `data` whose 'Trend' column is the forward-filled `trend` array."""
        labelled = data.copy()
        # Assign positionally (plain array) so no index alignment can
        # reshuffle or drop labels.
        labelled['Trend'] = pd.Series(trend).ffill().to_numpy()
        return labelled

    def calculate_trend_MA(self, data, ma_days=20, trend_days=5):
        """
        Calculate trend using Moving Average method.

        A window of `trend_days` consecutive moving-average values that is
        strictly increasing labels all of its rows 0; a strictly decreasing
        window labels them 1. Later windows overwrite earlier ones.

        Parameters:
        - data: DataFrame containing a 'Close' column.
        - ma_days: Number of days for moving average.
        - trend_days: Number of days to determine the trend (must be >= 1).

        Returns:
        - Copy of `data` with an added 'Trend' column.

        Raises:
        - ValueError: If `trend_days` is smaller than 1.
        """
        if trend_days < 1:
            raise ValueError(f"trend_days must be at least 1, got {trend_days}")

        moving_avg = data['Close'].rolling(window=ma_days).mean().to_numpy()
        # Step k compares MA[k] with MA[k + 1]; comparisons involving NaN
        # are False, so windows inside the warm-up period label nothing.
        rising = moving_avg[1:] > moving_avg[:-1]
        falling = moving_avg[1:] < moving_avg[:-1]

        n_rows = len(moving_avg)
        n_steps = trend_days - 1
        trend = np.full(n_rows, np.nan)
        for start in range(n_rows - trend_days + 1):
            if rising[start:start + n_steps].all():
                trend[start:start + trend_days] = 0
            elif falling[start:start + n_steps].all():
                trend[start:start + trend_days] = 1

        return self._attach_trend(data, trend)

    def calculate_trend_LocalExtrema(self, data, oder_days=20):
        """
        Calculate trend using Local Extrema method.

        Local maxima and minima of 'Close' are located with
        `scipy.signal.argrelextrema`. Every stretch between two consecutive
        extrema (both ends included) is labelled by the extremum that ends
        it: 0 when it ends at a maximum, 1 when it ends at a minimum.

        Parameters:
        - data: DataFrame containing a 'Close' column.
        - oder_days: Number of days for order (the `order` argument of
          `argrelextrema`).

        Returns:
        - Copy of `data` with an added 'Trend' column.
        """
        close = data['Close'].to_numpy()
        local_max_indices = argrelextrema(close, np.greater_equal, order=oder_days)[0]
        local_min_indices = argrelextrema(close, np.less_equal, order=oder_days)[0]

        max_positions = set(local_max_indices.tolist())
        extrema = sorted(np.concatenate([local_max_indices, local_min_indices]).tolist())

        # The extrema are integer positions, so the labels are written into a
        # positional array rather than through label-based `.loc`, which
        # fails on a DatetimeIndex.
        trend = np.full(len(close), np.nan)
        prev_idx = None
        for idx in extrema:
            if prev_idx is not None:
                # A position that is both a maximum and a minimum counts as a
                # maximum.
                trend[prev_idx:idx + 1] = 0 if idx in max_positions else 1
            prev_idx = idx

        return self._attach_trend(data, trend)


class IndicatorMACD(FeatureBase):
    """
    Indicator that adds a Moving Average Convergence Divergence (MACD) column.
    """

    def compute(self, data, *args, **kwargs):
        """
        Add a 'MACD' column computed by `talib.MACD` from 'Close'.

        Parameters:
        - data: DataFrame containing a 'Close' column; modified in place.
        - fastperiod: Passed to `talib.MACD` (default 5).
        - slowperiod: Passed to `talib.MACD` (default 10).
        - signalperiod: Passed to `talib.MACD` (default 9).

        Returns:
        - The same DataFrame with the 'MACD' column added.
        """
        periods = {
            'fastperiod': kwargs.get('fastperiod', 5),
            'slowperiod': kwargs.get('slowperiod', 10),
            'signalperiod': kwargs.get('signalperiod', 9),
        }
        # Only the first of the three returned series is kept.
        macd_line, _signal, _hist = talib.MACD(data['Close'], **periods)
        data['MACD'] = macd_line
        return data


class IndicatorROC(FeatureBase):
    """
    Indicator that adds a 'ROC' column computed by `talib.ROC`.
    """

    def compute(self, data, *args, **kwargs):
        """
        Add a 'ROC' column computed from 'Close'.

        Parameters:
        - data: DataFrame containing a 'Close' column; modified in place.
        - trend_days: Used as the `timeperiod` of `talib.ROC` (default 5).

        Returns:
        - The same DataFrame with the 'ROC' column added.
        """
        period = kwargs.get('trend_days', 5)
        roc_values = talib.ROC(data['Close'], timeperiod=period)
        data['ROC'] = roc_values
        return data


class IndicatorStochasticOscillator(FeatureBase):
    """
    Indicator that adds 'StoK' and 'StoD' columns computed by `talib.STOCH`.
    """

    def compute(self, data, *args, **kwargs):
        """
        Add 'StoK' and 'StoD' columns computed from 'High', 'Low' and 'Close'.

        Parameters:
        - data: DataFrame with 'High', 'Low' and 'Close' columns; modified
          in place.
        - trend_days: Used as `fastk_period` (default 5). `slowk_period` and
          `slowd_period` are fixed at 3.

        Returns:
        - The same DataFrame with both columns added.
        """
        fastk_period = kwargs.get('trend_days', 5)
        slow_k, slow_d = talib.STOCH(
            data['High'],
            data['Low'],
            data['Close'],
            fastk_period=fastk_period,
            slowk_period=3,
            slowd_period=3,
        )
        data['StoK'] = slow_k
        data['StoD'] = slow_d
        return data


class IndicatorCCI(FeatureBase):
    """
    Indicator that adds a 'CCI' column computed by `talib.CCI`.
    """

    def compute(self, data, *args, **kwargs):
        """
        Add a 'CCI' column computed from 'High', 'Low' and 'Close'.

        Parameters:
        - data: DataFrame with 'High', 'Low' and 'Close' columns; modified
          in place.
        - timeperiod: Passed to `talib.CCI` (default 14).

        Returns:
        - The same DataFrame with the 'CCI' column added.
        """
        period = kwargs.get('timeperiod', 14)
        high, low, close = data['High'], data['Low'], data['Close']
        data['CCI'] = talib.CCI(high, low, close, timeperiod=period)
        return data


class IndicatorRSI(FeatureBase):
    """
    Indicator that adds an 'RSI' column computed by `talib.RSI`.
    """

    def compute(self, data, *args, **kwargs):
        """
        Add an 'RSI' column computed from 'Close'.

        Parameters:
        - data: DataFrame containing a 'Close' column; modified in place.
        - timeperiod: Passed to `talib.RSI` (default 14).

        Returns:
        - The same DataFrame with the 'RSI' column added.
        """
        period = kwargs.get('timeperiod', 14)
        rsi_values = talib.RSI(data['Close'], timeperiod=period)
        data['RSI'] = rsi_values
        return data


class IndicatorVMA(FeatureBase):
    """
    Indicator that adds a 'VMA' column: `talib.MA` applied to 'Volume'.
    """

    def compute(self, data, *args, **kwargs):
        """
        Add a 'VMA' column computed from 'Volume'.

        Parameters:
        - data: DataFrame containing a 'Volume' column; modified in place.
        - timeperiod: Passed to `talib.MA` (default 20).

        Returns:
        - The same DataFrame with the 'VMA' column added.
        """
        period = kwargs.get('timeperiod', 20)
        volume_average = talib.MA(data['Volume'], timeperiod=period)
        data['VMA'] = volume_average
        return data


class IndicatorPctChange(FeatureBase):
    """
    Indicator that adds the row-to-row change of 'Close', in percent.
    """

    def compute(self, data, *args, **kwargs):
        """
        Add a 'pctChange' column: `pct_change()` of 'Close' multiplied by 100.

        Parameters:
        - data: DataFrame containing a 'Close' column; modified in place.

        Returns:
        - The same DataFrame with the 'pctChange' column added.
        """
        fractional_change = data['Close'].pct_change()
        data['pctChange'] = fractional_change * 100
        return data


class TreasuryYieldThreeMonth(FeatureBase):
    """
    Feature that adds the 'Close' series of the "^IRX" ticker as
    '3M Treasury Yield'.
    """

    def compute(self, data, *args, **kwargs):
        """
        Download "^IRX" via yfinance and store its 'Close' values.

        Parameters:
        - data: DataFrame to extend; modified in place.
        - start_date: Start of the download range (None if omitted).
        - end_date: End of the download range (None if omitted).

        Returns:
        - The same DataFrame with the '3M Treasury Yield' column added.
        """
        date_from = kwargs.get('start_date')
        date_to = kwargs.get('end_date')
        quotes = yf.download("^IRX", date_from, date_to)
        data['3M Treasury Yield'] = quotes["Close"]
        return data


class TreasuryYieldFiveYear(FeatureBase):
    """
    Feature that adds the 'Close' series of the "^FVX" ticker as
    '5Y Treasury Yield'.
    """

    def compute(self, data, *args, **kwargs):
        """
        Download "^FVX" via yfinance and store its 'Close' values.

        Parameters:
        - data: DataFrame to extend; modified in place.
        - start_date: Start of the download range (None if omitted).
        - end_date: End of the download range (None if omitted).

        Returns:
        - The same DataFrame with the '5Y Treasury Yield' column added.
        """
        date_from = kwargs.get('start_date')
        date_to = kwargs.get('end_date')
        quotes = yf.download("^FVX", date_from, date_to)
        data['5Y Treasury Yield'] = quotes["Close"]
        return data


class TreasuryYieldTenYear(FeatureBase):
    """
    Feature that adds the 'Close' series of the "^TNX" ticker as
    '10Y Treasury Yield'.
    """

    def compute(self, data, *args, **kwargs):
        """
        Download "^TNX" via yfinance and store its 'Close' values.

        Parameters:
        - data: DataFrame to extend; modified in place.
        - start_date: Start of the download range (None if omitted).
        - end_date: End of the download range (None if omitted).

        Returns:
        - The same DataFrame with the '10Y Treasury Yield' column added.
        """
        date_from = kwargs.get('start_date')
        date_to = kwargs.get('end_date')
        quotes = yf.download("^TNX", date_from, date_to)
        data['10Y Treasury Yield'] = quotes["Close"]
        return data


class TreasuryYieldThirtyYear(FeatureBase):
    """
    Feature that adds the 'Close' series of the "^TYX" ticker as
    '30Y Treasury Yield'.
    """

    def compute(self, data, *args, **kwargs):
        """
        Download "^TYX" via yfinance and store its 'Close' values.

        Parameters:
        - data: DataFrame to extend; modified in place.
        - start_date: Start of the download range (None if omitted).
        - end_date: End of the download range (None if omitted).

        Returns:
        - The same DataFrame with the '30Y Treasury Yield' column added.
        """
        date_from = kwargs.get('start_date')
        date_to = kwargs.get('end_date')
        quotes = yf.download("^TYX", date_from, date_to)
        data['30Y Treasury Yield'] = quotes["Close"]
        return data
# Add other features here as needed


class FeatureFactory:
    """
    Factory that turns a feature name into a ready-to-use feature object.
    """

    @staticmethod
    def get_feature(feature_type):
        """
        Build the feature registered under the given name.

        Parameters:
        - feature_type: Name of the feature (e.g., 'Trend', 'MACD').

        Returns:
        - A new instance of the feature class registered under that name.

        Raises:
        - ValueError: If the provided feature type is not supported.
        """
        registry = {
            "Trend": IndicatorTrend,
            "MACD": IndicatorMACD,
            "ROC": IndicatorROC,
            "Stochastic Oscillator": IndicatorStochasticOscillator,
            "CCI": IndicatorCCI,
            "RSI": IndicatorRSI,
            "VMA": IndicatorVMA,
            "PctChange": IndicatorPctChange,
            "3M Treasury Yield": TreasuryYieldThreeMonth,
            "5Y Treasury Yield": TreasuryYieldFiveYear,
            "10Y Treasury Yield": TreasuryYieldTenYear,
            "30Y Treasury Yield": TreasuryYieldThirtyYear,
            # Add other features here as needed
        }
        feature_cls = registry.get(feature_type)
        if feature_cls is not None:
            return feature_cls()
        raise ValueError(f"Invalid feature type: {feature_type}")


class CleanerBase(ABC):
    """Interface for data cleaners: each one can inspect data and repair it."""

    @abstractmethod
    def check(self, data):
        """Inspect the data and report issues; subclasses define the report."""
        return None

    @abstractmethod
    def clean(self, data):
        """Repair the issues found in the data; subclasses define the result."""
        return None


class CleanerMissingValue(CleanerBase):
    """Concrete cleaner that reports and repairs missing (NaN) values."""

    def check(self, data):
        """Return the number of missing values in each column."""
        return data.isnull().sum()

    def clean(self, data, strategy='auto'):
        """
        Handle missing data based on the chosen strategy.

        Parameters:
        - data: DataFrame to clean. It is never modified; every strategy
          except 'none' returns a new DataFrame.
        - strategy: One of
            'auto'   - drop the leading rows up to the first row without any
                       missing value, then forward-fill the rest. When no
                       row is complete, an empty frame is returned.
            'drop'   - drop every row containing a missing value.
            'fillna' - forward-fill missing values.
            'none'   - return `data` unchanged.

        Returns:
        - The cleaned DataFrame.

        Raises:
        - ValueError: If `strategy` is not one of the names above.
        """
        if strategy == 'auto':
            complete_rows = data.notna().all(axis=1).to_numpy()
            if not complete_rows.any():
                # Nothing to anchor a forward fill on (also covers an empty
                # frame); return an empty frame instead of failing.
                return data.iloc[0:0].copy()
            # argmax on a boolean array yields the first True position.
            first_complete = int(complete_rows.argmax())
            return data.iloc[first_complete:].ffill()

        if strategy == 'drop':
            return data.dropna()

        if strategy == 'fillna':
            return data.ffill()

        if strategy == 'none':
            return data

        raise ValueError("Invalid strategy provided.")


class ProcessorFactory:
    """Factory and helper routines for cleaning, scaling and windowing data."""

    @staticmethod
    def get_cleaner(clean_type, *args, **kwargs):
        """
        Create the cleaner registered under `clean_type`.

        Only "MissingData" is supported; extra arguments are forwarded to
        the cleaner's constructor. Any other type raises ValueError.
        """
        if clean_type != "MissingData":
            raise ValueError(f"Processor type {clean_type} not recognized.")
        return CleanerMissingValue(*args, **kwargs)

    @staticmethod
    def get_standardize_method(data, method='StandardScaler'):
        """
        Fit the named scaler on `data` and return the transformed values.

        `method` is 'StandardScaler' or 'MinMaxScaler'; anything else raises
        ValueError.
        """
        if method == 'StandardScaler':
            return StandardScaler().fit_transform(data)
        if method == 'MinMaxScaler':
            return MinMaxScaler().fit_transform(data)
        raise ValueError(f"Invalid scaler method: {method}.")

    @staticmethod
    def standardize_and_split_data(data, split_ratio=0.7, target_col="Trend", feature_cols=None):
        """
        Split the data chronologically into training and testing sets.

        No scaling happens here. The features are `feature_cols` (all columns
        when that is empty or None) and the target is the one-hot encoding
        of `target_col`. The first `int(len(data) * split_ratio)` rows form
        the training set and the remaining rows the test set.

        Returns:
        - (X_train, y_train, X_test, y_test)

        Raises:
        - ValueError: If the split would leave either set empty.
        """
        selected_cols = feature_cols if feature_cols else data.columns.to_list()
        features = data[selected_cols]

        # One-hot encode the target column
        labels = pd.get_dummies(data[target_col], prefix='Trend')

        # Both partitions must contain at least one row
        n_rows = len(features)
        cut = int(n_rows * split_ratio)
        if not 1 <= cut < n_rows:
            raise ValueError(
                "Invalid split ratio leading to incorrect data partitioning.")

        return features.iloc[:cut], labels.iloc[:cut], features.iloc[cut:], labels.iloc[cut:]

    @staticmethod
    def prepare_multistep_data(x_data, y_data, look_back, predict_steps, slide_steps=1):
        """
        Cut the data into sliding windows for multi-step prediction.

        Each window pairs `look_back` input rows with the `predict_steps`
        target rows that follow them; the inputs are standardized separately
        within every window. Consecutive windows start `slide_steps` rows
        apart.

        Returns:
        - (x windows, y windows, input-row index values, target-row index
          values), each as a NumPy array.
        """
        x_windows, y_windows = [], []
        x_dates, y_dates = [], []

        last_start = len(x_data) - look_back - predict_steps + 1
        for start in range(0, last_start, slide_steps):
            split = start + look_back
            stop = split + predict_steps

            x_dates.append(x_data.index[start:split])
            y_dates.append(x_data.index[split:stop])

            raw_inputs = x_data.iloc[start:split].values
            targets = y_data.iloc[split:stop].values

            x_windows.append(ProcessorFactory.get_standardize_method(raw_inputs))
            y_windows.append(targets)

        return np.array(x_windows), np.array(y_windows), np.array(x_dates), np.array(y_dates)


class Preprocessor:
    """
    Fetching, processing, and preparing model data.
    """

    def __init__(self, data=None, start_date=None, end_date=None):
        """
        Parameters:
        - data: Optional DataFrame to start from.
        - start_date: Default start of the download range.
        - end_date: Default end of the download range.
        """
        self.data = data
        self.start_date = start_date
        self.end_date = end_date
        self.trend_method = "MA"
        self.features = []
        self.processors = []
        self.X_train = None
        self.y_train = None
        self.X_test = None
        self.y_test = None
        # Filled by process_data when multi-step windows are built; created
        # here so they can be read safely before that happens.
        self.train_dates = None
        self.test_dates = None

    def set_seed(self, seed_value=42):
        """Set NumPy's global random seed for reproducibility."""
        np.random.seed(seed_value)

    def fetch_stock_data(self, stock_symbol, start_date=None, end_date=None):
        """
        Fetch stock data from Yahoo Finance.

        A truthy `start_date` / `end_date` replaces the stored value before
        the download. The result is returned, not stored in `self.data`.
        """
        if start_date:
            self.start_date = start_date
        if end_date:
            self.end_date = end_date
        return yf.download(stock_symbol, start=self.start_date, end=self.end_date)

    def add_feature(self, feature_type, *args, **kwargs):
        """
        Compute the named feature on `self.data` and store the result.

        Extra arguments are forwarded to the feature's `compute` method.
        Raises ValueError (from FeatureFactory) for an unknown feature type.
        """
        feature = FeatureFactory.get_feature(feature_type)
        self.data = feature.compute(self.data, *args, **kwargs)

    def add_data_cleaner(self, clean_type='MissingData', strategy='drop'):
        """
        Check and clean `self.data` using a specific processor.

        Returns the issues reported by the cleaner's `check`, taken before
        cleaning.
        """
        processor = ProcessorFactory.get_cleaner(clean_type)
        issues = processor.check(self.data)
        self.data = processor.clean(self.data, strategy=strategy)
        return issues

    def process_data(self, split_ratio=0.7, target_col="Trend", feature_cols=None, look_back=64, predict_steps=16, train_slide_steps=1, test_slide_steps=16):
        """
        Split `self.data` into train/test sets and, when both `look_back`
        and `predict_steps` are truthy, cut them into multi-step windows.

        Results are stored in `X_train`, `y_train`, `X_test` and `y_test`.
        With windowing, `train_dates` receives the input-window index values
        of the training set and `test_dates` the prediction-window index
        values of the test set; without it both are left unchanged.
        """
        self.X_train, self.y_train, self.X_test, self.y_test = ProcessorFactory.standardize_and_split_data(
            self.data, split_ratio, target_col, feature_cols)

        if look_back and predict_steps:
            self.X_train, self.y_train, self.train_dates, _ = ProcessorFactory.prepare_multistep_data(
                self.X_train, self.y_train, look_back, predict_steps, train_slide_steps)
            self.X_test, self.y_test, _, self.test_dates = ProcessorFactory.prepare_multistep_data(
                self.X_test, self.y_test, look_back, predict_steps, test_slide_steps)


# Ticker and date range of the price history to download.
stock_symbol = "^GSPC"
start_date = "2001-01-01"
stop_date = "2021-01-01"

# Seed NumPy for reproducibility, then download the price history.
model_data = Preprocessor()
model_data.set_seed(42)
model_data.data = model_data.fetch_stock_data(
    stock_symbol, start_date=start_date, end_date=stop_date)

[*********************100%***********************]  1 of 1 completed


In [3]:
# Display the downloaded price data.
model_data.data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001-01-02,1320.280029,1320.280029,1276.050049,1283.270020,1283.270020,1129400000
2001-01-03,1283.270020,1347.760010,1274.619995,1347.560059,1347.560059,1880700000
2001-01-04,1347.560059,1350.239990,1329.140015,1333.339966,1333.339966,2131000000
2001-01-05,1333.339966,1334.770020,1294.949951,1298.349976,1298.349976,1430800000
2001-01-08,1298.349976,1298.349976,1276.290039,1295.859985,1295.859985,1115500000
...,...,...,...,...,...,...
2020-12-24,3694.030029,3703.820068,3689.320068,3703.060059,3703.060059,1883780000
2020-12-28,3723.030029,3740.510010,3723.030029,3735.360107,3735.360107,3535460000
2020-12-29,3750.010010,3756.120117,3723.310059,3727.040039,3727.040039,3393290000
2020-12-30,3736.189941,3744.629883,3730.209961,3732.040039,3732.040039,3154850000


In [4]:
# The treasury-yield downloads reuse the date range the stock data was fetched
# with, so the two ranges cannot drift apart when the range is changed.
treasury_range = {"start_date": start_date, "end_date": stop_date}

features = [
    {"type": "Trend", "method": "MA", "oder_days": 20,
        "ma_days": 20, "trend_days": 5},
    {"type": "MACD", "fastperiod": 5, "slowperiod": 10, "signalperiod": 9},
    {"type": "ROC", "trend_days": 5},
    {"type": "Stochastic Oscillator", "trend_days": 5},
    {"type": "CCI", "timeperiod": 14},
    {"type": "RSI", "timeperiod": 14},
    {"type": "VMA", "timeperiod": 20},
    {"type": "PctChange"},
    {"type": "3M Treasury Yield", **treasury_range},
    {"type": "5Y Treasury Yield", **treasury_range},
    {"type": "10Y Treasury Yield", **treasury_range},
    {"type": "30Y Treasury Yield", **treasury_range},
]  # Add other features here as needed

for feature_params in features:
    # The whole dict is forwarded as keyword arguments; each feature reads
    # only the keys it knows, so the extra 'type' entry is ignored.
    feature_type = feature_params["type"]
    model_data.add_feature(feature_type, **feature_params)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Trend'].iloc[i:i + trend_days] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Trend'].iloc[i:i + trend_days] = 1


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [5]:
# Display the data again, now including the added feature columns.
model_data.data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Trend,MACD,ROC,StoK,StoD,CCI,RSI,VMA,pctChange,3M Treasury Yield,5Y Treasury Yield,10Y Treasury Yield,30Y Treasury Yield
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2001-01-02,1320.280029,1320.280029,1276.050049,1283.270020,1283.270020,1129400000,,,,,,,,,,5.680,4.752,4.917,5.350
2001-01-03,1283.270020,1347.760010,1274.619995,1347.560059,1347.560059,1880700000,,,,,,,,,5.009861,5.530,4.923,5.097,5.463
2001-01-04,1347.560059,1350.239990,1329.140015,1333.339966,1333.339966,2131000000,,,,,,,,,-1.055247,5.240,4.808,5.013,5.435
2001-01-05,1333.339966,1334.770020,1294.949951,1298.349976,1298.349976,1430800000,,,,,,,,,-2.624236,4.970,4.672,4.948,5.414
2001-01-08,1298.349976,1298.349976,1276.290039,1295.859985,1295.859985,1115500000,,,,,,,,,-0.191781,5.050,4.601,4.897,5.408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-24,3694.030029,3703.820068,3689.320068,3703.060059,3703.060059,1883780000,0.0,5.338616,-0.521693,63.138326,63.276036,54.766231,61.278661,4.609992e+09,0.353659,0.075,0.364,0.928,1.664
2020-12-28,3723.030029,3740.510010,3723.030029,3735.360107,3735.360107,3535460000,0.0,10.249831,0.699577,76.059963,66.443033,165.892897,65.834224,4.647846e+09,0.872253,0.080,0.364,0.933,1.669
2020-12-29,3750.010010,3756.120117,3723.310059,3727.040039,3727.040039,3393290000,0.0,11.046858,0.869305,77.492969,72.230419,141.836511,63.753548,4.502090e+09,-0.222738,0.090,0.378,0.935,1.674
2020-12-30,3736.189941,3744.629883,3730.209961,3732.040039,3732.040039,3154850000,0.0,11.569669,1.214453,74.218337,75.923756,119.256437,64.480076,4.388908e+09,0.134155,0.070,0.370,0.926,1.662


In [6]:
# Check for missing values and clean them with the 'auto' strategy. The
# returned value is the per-column missing count taken before cleaning.
issues_detected = model_data.add_data_cleaner("MissingData", strategy='auto')
issues_detected

Open                   0
High                   0
Low                    0
Close                  0
Adj Close              0
Volume                 0
Trend                 19
MACD                  17
ROC                    5
StoK                   8
StoD                   8
CCI                   13
RSI                   14
VMA                   19
pctChange              1
3M Treasury Yield      7
5Y Treasury Yield      7
10Y Treasury Yield     7
30Y Treasury Yield     7
dtype: int64

In [7]:
# Count the missing values per column after cleaning.
model_data.data.isnull().sum()

Open                  0
High                  0
Low                   0
Close                 0
Adj Close             0
Volume                0
Trend                 0
MACD                  0
ROC                   0
StoK                  0
StoD                  0
CCI                   0
RSI                   0
VMA                   0
pctChange             0
3M Treasury Yield     0
5Y Treasury Yield     0
10Y Treasury Yield    0
30Y Treasury Yield    0
dtype: int64

In [8]:
split_ratio = 0.7  # fraction of rows used for training
target_col = "Trend"  # column that is one-hot encoded as the target
feature_cols = None  # None means use all columns
# feature_cols = ['Close']
look_back = 64  # number of previous days' data to consider
predict_steps = 16  # number of days to predict in the future
slide_steps = 1  # sliding window step size for the training windows

# Pass the settings defined above instead of repeating their values, so that
# editing a setting actually changes the run. Test windows advance by
# predict_steps.
model_data.process_data(split_ratio=split_ratio, target_col=target_col, feature_cols=feature_cols,
                        look_back=look_back, predict_steps=predict_steps,
                        train_slide_steps=slide_steps, test_slide_steps=predict_steps)

model_data.X_train.shape, model_data.y_train.shape, model_data.X_test.shape, model_data.y_test.shape

((3430, 64, 19), (3430, 16, 2), (90, 64, 19), (90, 16, 2))

In [9]:
# Count how many rows of the cleaned data carry each trend label
# (absolute counts from the 'Trend' column, not ratios).
trend_column = model_data.data['Trend']
class_0_count = np.sum(trend_column == 0)
class_1_count = np.sum(trend_column == 1)

class_ratio = dict(Trend_0=class_0_count, Trend_1=class_1_count)

class_ratio

{'Trend_0': 3252, 'Trend_1': 1761}

In [10]:
# Sum each one-hot channel (last axis) of model_data.y_train to get the
# number of label entries per class across all training windows.
train_labels = model_data.y_train
class_0_count = np.sum(train_labels[:, :, 0])
class_1_count = np.sum(train_labels[:, :, 1])

class_ratio = dict(Trend_0=class_0_count, Trend_1=class_1_count)

class_ratio

{'Trend_0': 34900, 'Trend_1': 19980}

In [11]:
# Sum each one-hot channel (last axis) of model_data.y_test to get the
# number of label entries per class across all test windows.
test_labels = model_data.y_test
class_0_count = np.sum(test_labels[:, :, 0])
class_1_count = np.sum(test_labels[:, :, 1])

class_ratio = dict(Trend_0=class_0_count, Trend_1=class_1_count)

class_ratio

{'Trend_0': 1008, 'Trend_1': 432}

In [12]:
# import unittest


# class TestData(unittest.TestCase):

#     def setUp(self):
#         self.model_data = Preprocessor()
#         self.model_data.set_seed(42)
#         self.start_date = "2020-01-01"
#         self.end_date = "2021-01-01"
#         self.stock_symbol = "^GSPC"

#         # Mock data for the tests to avoid yfinance dependency
#         x = np.linspace(0, 50, 1000)  # Generate 1000 points between 0 and 50
#         sin_wave = np.sin(x)  # Generate a sinusoidal wave
#         self.model_data.data = pd.DataFrame({
#             'Open': sin_wave,
#             'High': sin_wave + 0.1,  # Adding a small value to simulate the 'High' value for the day
#             # Subtracting a small value to simulate the 'Low' value for the day
#             'Low': sin_wave - 0.1,
#             'Close': sin_wave,
#             'Trend': [0 for i in range(1000)]
#             # 'Trend': [(i % 2) for i in range(1000)]
#         })

#     def test_fetch_stock_data(self):
#         self.assertIsNotNone(self.model_data.data)
#         self.assertFalse(self.model_data.data.empty)

#     def test_add_single_indicator(self):
#         initial_columns = set(self.model_data.data.columns)
#         self.model_data.add_indicator("MACD", fastperiod=5,
#                                       slowperiod=10, signalperiod=9)
#         new_columns = set(self.model_data.data.columns)
#         self.assertGreater(len(new_columns), len(initial_columns))

#     def test_add_multiple_indicators(self):
#         initial_columns = set(self.model_data.data.columns)
#         self.model_data.add_indicator("RSI", timeperiod=14)
#         self.model_data.add_indicator("CCI", timeperiod=14)
#         new_columns = set(self.model_data.data.columns)
#         self.assertGreater(len(new_columns), len(initial_columns))

#     def test_invalid_indicator(self):
#         with self.assertRaises(ValueError):
#             self.model_data.add_indicator("InvalidIndicatorName")

#     def test_data_cleaning(self):
#         # Introducing missing values into the mock data
#         self.model_data.data.iloc[2, 1] = np.nan
#         self.model_data.data.iloc[4, 3] = np.nan
#         initial_missing_count = self.model_data.data.isnull().sum().sum()

#         # Use the add_data_cleaner method to clean the data
#         self.model_data.add_data_cleaner("MissingData", strategy='auto')

#         # Verify that missing values have been cleaned
#         final_missing_count = self.model_data.data.isnull().sum().sum()
#         self.assertLess(final_missing_count, initial_missing_count)

#     def test_data_splitting(self):
#         # Populate the mock data with necessary features
#         features = [
#             {"type": "Trend", "method": "MA", "oder_days": 20,
#                 "ma_days": 20, "trend_days": 5},
#             # Add other necessary features here
#         ]

#         for indicator_params in features:
#             feature_type = indicator_params["type"]
#             self.model_data.add_indicator(feature_type, **indicator_params)

#         # Call the process_data method to standardize and split the data
#         split_ratio = 0.7
#         target_col = "Trend"
#         feature_cols = None
#         look_back = 10
#         predict_steps = 5
#         slide_steps = 1

#         self.model_data.process_data(split_ratio=split_ratio, target_col=target_col,
#                                      look_back=look_back, predict_steps=predict_steps, slide_steps=slide_steps)

#         # Test the shape of the training and testing datasets
#         self.assertEqual(
#             self.model_data.X_train.shape[0], self.model_data.y_train.shape[0])
#         self.assertEqual(
#             self.model_data.X_test.shape[0], self.model_data.y_test.shape[0])

#         # Ensure sum of lengths of train and test datasets matches the length of the original dataset
#         total_data_points = len(self.model_data.data) - \
#             look_back - predict_steps + 1
#         self.assertEqual(
#             self.model_data.X_train.shape[0] + self.model_data.X_test.shape[0], total_data_points)

#         # Ensure data split ratio is approximately maintained
#         train_ratio = self.model_data.X_train.shape[0] / total_data_points
#         self.assertAlmostEqual(train_ratio, split_ratio, places=1)


# # Running the tests
# unittest_result_splitting = unittest.TextTestRunner().run(
#     unittest.TestLoader().loadTestsFromTestCase(TestData))
# unittest_result_splitting