# Model DecisionTreeClassifier

In [48]:
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.inspection import permutation_importance

In [49]:
# Global settings

# SELECT Data Source file name
dsn: str = 'SOL-USD_1h_2024-01-01-2024-12-31_110_feature' # Select data source name

select_columns: list | None = None

target = "y_next_trend"   # ['y_next_trend', 'y_next_pct']


test_months: int = 2


### Data

In [50]:
# Load the configured data source and parse its timestamp column in one chain.
data: pd.DataFrame = pd.read_csv(f"data/{dsn}").assign(
    datetime=lambda frame: pd.to_datetime(frame['datetime'])
)

# Keep only the requested columns when a selection is configured.
if select_columns:
    data = data[select_columns]

In [51]:
# Feature columns: every column except "datetime" and those whose name starts with "y_".
features = [
    column
    for column in data.columns
    if not column.startswith("y_") and column != "datetime"
]

In [52]:
# Chronological hold-out: rows from the final `test_months` months (counted back
# from the newest timestamp) form the test set; all earlier rows are for training.
test_start_date = data['datetime'].max() - pd.DateOffset(months=test_months)

train_data = data.loc[data['datetime'].lt(test_start_date)]
test_data = data.loc[data['datetime'].ge(test_start_date)]



In [53]:
def train_model_classifier(
    train_data_,
    feature_cols,
    target_column,
    max_depth: int | None = 5,
    random_state: int | None = 42,
) -> DecisionTreeClassifier:
    """Fit a decision tree classifier and print its accuracy on the training data.

    Args:
        train_data_: Table holding the training rows, indexed by column name
            (a pandas DataFrame in this notebook).
        feature_cols: Names of the columns used as model inputs.
        target_column: Name of the column holding the class labels.
        max_depth: Maximum tree depth passed to ``DecisionTreeClassifier``.
            Defaults to 5, the value previously hard-coded here.
        random_state: Seed passed to ``DecisionTreeClassifier``.
            Defaults to 42, the value previously hard-coded here.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    # Split the frame into the feature matrix (X) and the target vector (y).
    X_train = train_data_[feature_cols]
    y_train = train_data_[target_column]

    model = DecisionTreeClassifier(max_depth=max_depth, random_state=random_state)
    model.fit(X_train, y_train)

    # `predict` yields class labels (not probabilities). These predictions are
    # made on the same rows the model was fitted on, so the score is in-sample.
    y_train_pred = model.predict(X_train)

    print("DecisionTreeClassifier")
    print(f'Train Accuracy: {accuracy_score(y_train, y_train_pred):.2f}')

    return model


In [54]:
def test_model_classifier(test_data_, feature_cols, target_column, model: DecisionTreeClassifier):
    """Print the accuracy of an already fitted classifier on the given data.

    Args:
        test_data_: Table with the evaluation rows, indexed by column name
            (a pandas DataFrame in this notebook).
        feature_cols: Names of the columns fed to the model.
        target_column: Name of the column holding the true labels.
        model: Fitted classifier; only its ``predict`` method is used here.

    Returns:
        None. The accuracy is printed, not returned.
    """
    # Separate model inputs from the labels they are scored against.
    inputs = test_data_[feature_cols]
    expected_labels = test_data_[target_column]

    # `predict` yields class labels, which are compared with the true labels.
    predicted_labels = model.predict(inputs)
    test_accuracy = accuracy_score(expected_labels, predicted_labels)

    print("DecisionTreeClassifier")
    print(f'Test Accuracy: {test_accuracy:.2f}')


In [55]:
# Fit the tree on the training split; the function prints the training accuracy.
model = train_model_classifier(
    train_data_=train_data,
    feature_cols=features,
    target_column=target,
)

DecisionTreeClassifier
Train Accuracy: 0.56


In [57]:
# Evaluate on the held-out split. The result is deliberately not assigned back to
# `model`: test_model_classifier returns None, so assigning it would overwrite the
# fitted classifier and break any later cell that uses `model`.
test_model_classifier(test_data, features, target, model)

DecisionTreeClassifier
Test Accuracy: 0.51
