In [None]:
import os
import sys

# Put the parent directory on the import path so the `src.*` imports below resolve.
sys.path.append(os.path.abspath('..'))

import pandas as pd
import yaml
from sklearn.model_selection import train_test_split

from src.data.prepare_data import prepare_data
from src.models.classification import Classification
from src.models.evaluation import Evaluation
from src.visuals.boundary import plot_boundary

In [None]:
# Parse config.yml from the parent directory into `config`,
# using PyYAML's safe loader.
with open('../' + 'config.yml', mode='r') as config_file:
    config = yaml.safe_load(config_file)
# Remove the handle name so it does not linger in the notebook namespace.
del config_file

In [None]:
# Read the CSV whose path (relative to the parent directory) is given in the
# config, and pass it straight through the project's prepare_data step.
df = prepare_data(df=pd.read_csv('../' + config['data_loader']['path']))

# Preview the first rows of the prepared frame.
display(df.head())

In [None]:
# split dataset into train and test sets
# Select the label by name and treat every other column as a feature, so the
# split stays correct even when 'target' is not the last column of df
# (a positional `iloc[:, :-1]` would then leak the label into X).
X = df.drop(columns='target')
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y
    , test_size=config['train_test_split']['test_size']
    , random_state=123  # fixed seed so the split is reproducible
    , shuffle=True
    , stratify=y        # stratify on the label column
    )

# check class distributions: one labeled column per split, one row per class
display(
    pd.concat(
        {
            'train': y_train.value_counts(normalize=True)
            , 'test': y_test.value_counts(normalize=True)
        }
        , axis=1
    )
)

In [None]:
# fit algorithm
# Only the first three feature columns are used for fitting and evaluation.
# NOTE(review): presumably limited to three so the result can be plotted — confirm.
clf = Classification(algorithm='DecisionTreeClassifier', max_depth=10)
clf.fit(X=X_train.iloc[:, :3], y=y_train)

# test set evaluation
# Bound to `evaluation` rather than `eval` so the Python builtin eval() is not
# shadowed for the rest of the kernel session.
evaluation = Evaluation(clf=clf, threshold=0.5)
evaluation.fit(
    X_train=X_train.iloc[:, :3], y_train=y_train
    , X_test=X_test.iloc[:, :3], y_test=y_test
    )

In [None]:
# Call plot_boundary with the fitted clf on the test set, restricted to the
# same first three feature columns that clf was fitted on.
# NOTE(review): azim presumably sets the viewing angle of the plot — confirm.
plot_boundary(X=X_test.iloc[:, :3], y=y_test, clf=clf, azim=50, plot_points=True)