#### Import Libraries

In [14]:
import yaml
import joblib

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report


#### Load Config

In [15]:
# Read the project configuration (data/model paths, column lists, ...) from YAML.
# The path is relative to the notebook's working directory.
# An explicit encoding keeps parsing independent of the platform's locale default.
with open("../config/config.yaml", "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)
# Show the parsed configuration as the cell output.
config

{'data_raw': 'data/raw/smoke.csv',
 'data_final': 'data/processed/ori_new.pkl',
 'path_train': ['data/processed/x_train.pkl', 'data/processed/y_train.pkl'],
 'path_valid': ['data/processed/x_valid.pkl', 'data/processed/y_valid.pkl'],
 'path_test': ['data/processed/x_test.pkl', 'data/processed/y_test.pkl'],
 'path_train_feat': ['data/processed/x_train_feat.pkl',
  'data/processed/y_train_feat.pkl'],
 'path_valid_feat': ['data/processed/x_valid_feat.pkl',
  'data/processed/y_valid_feat.pkl'],
 'path_test_feat': ['data/processed/x_test_feat.pkl',
  'data/processed/y_test_feat.pkl'],
 'final_model_path': 'models/production_model.pkl',
 'training_log_path': 'log/training_log.json',
 'new_cols': ['UTC',
  'Temperature',
  'Humidity',
  'TVOC',
  'eCO2',
  'Raw_H2',
  'Raw_Ethanol',
  'Pressure',
  'PM1.0',
  'PM2.5',
  'NC0.5',
  'NC1.0',
  'NC2.5',
  'CNT',
  'Fire_Alarm'],
 'datetime_columns': ['UTC'],
 'float_columns': ['Temperature',
  'Humidity',
  'Pressure',
  'PM1.0',
  'PM2.5',
  'N

#### Load Data

In [16]:
# Load the feature-engineered train / validation / test splits.
# File locations are read from `config` rather than repeated here, so this cell
# follows any path change made in config.yaml. Each `path_*_feat` entry is an
# [x_path, y_path] pair written relative to the project root; this notebook
# resolves files relative to "..", hence the prefix.
# NOTE: joblib.load unpickles its input -- only load files from a trusted source.
x_train, y_train = (joblib.load(f"../{path}") for path in config["path_train_feat"])
x_valid, y_valid = (joblib.load(f"../{path}") for path in config["path_valid_feat"])
x_test, y_test = (joblib.load(f"../{path}") for path in config["path_test_feat"])

#### Train Model

In [17]:
# Fit a decision tree on the training split.
# random_state is fixed because the tree permutes features at every split, so
# ties between equally good splits can otherwise be broken differently on each
# run, changing the fitted model and the metrics reported below.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(x_train, y_train)

# Evaluate on the validation split: per-class precision / recall / F1.
y_pred = dtc.predict(x_valid)
print(classification_report(y_valid, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.93      0.94      2681
           1       0.97      0.98      0.97      6713

    accuracy                           0.96      9394
   macro avg       0.96      0.95      0.96      9394
weighted avg       0.96      0.96      0.96      9394



In [18]:
# Persist the fitted model. The destination comes from `config` (written
# relative to the project root, hence the "../" prefix) so the saved location
# stays in sync with config.yaml. joblib.dump returns the list of written
# filenames, which is shown as the cell output.
joblib.dump(dtc, f"../{config['final_model_path']}")

['../models/production_model.pkl']