#### Import Libraries

In [24]:
import joblib
import numpy as np
import pandas as pd
import yaml

from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier


#### Load Config

In [25]:
# Read the project configuration (paths and column lists) from the YAML file
# one directory above the notebook. The encoding is pinned to UTF-8 so the
# file is decoded the same way regardless of the platform's locale default.
with open("../config/config.yaml", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)
config

{'data_raw': 'data/raw/smoke.csv',
 'data_final': 'data/processed/ori_new.pkl',
 'path_train': ['data/processed/x_train.pkl', 'data/processed/y_train.pkl'],
 'path_valid': ['data/processed/x_valid.pkl', 'data/processed/y_valid.pkl'],
 'path_test': ['data/processed/x_test.pkl', 'data/processed/y_test.pkl'],
 'path_train_feat': ['data/processed/x_train_feat.pkl',
  'data/processed/y_train_feat.pkl'],
 'path_valid_feat': ['data/processed/x_valid_feat.pkl',
  'data/processed/y_valid_feat.pkl'],
 'path_test_feat': ['data/processed/x_test_feat.pkl',
  'data/processed/y_test_feat.pkl'],
 'final_model_path': 'models/production_model.pkl',
 'training_log_path': 'log/training_log.json',
 'new_cols': ['utc',
  'temperature',
  'humidity',
  'tvoc',
  'eco2',
  'raw_h2',
  'raw_ethanol',
  'pressure',
  'pm10',
  'pm25',
  'nc05',
  'nc10',
  'nc25',
  'cnt',
  'fire_alarm'],
 'datetime_columns': ['utc'],
 'float_columns': ['temperature',
  'humidity',
  'pressure',
  'pm10',
  'pm25',
  'nc05',
 

#### Load Data

In [26]:
# Load the feature-engineered train/valid/test splits.
# The locations come from config.yaml instead of being repeated here; each
# `path_*_feat` entry is an [x_path, y_path] pair relative to the project
# root, which is one directory above this notebook (hence the "../" prefix).
x_train, y_train = (joblib.load("../" + path) for path in config["path_train_feat"])
x_test, y_test = (joblib.load("../" + path) for path in config["path_test_feat"])
x_valid, y_valid = (joblib.load("../" + path) for path in config["path_valid_feat"])

#### Train Model

In [27]:
# Fit a decision tree on the training split.
# random_state is fixed so that re-running the notebook produces the same
# tree (and therefore the same report below and the same saved model).
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(x_train, y_train)

# Score the fitted tree on the validation split and print per-class metrics.
y_pred = dtc.predict(x_valid)
print(classification_report(y_valid, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.91      0.92      2681
           1       0.96      0.98      0.97      6713

    accuracy                           0.96      9394
   macro avg       0.95      0.94      0.95      9394
weighted avg       0.96      0.96      0.96      9394



In [28]:
# Persist the fitted tree to the location declared in config.yaml
# (`final_model_path` is relative to the project root, one level up).
joblib.dump(dtc, "../" + config["final_model_path"])

['../models/production_model.pkl']

In [45]:
# Hand-written single-row sample used to try out the saved model.
# (pandas/numpy are imported in the notebook's top import cell.)
# NOTE(review): assumes these eight columns, in this order, are exactly the
# features the model was fitted on — confirm against x_train.columns.
cols = [{'temperature' : 0.1, 'humidity' : 0.2, 'tvoc' : 3, 'eco2' : 4, 'raw_h2' : 5, 'raw_ethanol' : 6, 'pressure' : 0.7, 'pm10' : 0.8}]
z = pd.DataFrame(cols)
z

Unnamed: 0,temperature,humidity,tvoc,eco2,raw_h2,raw_ethanol,pressure,pm10
0,0.1,0.2,3,4,5,6,0.7,0.8


In [46]:
# Reload the persisted model from the path declared in config.yaml
# (`final_model_path` is relative to the project root, one level up).
# joblib.load unpickles the file, so only load artifacts you produced yourself.
model_data = joblib.load("../" + config["final_model_path"])

In [48]:
# Predict with the reloaded model on the single-row sample frame `z`.
# Note: this rebinds `y_pred`, replacing the validation predictions computed earlier.
y_pred = model_data.predict(z)

In [51]:
# Display the prediction array for the sample row.
y_pred

array([1])

In [52]:
# String form of the whole prediction array (brackets included), not of the bare label.
str(y_pred)

'[1]'