In [12]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline
import joblib
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from skopt import BayesSearchCV
from sklearn.ensemble import RandomForestClassifier
import math
import matplotlib.pyplot as plt

## Lendo Base

In [2]:
df_tr = pd.read_csv('./data/train.csv')

In [46]:
df_tr[df_tr['indoor'] == True].shape

(4621, 16)

## Separando a Base em Treino e Teste

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    df_tr.drop(['ponto_id', 'lat', 'lon', 'indoor'], axis=1),
    df_tr[['indoor']],
    test_size=0.1, 
    random_state=42,
    stratify=df_tr['indoor']
)

In [11]:
X_test.shape

(678, 12)

## Pré-processamento

* Scaler

In [13]:
scaler_dtr = RobustScaler()
scaler_knr = RobustScaler()
scaler_rfr = RobustScaler()

## Definindo Modelos e Otimizador de Hiper-parâmetro

* Decision Tree Regressor

In [14]:
dtr = DecisionTreeClassifier(random_state=10)

In [15]:
param_tree = {
    'criterion':  ['gini', 'entropy'],
    'min_samples_split': [x for x in range(2, 101)],
    'min_samples_leaf': [x for x in range(2, 51)],
    'max_features':  ['auto','sqrt','log2'],
    'max_depth': [x for x in range(1, 31)],
    'splitter': ['best', 'random']
}

opt_dtr = BayesSearchCV(dtr, param_tree, n_iter=25, random_state=10)

* KNN

In [16]:
knr = KNeighborsClassifier()

In [17]:
param_knn = {
    'n_neighbors': [x for x in range(1, 101)],
    'weights': ['uniform', 'distance'],
    'p': [1, 2, 3]
}

opt_knr = BayesSearchCV(knr, param_knn, n_iter=25, random_state=10)

* Random Forest Regressor

In [18]:
rfr = RandomForestClassifier(random_state=10)

In [19]:
param_rf = {
    'n_estimators': [x for x in range(1, 51)],
    'criterion': ['gini', 'entropy'],
    'min_samples_split': [x for x in range(2, 101)],
    'min_samples_leaf' : [x for x in range(2, 51)],
    'max_depth' : [x for x in range(1, 11)],
    'max_features' : ['auto','sqrt','log2']
 }

opt_rfr = BayesSearchCV(rfr, param_rf, n_iter=25, random_state=10)

## Conectando Pipelines

In [20]:
pipe_dtr = Pipeline(steps=[
    ('scaler', scaler_dtr),
    ('model', opt_dtr)
])

pipe_knr = Pipeline(steps=[
    ('scaler', scaler_knr),
    ('model', opt_knr)
])

pipe_rfr = Pipeline(steps=[
    ('scaler', scaler_rfr),
    ('model', opt_rfr)
])

## Treinando Modelos

> Função de avaliação

In [41]:
from sklearn.metrics import accuracy_score, recall_score

def eval_regressor(X, y, model):
    print('Accuracy:', accuracy_score(y, model.predict(X)))
    print('Recall:', recall_score(y, model.predict(X)))

* DTR

In [38]:
pipe_dtr.fit(X_train, y_train)



In [39]:
pipe_dtr.steps[-1][-1].best_estimator_.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'entropy',
 'max_depth': 13,
 'max_features': 'log2',
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 7,
 'min_samples_split': 8,
 'min_weight_fraction_leaf': 0.0,
 'random_state': 10,
 'splitter': 'best'}

In [42]:
print('-----> Train:')
eval_regressor(X_train, y_train, pipe_dtr)
print('-----> Test:')
eval_regressor(X_test, y_test, pipe_dtr)

-----> Train:
Accuracy: 0.9996719698212235
Recall: 0.9995191151719163
-----> Test:
Accuracy: 1.0
Recall: 1.0


* KNR

In [31]:
pipe_knr.fit(X_train, y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


In [32]:
pipe_knr.steps[-1][-1].best_estimator_.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 6,
 'p': 1,
 'weights': 'uniform'}

In [43]:
print('-----> Train:')
eval_regressor(X_train, y_train, pipe_knr)
print('-----> Test:')
eval_regressor(X_test, y_test, pipe_knr)

-----> Train:
Accuracy: 1.0
Recall: 1.0
-----> Test:
Accuracy: 1.0
Recall: 1.0


* RFR

In [34]:
pipe_rfr.fit(X_train, y_train)

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  warn(
  estimator.fit(X_train, y_train, **fit_params)
  warn(
  estimator.fit(X_train, y_train, **fit_params)
  warn(
  estimator.fit(X_train, y_train, **fit_params)
  warn(
  estimator.fit(X_train, y_train, **fit_params)
  warn(
  estimator.fit(X_train, y_train, **fit_params)
  warn(
  estimator.fit(X_train, y_train, **fit_params)
  warn(
  estimator.fit(X_train, y_train, **fit_params)
  warn(
  estimator.fit(X_train, y_train, **fit_params)
  warn(
  estimator.fit(

In [35]:
pipe_rfr.steps[-1][-1].best_estimator_.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'entropy',
 'max_depth': 3,
 'max_features': 'log2',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 32,
 'min_samples_split': 52,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 33,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 10,
 'verbose': 0,
 'warm_start': False}

In [44]:
print('-----> Train:')
eval_regressor(X_train, y_train, pipe_rfr)
print('-----> Test:')
eval_regressor(X_test, y_test, pipe_rfr)

-----> Train:
Accuracy: 1.0
Recall: 1.0
-----> Test:
Accuracy: 1.0
Recall: 1.0
