In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [4]:
base = pd.read_excel('Base Dados.xlsx')

In [6]:
x = pd.DataFrame(base[['VAL_SH',
                       'VAL_SP',
                       'QT_DIARIAS',
                      'DIAR_ACOM']]) 
y = base['MORTE']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                   train_size=0.7,
                                                   random_state=0)

#### Treinar modelo

In [9]:
from sklearn.tree import DecisionTreeClassifier

In [12]:
modelo_ar = DecisionTreeClassifier()
modelo_ar = modelo_ar.fit(X_train, y_train)
y_pred_ar = modelo_ar.predict(X_test)

In [13]:
from sklearn import metrics

metrics.confusion_matrix(y_test,y_pred_ar)

array([[4875,  444],
       [ 428,  253]], dtype=int64)

In [15]:
y_test.shape

(6000,)

In [17]:
print(metrics.classification_report(y_test,y_pred_ar))

              precision    recall  f1-score   support

           0       0.92      0.92      0.92      5319
           1       0.36      0.37      0.37       681

    accuracy                           0.85      6000
   macro avg       0.64      0.64      0.64      6000
weighted avg       0.86      0.85      0.86      6000



### Cross-validation

### K-fold

In [18]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [20]:
#class sklearn.model_selection.KFold(n_splits=5, *, shuffle=False, random_state=None)
kf = KFold(shuffle=True,
           random_state=0)

In [26]:
modelo_kf = DecisionTreeClassifier()

score = cross_val_score(modelo_kf,
                        X_train,
                        y_train, 
                        cv=kf,
                        scoring='f1')

In [27]:
score.mean()

0.3466947860338558

In [28]:
from sklearn.model_selection import StratifiedKFold

In [29]:
modelo_skf = DecisionTreeClassifier()

In [30]:
skf = StratifiedKFold(n_splits=5)

In [31]:
score = cross_val_score(modelo_kf,
                        X_train,
                        y_train, 
                        cv=skf,
                        scoring='f1')

In [32]:
score

array([0.33647799, 0.34461538, 0.3588907 , 0.2891933 , 0.3400936 ])

## Bootstrap

In [33]:
from sklearn.model_selection import ShuffleSplit

In [34]:
modelo_sp = DecisionTreeClassifier()

In [None]:
#ShuffleSplit(n_splits=10, *, test_size=None, train_size=None, random_state=None)

In [36]:
bs = ShuffleSplit(n_splits=5,
                 test_size=0.5,
                 random_state=0)

In [41]:
score = cross_val_score(modelo_kf,
                        X_train,
                        y_train, 
                        cv=bs,
                        scoring='f1')

In [42]:
score.mean()

0.3295997439780313

## Grid Search

In [43]:
from sklearn.model_selection import GridSearchCV

In [56]:
modelo_ar = DecisionTreeClassifier(random_state=0)

In [77]:
param_grid = {'max_depth':range(60),
              'min_samples_split':[10]}

In [102]:
grid_search = GridSearchCV(modelo_ar,
                           param_grid,
                           cv=5,
                           scoring='recall')

In [103]:
grid_search.fit(X_train,y_train)

5 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\ander\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ander\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 937, in fit
    super().fit(
  File "C:\Users\ander\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 306, in fit
    raise ValueError("max_depth must be greater than zero. ")
ValueError: max_depth must be greater than zero. 

 0.07159559 0.09355099 0.11929181 0.1406242  0.16070661 0.18960194
 0.20402989 0.21030342 0.22098

GridSearchCV(cv=5,
             estimator=DecisionTreeClassifier(max_depth=8,
                                              min_samples_split=10),
             param_grid={'max_depth': range(0, 60), 'min_samples_split': [10]},
             scoring='recall')

In [104]:
melhores_param = grid_search.best_estimator_

In [105]:
melhores_param

DecisionTreeClassifier(max_depth=55, min_samples_split=10)

In [109]:
y_pred = grid_search.predict(X_test)

In [110]:
from sklearn import metrics

In [111]:
print(metrics.classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.92      0.93      0.92      5319
           1       0.37      0.33      0.35       681

    accuracy                           0.86      6000
   macro avg       0.64      0.63      0.64      6000
weighted avg       0.85      0.86      0.86      6000



### Random Forest

In [113]:
from sklearn.ensemble import RandomForestClassifier

In [133]:
modelo_rf = RandomForestClassifier(n_estimators=100,
                                  random_state=0)

In [134]:
modelo_rf.fit(X_train,y_train)

RandomForestClassifier(random_state=0)

In [135]:
y_pred = modelo_rf.predict(X_test)

In [136]:
print(metrics.classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.91      0.97      0.94      5319
           1       0.50      0.26      0.34       681

    accuracy                           0.89      6000
   macro avg       0.70      0.61      0.64      6000
weighted avg       0.86      0.89      0.87      6000



In [141]:
importancia = pd.DataFrame({

    'nome': modelo_rf.feature_names_in_,
    'importancia': modelo_rf.feature_importances_
})

In [142]:
importancia

Unnamed: 0,nome,importancia
0,VAL_SH,0.449264
1,VAL_SP,0.360285
2,QT_DIARIAS,0.139576
3,DIAR_ACOM,0.050875
