In [None]:
import warnings
import datetime as dt
import pandas as pd
import numpy as np
import xgboost
import pickle
from sklearn.metrics import (accuracy_score, confusion_matrix, 
                             balanced_accuracy_score, precision_recall_curve)
from sklearn.preprocessing import MinMaxScaler,StandardScaler,QuantileTransformer
from sklearn.utils import resample
from sklearn.pipeline import Pipeline
from sklearn.ensemble import (RandomForestClassifier,BaggingClassifier, 
                              AdaBoostClassifier,GradientBoostingClassifier)
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import model_selection
from imblearn.metrics import classification_report_imbalanced, geometric_mean_score
from imblearn.ensemble import (BalancedBaggingClassifier,BalancedRandomForestClassifier,
                              RUSBoostClassifier,EasyEnsembleClassifier)
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN, SMOTENC, BorderlineSMOTE
from imblearn.under_sampling import (InstanceHardnessThreshold,EditedNearestNeighbours, AllKNN,
                                    OneSidedSelection)

In [None]:
# Suppress every Python warning for the rest of the session (no category or
# module filter is given, so this applies to all warnings, including
# deprecation notices from the ML libraries imported above).
warnings.filterwarnings('ignore')

In [None]:
# 1. Prepare the models.
# Base pipeline: quantile scaling to a normal output distribution ('scl')
# followed by a classifier step ('clf'). The classifier given here is only a
# placeholder; the grid below replaces the 'clf' step with each candidate.
pipe = Pipeline(steps=[
    ('scl', QuantileTransformer(output_distribution='normal')),
    ('clf', BalancedBaggingClassifier(
        base_estimator=DecisionTreeClassifier(),
        sampling_strategy='auto',
        replacement=False,
        random_state=0,
        n_jobs=-1,
    )),
])

# Search space: one single-option grid per candidate ensemble, so the search
# effectively compares the four imbalance-aware classifiers against each other.
clf = [
    {'clf': [candidato]}
    for candidato in (
        BalancedBaggingClassifier(
            base_estimator=DecisionTreeClassifier(),
            sampling_strategy='auto',
            replacement=False,
            random_state=0,
            n_jobs=8,
        ),
        BalancedRandomForestClassifier(
            n_estimators=150,
            random_state=0,
            sampling_strategy='auto',
            replacement=False,
            n_jobs=-1,
            warm_start=True,
            bootstrap=False,
        ),
        RUSBoostClassifier(
            base_estimator=DecisionTreeClassifier(),
            n_estimators=130,
            learning_rate=1.0,
            algorithm='SAMME.R',
            sampling_strategy='auto',
            replacement=False,
            random_state=0,
        ),
        EasyEnsembleClassifier(
            n_estimators=130,
            base_estimator=AdaBoostClassifier(),
            warm_start=True,
            sampling_strategy='auto',
            replacement=False,
            n_jobs=-1,
            random_state=0,
            verbose=0,
        ),
    )
]

# 6-fold cross-validated search over the candidates. No `scoring` argument is
# passed, so the pipeline's own default `score` is what ranks them.
clf1 = GridSearchCV(pipe, clf, cv=6, verbose=0)

## Roubo

In [None]:
# Start timestamp for the robbery ("roubo") model update; a later cell
# subtracts it from the end timestamp to report the elapsed time.
inmodrou = dt.datetime.now()

In [None]:
# Load the robbery ("roubo") dataset.
roubo = pd.read_csv('/home/dev/poc/arquivos/model/out/roubo-v6.csv')

# Encode the 'possuisinistro' flag as 1 for 'SIM' and 0 for anything else.
roubo['possuisinistro'] = np.where(roubo['possuisinistro'] == 'SIM', 1, 0)

# Missing target values become class 0. Assign the result back instead of
# calling fillna(inplace=True) on the column selection: that chained in-place
# form may operate on a temporary copy and leave the frame unchanged
# (it does so under pandas Copy-on-Write).
roubo['tpsinistro'] = roubo['tpsinistro'].fillna(0)

# Target vector and feature matrix; identifier-like columns and the target
# itself are removed from the features, remaining NaNs are filled with 0.
# NOTE(review): 'possuisinistro' stays among the features while 'tpsinistro'
# is the target — confirm this does not leak the label.
yr = roubo['tpsinistro']
xr = roubo.drop(['nrviagem', 'cto', 'idade', 'tpsinistro'], axis=1)
xr = xr.fillna(0)

# Stratified 70/30 train/test split with a fixed seed.
x_tre, x_tse, y_tre, y_tse = train_test_split(
    xr, yr, random_state=10, test_size=0.3, stratify=yr)

# Convert every split to a plain NumPy array.
x_tre = np.array(x_tre)
x_tse = np.array(x_tse)
y_tre = np.array(y_tre)
y_tse = np.array(y_tse)

In [None]:
# Run the grid search over the candidate classifiers on the robbery training split.
clf1.fit(x_tre, y_tre)

# Classifier object taken from the winning grid entry.
# NOTE(review): depending on the scikit-learn version this object may be an
# unfitted template rather than the refit model; the refit pipeline (including
# the 'scl' scaling step) is clf1.best_estimator_. Confirm what the consumer
# of the .sav file expects before changing what is saved.
melhor_modelo_roubo = clf1.best_params_['clf']

# Save the model. The context manager guarantees the file is flushed and
# closed even if pickling fails (the handle used to be left open).
with open('/home/dev/poc/arquivos/model/out/modelo_roubo-v6.sav', 'wb') as arquivo_modelo:
    pickle.dump(melhor_modelo_roubo, arquivo_modelo)

In [None]:
# End timestamp and elapsed time of the robbery-model update.
fimmodrou = dt.datetime.now()
tempmodrou = fimmodrou - inmodrou
tempmodroustr = (str(tempmodrou))  # kept for any later cell that reads it

# Derive hours/minutes/seconds arithmetically instead of slicing the string
# form of the timedelta: fixed slice offsets break once the run takes 10+
# hours, spans days, or happens to have zero microseconds.
segundos_totais_rou = int(tempmodrou.total_seconds())
horas_rou, resto_rou = divmod(segundos_totais_rou, 3600)
minutos_rou, segundos_rou = divmod(resto_rou, 60)
print("Atualização do modelo de roubo levou {}hs {:02d}min {:02d}seg para ser concluído.".format(
    horas_rou, minutos_rou, segundos_rou))

In [None]:
# Report the mean cross-validated score of the best candidate found by the
# most recent grid-search fit.
melhor_score_cv = clf1.best_score_
print('Best score: {0}'.format(melhor_score_cv))

## Acidente

In [None]:
# Start timestamp for the accident ("acidente") model update. It reuses the
# end timestamp of the robbery-model timing instead of reading the clock
# again, so the cell that sets `fimmodrou` must have run first.
# NOTE(review): any time spent between that cell and the accident cells is
# counted as accident-model time — confirm this is intended.
inmodaci = fimmodrou

In [None]:
# Load the accident ("acidente") dataset.
acidente = pd.read_csv('/home/dev/poc/arquivos/model/out/acidente-v6.csv')

# Encode the 'possuisinistro' flag as 1 for 'SIM' and 0 for anything else.
acidente['possuisinistro'] = np.where(acidente['possuisinistro'] == 'SIM', 1, 0)

# Missing target values become class 0. Assign the result back instead of
# calling fillna(inplace=True) on the column selection: that chained in-place
# form may operate on a temporary copy and leave the frame unchanged
# (it does so under pandas Copy-on-Write).
acidente['tpsinistro'] = acidente['tpsinistro'].fillna(0)

# Target vector and feature matrix; identifier-like columns and the target
# itself are removed from the features, remaining NaNs are filled with 0.
# NOTE(review): 'possuisinistro' stays among the features while 'tpsinistro'
# is the target — confirm this does not leak the label.
ya = acidente['tpsinistro']
xa = acidente.drop(['nrviagem', 'cto', 'idade', 'tpsinistro'], axis=1)
xa = xa.fillna(0)

# Stratified 70/30 train/test split with a fixed seed. The split results
# overwrite the x_tre/x_tse/y_tre/y_tse names used by the robbery cells.
x_tre, x_tse, y_tre, y_tse = train_test_split(
    xa, ya, random_state=10, test_size=0.3, stratify=ya)

# NumPy copies of each split, stored under the shorter *_tr / *_ts names that
# the accident fit cell uses.
x_tr = np.array(x_tre)
x_ts = np.array(x_tse)
y_tr = np.array(y_tre)
y_ts = np.array(y_tse)

In [None]:
# Re-run the same grid search, this time on the accident training split.
clf1.fit(x_tr, y_tr)

# Classifier object taken from the winning grid entry.
# NOTE(review): depending on the scikit-learn version this object may be an
# unfitted template rather than the refit model; the refit pipeline (including
# the 'scl' scaling step) is clf1.best_estimator_. Confirm what the consumer
# of the .sav file expects before changing what is saved.
melhor_modelo_acidente = clf1.best_params_['clf']

# Save the model. The context manager guarantees the file is flushed and
# closed even if pickling fails (the handle used to be left open).
with open('/home/dev/poc/arquivos/model/out/modelo_acidente-v6.sav', 'wb') as arquivo_modelo:
    pickle.dump(melhor_modelo_acidente, arquivo_modelo)

In [None]:
# End timestamp and elapsed time of the accident-model update.
fimmodaci = dt.datetime.now()
tempmodaci = fimmodaci - inmodaci
tempmodacistr = (str(tempmodaci))  # kept for any later cell that reads it

# Report the ACCIDENT duration. The message used to be formatted from the
# robbery duration string (tempmodroustr), so it repeated the robbery timing.
# Hours/minutes/seconds are derived arithmetically instead of slicing the
# string form of the timedelta: fixed slice offsets break once the run takes
# 10+ hours, spans days, or happens to have zero microseconds.
segundos_totais_aci = int(tempmodaci.total_seconds())
horas_aci, resto_aci = divmod(segundos_totais_aci, 3600)
minutos_aci, segundos_aci = divmod(resto_aci, 60)
print("Atualização do modelo de acidente levou {}hs {:02d}min {:02d}seg para ser concluído.".format(
    horas_aci, minutos_aci, segundos_aci))