In [1]:
# Importando as libs 
import pandas as pd
import numpy as np 

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Carregando o dataset dos peixes. 
dataframe = pd.read_csv('/content/drive/MyDrive/PFC/Dados Loja/TREINAMENTO/dados_treinamento.csv',sep=';')

In [4]:
# Apresentando informações.
dataframe.head()

Unnamed: 0,Idade,CEP,Sexo,Estado civil,Renda Mensal,Inadimplente
0,40,88950000,2,1,700,1
1,43,0,0,0,1600,0
2,49,88950000,2,1,1700,0
3,43,0,0,0,800,1
4,62,88950000,1,0,1800,0


In [5]:
dataframe.drop(['CEP'],axis=1,inplace = True)

In [6]:
# Armazenando as informações de entrada.
x = dataframe.drop(['Inadimplente'],axis=1)

In [7]:
# Armazenando a variavel target.
y = dataframe['Inadimplente']

In [8]:
# Importando o label encoder.
from sklearn.preprocessing import LabelEncoder

In [9]:
# Criando o modelo.
modelLabelEncoder = LabelEncoder()

In [10]:
# Realizando a transformação.
yOrdinal = modelLabelEncoder.fit_transform(y)

In [11]:
# Importando modelos para serem utilizados no pipeline.
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# Importando o pipeline.
from sklearn.pipeline import  Pipeline

In [12]:
# Cria um pipeline.
pipe = Pipeline(steps=[
                    ('classifier', SVC())
                    ]
            )
    

In [13]:
# Criar espaço de algoritmos de aprendizagem candidatos e seus hiperparâmetros.
search_space = [{'classifier': [RandomForestClassifier(n_jobs=-1)],
                'classifier__n_estimators' : [10,100,500]
                },
               
                {'classifier':[SVC(random_state=5,cache_size = 400)],
                  'classifier__kernel' : ['rbf'],
                  'classifier__C' : [0.5,0.75,1,1.25,1.5]
                },
                {'classifier':[KNeighborsClassifier(n_jobs=-1)],
                  'classifier__n_neighbors' : [5,10,15]
                },
             
                ]

In [14]:
# Importando o GRIDSEARCH
from sklearn.model_selection import GridSearchCV

In [15]:
 # Cria o gridsearch de busca.
grid_model = GridSearchCV(estimator = pipe,param_grid = search_space, 
                      scoring = 'accuracy' ,
                      cv=5, verbose = 2, n_jobs=-1)

In [16]:
# Treina todos os modelos.
models = grid_model.fit(x, yOrdinal)
    

Fitting 5 folds for each of 11 candidates, totalling 55 fits


In [17]:
models

GridSearchCV(cv=5, estimator=Pipeline(steps=[('classifier', SVC())]), n_jobs=-1,
             param_grid=[{'classifier': [RandomForestClassifier(n_estimators=10,
                                                                n_jobs=-1)],
                          'classifier__n_estimators': [10, 100, 500]},
                         {'classifier': [SVC(cache_size=400, random_state=5)],
                          'classifier__C': [0.5, 0.75, 1, 1.25, 1.5],
                          'classifier__kernel': ['rbf']},
                         {'classifier': [KNeighborsClassifier(n_jobs=-1)],
                          'classifier__n_neighbors': [5, 10, 15]}],
             scoring='accuracy', verbose=2)

In [18]:
models.best_score_

0.9441892406440383

In [19]:
models.best_estimator_

Pipeline(steps=[('classifier',
                 RandomForestClassifier(n_estimators=10, n_jobs=-1))])

In [20]:
# Importando a lib para train_test_split = método houldout
from sklearn.model_selection import train_test_split

In [21]:
# Separando os dados de em treino e teste, de acordo com a % passada.
x_train,x_test, y_train,y_test = train_test_split(x,y,test_size = 0.20, random_state = 1)

In [22]:
y_predict = models.best_estimator_.predict(x_test)

In [23]:
from sklearn.metrics import classification_report

In [24]:
# Valores do melhor modelo.
print(classification_report(y_test,y_predict,target_names=['Não Inadimplente','Inadimplente']))

                  precision    recall  f1-score   support

Não Inadimplente       0.99      1.00      0.99       329
    Inadimplente       0.98      0.93      0.95        55

        accuracy                           0.99       384
       macro avg       0.98      0.96      0.97       384
    weighted avg       0.99      0.99      0.99       384



In [25]:
models.best_estimator_.predict_proba(x_test)

array([[1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.2       , 0.8       ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.9       , 0.1       ],
       [1.        , 0.        ],
       [0.16666667, 0.83333333],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.775     , 0.225     ],
       [1.        , 0.        ],
       [0.8       , 0.2       ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.

In [26]:
from joblib import dump, load
dump(models.best_estimator_, 'melhor_modelo.joblib') 

['melhor_modelo.joblib']