# Deploy do melhor modelo

In [12]:
# Imports
import joblib
import pandas as pd
import numpy as np
import sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# Comparativo dos modelos

In [10]:
df_modelos = pd.read_csv("C:/Projetos Pessoais/DataScience/analise_Previsao_Churn_teleComunicacoes/dados/df_comparativo_modelos")

In [11]:
df_modelos

Unnamed: 0,Nome,Algoritmo,ROC_AUC Score,AUC Score,Acuracia
0,modelo_v1,Logistic Regression,0.740794,0.822777,0.752133
1,modelo_v2,Decision Tree,0.734916,0.810766,0.721801
2,modelo_v3,KNN,0.726609,0.808208,0.689573
3,modelo_v4,Random Forest,0.700543,0.806495,0.763981
4,modelo_v5,SVM,0.703066,0.740904,0.725118
5,modelo_v6,Naive Bayes,0.724418,0.795474,0.708057


 - Podemos verificar que os melhores modelos foram: Regressão Logística e Random Forest
 
 ### Para esse problema, utilizarei o modelo_v1 (Logistic Regession), pelos seguintes motivos:
 - Possui maior ROC_AUC e AUC Score, quando comparado ao Random Forest, essa métrica verifica a capacidade do modelo de verificar as classes positivas corretamente

### Carregar dataset para deploy

In [4]:
dados_deploy = pd.read_csv("C:/Projetos Pessoais/DataScience/analise_Previsao_Churn_teleComunicacoes/dados/dados_para_deploy")

In [5]:
dados_deploy

Unnamed: 0,customerID,sexo,idoso,casado(a),possuiDependente,fidelidade,servico_telefonico,multiplas_linhas,servico_internet,seguranca_online,...,protecao_dispositivo,suporte_tecnico,Streaming_TV,Streaming_filmes,contrato,fatura_sem_papel,forma_pagamento,valor_mensal,valor_total_pago,Churn
0,8496-DMZUK,Male,0,No,No,30,Yes,Yes,Fiber optic,Yes,...,Yes,No,No,No,One year,No,Bank transfer (automatic),90.4,2820.65,No
1,4472-LVYGI,Female,0,Yes,Yes,0,No,No phone service,DSL,Yes,...,Yes,Yes,Yes,No,Two year,Yes,Bank transfer (automatic),52.55,52.55,No
2,3156-QLHBO,Male,0,No,Yes,2,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Month-to-month,No,Mailed check,19.25,48.35,No
3,1183-CANVH,Female,0,Yes,No,23,Yes,No,DSL,No,...,No,Yes,Yes,No,One year,No,Bank transfer (automatic),60.0,1347.15,No
4,2706-QZIHY,Female,0,Yes,No,15,Yes,Yes,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,25.2,387.9,No
5,8782-NUUOL,Male,0,No,No,60,Yes,No,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,No,Mailed check,79.0,4801.1,No
6,2325-WINES,Female,0,No,No,32,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),104.05,3416.85,No
7,0795-GMVQO,Male,0,Yes,No,67,Yes,Yes,Fiber optic,Yes,...,Yes,No,Yes,Yes,One year,No,Credit card (automatic),109.9,7332.4,No
8,3256-EZDBI,Male,1,Yes,No,51,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Credit card (automatic),29.45,1459.35,No
9,2320-SLKMB,Female,0,No,No,26,No,No phone service,DSL,No,...,Yes,Yes,Yes,No,Month-to-month,Yes,Credit card (automatic),44.45,1183.8,No


In [18]:
dados_deploy.shape

(10, 21)

In [39]:
map_idoso = {
    0: 'No',
    1: 'Yes'
}
dados_deploy['idoso'] = dados_deploy['idoso'].map(map_idoso)

### Pré Processamento dos novos dados

In [69]:
def transformaDF(df):
    
    # Gerando cópia do DF
    dados_copia = df.copy()
    
    # Retirando colunas não utilizadas para os treinamentos (exceto ID)
    dados_copia.drop(['customerID','multiplas_linhas', 'protecao_dispositivo', 
                      'servico_telefonico', 'sexo', 'Streaming_filmes',
                      'seguranca_online', 'backup_online','Churn'], axis=1, inplace=True)
    
    """
    Padronizando variáveis categóricas
    """ 
    dados_categoricos = dados_copia.select_dtypes(include=['object'])
    metaDados_cat = dados_categoricos.columns
        
    # Loop para aplica One Hot Encoding
    for cat in metaDados_cat:
        onehots = pd.get_dummies(dados_copia[cat], prefix=cat)
        dados_copia = dados_copia.join(onehots)
        dados_copia = dados_copia.drop(columns = cat)
        
    # Lista com os novos nomes de variaveis categoricas
    variaveis_processadas = dados_copia.select_dtypes(include=['bool'])
    metaDados_bol = variaveis_processadas.columns
    
    # Loop FOR para binarizar as categoricas com 0 e 1
    for col in metaDados_bol:
        dados_copia[col] = np.where(dados_copia[col] == True, 1, 0)
    
    """
    Padronizando variáveis numericas
    """ 
    # Lista das variaveis numericas
    nums = ['fidelidade','valor_mensal', 'valor_total_pago']
    
    # Aplicando normalização dos dados para as variaveis numericas
    for n in nums:
        dados_copia[n] = MinMaxScaler().fit_transform(dados_copia[n].values.reshape(len(dados_copia), 1))
    
    return dados_copia

transformaDF(dados_deploy)

Unnamed: 0,fidelidade,valor_mensal,valor_total_pago,idoso_No,idoso_Yes,casado(a)_No,casado(a)_Yes,possuiDependente_No,possuiDependente_Yes,servico_internet_DSL,...,Streaming_TV_No internet service,Streaming_TV_Yes,contrato_Month-to-month,contrato_One year,contrato_Two year,fatura_sem_papel_No,fatura_sem_papel_Yes,forma_pagamento_Bank transfer (automatic),forma_pagamento_Credit card (automatic),forma_pagamento_Mailed check
0,0.447761,0.784887,0.380599,1,0,1,0,1,0,0,...,0,0,0,1,0,1,0,1,0,0
1,0.0,0.367347,0.000577,1,0,0,1,0,1,1,...,0,1,0,0,1,0,1,1,0,0
2,0.029851,0.0,0.0,1,0,1,0,0,1,0,...,1,0,1,0,0,1,0,0,0,1
3,0.343284,0.449531,0.178307,1,0,0,1,1,0,1,...,0,1,0,1,0,1,0,1,0,0
4,0.223881,0.065637,0.046616,1,0,0,1,1,0,0,...,1,0,0,0,1,1,0,0,0,1
5,0.895522,0.659129,0.652487,1,0,1,0,1,0,1,...,0,1,0,1,0,1,0,0,0,1
6,0.477612,0.935466,0.462449,1,0,1,0,1,0,0,...,0,1,0,1,0,0,1,0,1,0
7,1.0,1.0,1.0,1,0,0,1,1,0,0,...,0,1,0,1,0,1,0,0,1,0
8,0.761194,0.112521,0.193711,0,1,0,1,1,0,1,...,0,0,1,0,0,0,1,0,1,0
9,0.38806,0.277992,0.155882,1,0,1,0,1,0,1,...,0,1,1,0,0,0,1,0,1,0


In [65]:
novos_dados = transformaDF(dados_deploy)
novos_dados.dtypes

fidelidade                                     int64
valor_mensal                                 float64
valor_total_pago                             float64
idoso_No                                       int32
idoso_Yes                                      int32
casado(a)_No                                   int32
casado(a)_Yes                                  int32
possuiDependente_No                            int32
possuiDependente_Yes                           int32
servico_internet_DSL                           int32
servico_internet_Fiber optic                   int32
servico_internet_No                            int32
suporte_tecnico_No                             int32
suporte_tecnico_No internet service            int32
suporte_tecnico_Yes                            int32
Streaming_TV_No                                int32
Streaming_TV_No internet service               int32
Streaming_TV_Yes                               int32
contrato_Month-to-month                       

In [44]:
dados_deploy.dtypes

customerID               object
sexo                     object
idoso                    object
casado(a)                object
possuiDependente         object
fidelidade                int64
servico_telefonico       object
multiplas_linhas         object
servico_internet         object
seguranca_online         object
backup_online            object
protecao_dispositivo     object
suporte_tecnico          object
Streaming_TV             object
Streaming_filmes         object
contrato                 object
fatura_sem_papel         object
forma_pagamento          object
valor_mensal            float64
valor_total_pago        float64
Churn                    object
dtype: object

In [46]:
if dados_deploy.select_dtypes(include=['object']) 

SyntaxError: invalid syntax (752396010.py, line 1)

### Carregando o modelo_v1 do disco

In [None]:
# Carregando o modelo do disco
melhor_modelo = joblib.load('modelos/modelo_v1.pkl')
melhor_modelo

### Gerando previsões para novo dataset 