# Análise Exploratória 2

## Bibliotecas necessárias + opções de visualização do Pandas

In [1]:
# Required libraries and pandas display options
import pandas as pd
import numpy as np
import json
from urllib.request import urlopen
import warnings

# Pandas display settings: up to 100 columns and 100 rows, 100 characters per
# cell, and floats rendered with thousands separators and two decimals.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.float_format', '{:,.2f}'.format)

# Silence every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Default seed, kept available in case reproducibility is needed.
myseed = 484

# Auxiliary project files.
# NOTE(review): the star import hides which names this notebook takes from
# `aux_files`; `categorical_maps`, used further down, is not defined in any
# visible cell and presumably comes from here — confirm, and prefer an
# explicit `from aux_files import <names>`.
from aux_files import *

## Recebendo os dados e abrindo em um DataFrame

In [2]:
# Read the CSV into `df` and preview its first rows.
# NOTE(review): the file name suggests an already pre-processed dataset —
# confirm where it is produced.
data = 'df_01_tratado_inicialmente.csv'
df = pd.read_csv(data)
df.head()

Unnamed: 0,customerID,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,Charges_Total
0,0002-ORFBO,No,Female,No,Yes,Yes,9,Yes,No,DSL,No,Yes,No,Yes,Yes,No,One year,Yes,Mailed check,65.6,590.4
1,0003-MKNFE,No,Male,No,No,No,9,Yes,Yes,DSL,No,No,No,No,No,Yes,Month-to-month,No,Mailed check,59.9,539.1
2,0004-TLHLJ,Yes,Male,No,No,No,4,Yes,No,Fiber optic,No,No,Yes,No,No,No,Month-to-month,Yes,Electronic check,73.9,295.6
3,0011-IGKFF,Yes,Male,Yes,Yes,No,13,Yes,No,Fiber optic,No,Yes,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,98.0,1274.0
4,0013-EXCHZ,Yes,Female,Yes,Yes,No,3,Yes,No,Fiber optic,No,No,No,Yes,Yes,No,Month-to-month,Yes,Mailed check,83.9,251.7


In [3]:
# Summarise column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   Churn             7043 non-null   object 
 2   gender            7043 non-null   object 
 3   SeniorCitizen     7043 non-null   object 
 4   Partner           7043 non-null   object 
 5   Dependents        7043 non-null   object 
 6   tenure            7043 non-null   int64  
 7   PhoneService      7043 non-null   object 
 8   MultipleLines     7043 non-null   object 
 9   InternetService   7043 non-null   object 
 10  OnlineSecurity    7043 non-null   object 
 11  OnlineBackup      7043 non-null   object 
 12  DeviceProtection  7043 non-null   object 
 13  TechSupport       7043 non-null   object 
 14  StreamingTV       7043 non-null   object 
 15  StreamingMovies   7043 non-null   object 
 16  Contract          7043 non-null   object 


In [4]:
# Descriptive statistics for the numeric columns.
df.describe()

Unnamed: 0,tenure,Charges_Monthly,Charges_Total
count,7043.0,7043.0,7043.0
mean,32.37,64.76,2279.58
std,24.56,30.09,2264.73
min,0.0,18.25,0.0
25%,9.0,35.5,394.0
50%,29.0,70.35,1393.6
75%,55.0,89.85,3786.1
max,72.0,118.75,8550.0


In [5]:
# Show the per-column mapping from category labels to integer codes.
# NOTE(review): this name is not defined in the visible cells; presumably it
# is provided by the `aux_files` star import — confirm.
categorical_maps

{'Churn': {'No': 0, 'Yes': 1},
 'gender': {'Female': 0, 'Male': 1},
 'SeniorCitizen': {'No': 0, 'Yes': 1},
 'Partner': {'Yes': 1, 'No': 0},
 'Dependents': {'Yes': 1, 'No': 0},
 'PhoneService': {'Yes': 1, 'No': 0},
 'MultipleLines': {'Yes': 2, 'No': 1, 'No phone service': 0},
 'InternetService': {'DSL': 1, 'Fiber optic': 2, 'No': 0},
 'OnlineSecurity': {'No': 1, 'Yes': 2, 'No internet service': 0},
 'OnlineBackup': {'Yes': 2, 'No': 1, 'No internet service': 0},
 'DeviceProtection': {'Yes': 2, 'No': 1, 'No internet service': 0},
 'TechSupport': {'Yes': 2, 'No': 1, 'No internet service': 0},
 'StreamingTV': {'Yes': 2, 'No': 1, 'No internet service': 0},
 'StreamingMovies': {'Yes': 2, 'No': 1, 'No internet service': 0},
 'Contract': {'One year': 12, 'Month-to-month': 1, 'Two year': 24},
 'PaperlessBilling': {'Yes': 1, 'No': 0},
 'PaymentMethod': {'Mailed check': 0,
  'Electronic check': 1,
  'Credit card (automatic)': 2,
  'Bank transfer (automatic)': 3}}

In [10]:
# Encode the categorical columns as integers on a copy, so the raw `df`
# stays untouched and this cell can be re-run safely.
df2 = df.copy()
for col in df2.columns:
    if col not in categorical_maps:
        continue  # no mapping for this column: keep it as is
    encoded = df2[col].map(categorical_maps[col])
    # Series.map yields NaN for any label absent from the mapping; fail loudly
    # rather than silently introducing missing values into the encoded frame.
    unmapped = df2.loc[encoded.isna() & df2[col].notna(), col].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Column {col!r} has values missing from categorical_maps: {unmapped.tolist()}"
        )
    df2[col] = encoded


In [12]:
# Preview the first rows of `df2` after the categorical mapping.
df2.head()

Unnamed: 0,customerID,Churn,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,Charges_Monthly,Charges_Total
0,0002-ORFBO,0,0,0,1,1,9,1,1,1,1,2,1,2,2,1,12,1,0,65.6,590.4
1,0003-MKNFE,0,1,0,0,0,9,1,2,1,1,1,1,1,1,2,1,0,0,59.9,539.1
2,0004-TLHLJ,1,1,0,0,0,4,1,1,2,1,1,2,1,1,1,1,1,1,73.9,295.6
3,0011-IGKFF,1,1,1,1,0,13,1,1,2,1,2,2,1,2,2,1,1,1,98.0,1274.0
4,0013-EXCHZ,1,0,1,1,0,3,1,1,2,1,1,1,2,2,1,1,1,0,83.9,251.7


In [15]:
# Share of each Churn class, expressed as a percentage of all rows.
df2['Churn'].value_counts(normalize=True).mul(100)

0   73.46
1   26.54
Name: Churn, dtype: float64