# Análise Exploratória

## Bibliotecas necessárias + opções de visualização do Pandas

In [1]:
import pandas as pd
import numpy as np
import json
from urllib.request import urlopen

# Let pandas render up to 100 columns and 100 rows before truncating output.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)

# Fixed seed, passed as random_state when sampling so draws are repeatable.
notrandomseed = 484


## Recebendo os dados e abrindo em um DataFrame

In [2]:
# Download the raw Telco customer-churn JSON and flatten it into a DataFrame.
data_url = "https://github.com/sthemonica/alura-voz/blob/main/Dados/Telco-Customer-Churn.json?raw=true"

# The context manager closes the HTTP response (and its underlying socket)
# as soon as the body has been read, instead of leaving it open.
with urlopen(data_url) as response:
    data_json = json.loads(response.read())

# Flatten nested objects up to two levels deep, joining nested keys with "_"
# (this is what produces names such as account_Charges_Monthly).
df = pd.json_normalize(data_json, max_level=2, sep='_')
df.head()

Unnamed: 0,customerID,Churn,customer_gender,customer_SeniorCitizen,customer_Partner,customer_Dependents,customer_tenure,phone_PhoneService,phone_MultipleLines,internet_InternetService,internet_OnlineSecurity,internet_OnlineBackup,internet_DeviceProtection,internet_TechSupport,internet_StreamingTV,internet_StreamingMovies,account_Contract,account_PaperlessBilling,account_PaymentMethod,account_Charges_Monthly,account_Charges_Total
0,0002-ORFBO,No,Female,0,Yes,Yes,9,Yes,No,DSL,No,Yes,No,Yes,Yes,No,One year,Yes,Mailed check,65.6,593.3
1,0003-MKNFE,No,Male,0,No,No,9,Yes,Yes,DSL,No,No,No,No,No,Yes,Month-to-month,No,Mailed check,59.9,542.4
2,0004-TLHLJ,Yes,Male,0,No,No,4,Yes,No,Fiber optic,No,No,Yes,No,No,No,Month-to-month,Yes,Electronic check,73.9,280.85
3,0011-IGKFF,Yes,Male,1,Yes,No,13,Yes,No,Fiber optic,No,Yes,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,98.0,1237.85
4,0013-EXCHZ,Yes,Female,1,Yes,No,3,Yes,No,Fiber optic,No,No,No,Yes,Yes,No,Month-to-month,Yes,Mailed check,83.9,267.4


## Analisando superficialmente as informações sobre o DataFrame

In [21]:
# Print a summary of the DataFrame: index range, and for each column its
# non-null count and dtype (verbose=True forces the full per-column listing).
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7267 entries, 0 to 7266
Data columns (total 21 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   customerID                 7267 non-null   object 
 1   Churn                      7267 non-null   object 
 2   customer_gender            7267 non-null   object 
 3   customer_SeniorCitizen     7267 non-null   int64  
 4   customer_Partner           7267 non-null   object 
 5   customer_Dependents        7267 non-null   object 
 6   customer_tenure            7267 non-null   int64  
 7   phone_PhoneService         7267 non-null   object 
 8   phone_MultipleLines        7267 non-null   object 
 9   internet_InternetService   7267 non-null   object 
 10  internet_OnlineSecurity    7267 non-null   object 
 11  internet_OnlineBackup      7267 non-null   object 
 12  internet_DeviceProtection  7267 non-null   object 
 13  internet_TechSupport       7267 non-null   objec

### Verificando linhas duplicadas

In [13]:
# Count rows that are exact repeats (all columns equal) of an earlier row.
df.duplicated().sum()

0

### Criação de um DataFrame para armazenar informações de verificação

In [20]:
# Per-column diagnostics for `df`: the resulting table has one row per column.

# "blank" for object (str) columns: entries that are empty once surrounding
# whitespace is stripped.
temp_series1 = df[df.select_dtypes('object').columns].apply(
    lambda col: col.str.strip().eq('').sum()
)
# "blank" for numeric (int and float) columns: number of missing values.
temp_series2 = df.select_dtypes(['float', 'integer']).isna().sum()
temp_series = pd.concat([temp_series1, temp_series2]).rename('blank')

df_check = (
    pd.DataFrame({'coluna': df.columns})
    # Distinct values per column (at most the number of rows in df;
    # 7267 according to the df.info() output).
    .assign(unique=df.nunique().values)
    # Null count per column.
    .assign(isnull=df.isna().sum().values)
    # Left merge keeps every column of df and attaches its blank count,
    # matched by column name against the index of temp_series.
    .merge(temp_series, how='left', left_on='coluna', right_index=True)
    # Data type of each column.
    .assign(dtypes=df.dtypes.values)
)

# Display the summary table
df_check

Unnamed: 0,coluna,unique,isnull,blank,dtypes
0,customerID,7267,0,0,object
1,Churn,3,0,224,object
2,customer_gender,2,0,0,object
3,customer_SeniorCitizen,2,0,0,int64
4,customer_Partner,2,0,0,object
5,customer_Dependents,2,0,0,object
6,customer_tenure,73,0,0,int64
7,phone_PhoneService,2,0,0,object
8,phone_MultipleLines,3,0,0,object
9,internet_InternetService,3,0,0,object


### Analisando uma amostra

In [16]:
# Inspect 15 randomly drawn rows; the fixed random_state makes the draw repeatable.
df.sample(15, random_state=notrandomseed)

Unnamed: 0,customerID,Churn,customer_gender,customer_SeniorCitizen,customer_Partner,customer_Dependents,customer_tenure,phone_PhoneService,phone_MultipleLines,internet_InternetService,internet_OnlineSecurity,internet_OnlineBackup,internet_DeviceProtection,internet_TechSupport,internet_StreamingTV,internet_StreamingMovies,account_Contract,account_PaperlessBilling,account_PaymentMethod,account_Charges_Monthly,account_Charges_Total
1630,2320-JRSDE,Yes,Female,0,Yes,Yes,1,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Month-to-month,Yes,Electronic check,19.9,19.9
5777,7893-IXHRQ,Yes,Male,0,No,No,1,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Month-to-month,No,Electronic check,20.55,20.55
3281,4573-JKNAE,No,Male,0,No,Yes,12,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,No,Bank transfer (automatic),19.35,212.3
967,1357-BIJKI,No,Male,0,Yes,No,50,Yes,Yes,Fiber optic,No,Yes,No,No,Yes,Yes,One year,Yes,Electronic check,98.25,4858.7
2505,3500-NSDOA,No,Male,0,Yes,Yes,68,Yes,Yes,DSL,No,Yes,No,Yes,No,Yes,Two year,No,Credit card (automatic),70.8,4859.95
4466,6124-ACRHJ,No,Female,0,No,No,1,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Month-to-month,No,Mailed check,19.75,19.75
1309,1891-FZYSA,Yes,Male,1,Yes,No,69,Yes,Yes,Fiber optic,No,Yes,No,No,Yes,No,Month-to-month,Yes,Electronic check,89.95,6143.15
3368,4678-DVQEO,Yes,Female,0,No,No,1,Yes,No,DSL,No,No,No,Yes,No,No,Month-to-month,Yes,Electronic check,52.2,52.2
5944,8111-SLLHI,Yes,Male,1,Yes,No,39,Yes,Yes,Fiber optic,No,Yes,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,105.65,4284.8
2998,4163-HFTUK,No,Male,0,No,No,51,Yes,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,One year,No,Electronic check,19.1,1007.8
