# **Churn EDA**


## Dependencies and Settings


In [None]:
import pandas as pd

# Directory (relative to the notebook) that holds the CSV datasets
ds_path = '../datasets'


def get_ds_path(filename: str) -> str:
    """Return the path of `filename` inside the `ds_path` directory."""
    return '{}/{}'.format(ds_path, filename)

## Data collection and preparation


### Customers Dataframe

---


In [None]:
# Read the customers CSV from the datasets directory
customers_file = 'churn_customers.csv'
customers_path = get_ds_path(customers_file)
df_customers = pd.read_csv(customers_path)

In [None]:
# Show the first 5 rows of the customers DataFrame
df_customers.head(5)

In [None]:
# Show the last 5 rows of the customers DataFrame
df_customers.tail(5)

In [None]:
# Show the DataFrame schema (columns, non-null counts and dtypes)
df_customers.info()

#### Rename columns


In [None]:
# Replace every column name at once; the new names are applied by position,
# so the list must match the number and order of the existing columns.
customer_columns = [
    'CustomerID',
    'Gender',
    'Above65yo',
    'HasPartner',
    'HasDependents',
]
df_customers.columns = customer_columns

df_customers.info()

### Contracts Dataframe

---


In [None]:
# Read the contracts CSV from the datasets directory
contracts_file = 'churn_contracts.csv'
contracts_path = get_ds_path(contracts_file)
df_contracts = pd.read_csv(contracts_path)

In [None]:
# Show the first 5 rows of the contracts DataFrame
df_contracts.head(5)

In [None]:
# Show the last 5 rows of the contracts DataFrame
df_contracts.tail(5)

In [None]:
# Show the DataFrame schema (columns, non-null counts and dtypes)
df_contracts.info()

In [None]:
# Capitalize the name of the tenure column
tenure_rename = {'tenure': 'Tenure'}
df_contracts.rename(columns=tenure_rename, inplace=True)

df_contracts.info()

#### Data transformation


In [None]:
# Convert TotalCharges from string to float.
# errors='coerce' turns any value that cannot be parsed as a number into NaN
# instead of raising, so check the non-null count printed by info() below.
# Bracket indexing is used so the assignment always targets the column.
df_contracts['TotalCharges'] = pd.to_numeric(
    df_contracts['TotalCharges'],
    errors='coerce',
)

df_contracts.info()

After the conversion, `TotalCharges` has 11 null values: entries that could not be parsed as numbers were coerced to NaN.


### Services Dataframe

---


In [None]:
# Read the services CSV from the datasets directory
services_file = 'churn_services.csv'
services_path = get_ds_path(services_file)
df_services = pd.read_csv(services_path)

In [None]:
# Show the first 5 rows of the services DataFrame
df_services.head(5)

In [None]:
# Show the last 5 rows of the services DataFrame
df_services.tail(5)

In [None]:
# Show the DataFrame schema (columns, non-null counts and dtypes)
df_services.info()

### Unify Contracts, Customers and Services Dataframes

---


In [None]:
# Compare the number of rows in each dataframe before merging them
contracts_length, services_length, costumers_length = map(
    len, (df_contracts, df_services, df_customers)
)

# Build a "contracts-services-customers" summary string of the row counts
length_comparative = '-'.join(
    str(count)
    for count in (contracts_length, services_length, costumers_length)
)

length_comparative

Rename primary identifiers


In [None]:
# Align the identifier column name with df_customers ('CustomerID')
services_id_rename = {'customerID': 'CustomerID'}
df_services.rename(columns=services_id_rename, inplace=True)

df_services.info()

In [None]:
# Align the identifier column name with df_customers ('CustomerID')
contracts_id_rename = {'customerID': 'CustomerID'}
df_contracts.rename(columns=contracts_id_rename, inplace=True)

df_contracts.info()

In [None]:
# Join customers, services and contracts on CustomerID into a new dataframe
df_churn = df_customers.merge(df_services, on=['CustomerID'])
df_churn = df_churn.merge(df_contracts, on=['CustomerID'])

df_churn.info()

## Missing value treatment


In [None]:
# Count the missing values in each column of the DataFrame
df_churn.isna().sum()

In [None]:
# Count the missing values in a single column (TotalCharges)
df_churn.TotalCharges.isna().sum()

In [None]:
# Select the rows that have at least one missing value
df_churn[df_churn.isna().any(axis=1)]

In [None]:
# Count how many columns have at least one missing value
df_churn.isna().any(axis=0).sum()

Removing missing values


In [None]:
# Drop one specific column by name. `columns=` already selects the axis, so
# no `axis` argument is needed. The result is displayed but not assigned,
# so df_churn keeps the column.
df_churn.drop(columns=['TotalCharges'])

In [None]:
# Drop every column that contains at least one missing value
df_churn.dropna(axis='columns')

In [None]:
# Drop only the columns whose values are all missing
df_churn.dropna(axis='columns', how='all')

In [None]:
# Drop every row that contains at least one missing value
df_churn.dropna(axis='index')

In [None]:
# Drop only the rows whose values are all missing
df_churn.dropna(axis='index', how='all')

Inserting missing values


In [None]:
# Replace every missing value, in any column, with 0
df_churn.fillna(0)

In [None]:
# Replace missing values with a specific default for each listed column
fill_defaults = {'TotalCharges': 0, 'Gender': 'Not declared'}
df_churn.fillna(value=fill_defaults)

In [62]:
# Compute the mean of TotalCharges, to be used below as the fill value
mean_TotalCharges = df_churn.TotalCharges.mean()

mean_TotalCharges

2283.3004408418656

In [None]:
# Fill missing TotalCharges with the column mean and missing Gender with a label
df_churn.fillna(value={'TotalCharges': mean_TotalCharges, 'Gender': 'Not declared'})