In [2]:
# Our essentials
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

#Acquire functions
from acquire import get_connection, new_telco_churn_data, get_telco_churn_data

In [3]:
df = get_telco_churn_data()

In [4]:
df.head()

Unnamed: 0,payment_type_id,contract_type_id,internet_service_type_id,customer_id,gender,senior_citizen,partner,dependents,tenure,phone_service,...,tech_support,streaming_tv,streaming_movies,paperless_billing,monthly_charges,total_charges,churn,internet_service_type,contract_type,payment_type
0,2,1,3,0030-FNXPP,Female,0,No,No,3,Yes,...,No internet service,No internet service,No internet service,No,19.85,57.2,No,,Month-to-month,Mailed check
1,2,1,3,0031-PVLZI,Female,0,Yes,Yes,4,Yes,...,No internet service,No internet service,No internet service,No,20.35,76.35,Yes,,Month-to-month,Mailed check
2,1,1,3,0098-BOWSO,Male,0,No,No,27,Yes,...,No internet service,No internet service,No internet service,Yes,19.4,529.8,No,,Month-to-month,Electronic check
3,1,1,3,0107-WESLM,Male,0,No,No,1,Yes,...,No internet service,No internet service,No internet service,Yes,19.85,19.85,Yes,,Month-to-month,Electronic check
4,3,1,3,0114-RSRRW,Female,0,Yes,No,10,Yes,...,No internet service,No internet service,No internet service,Yes,19.95,187.75,No,,Month-to-month,Bank transfer (automatic)


In [5]:
#2.) Drop the customer_id columns.
df = df.drop(columns = ['customer_id'])

In [12]:
# information about the dataframe: column names, rows, datatypes, non-missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7043 entries, 0 to 7042
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   payment_type_id           7043 non-null   int64  
 1   contract_type_id          7043 non-null   int64  
 2   internet_service_type_id  7043 non-null   int64  
 3   gender                    7043 non-null   object 
 4   senior_citizen            7043 non-null   int64  
 5   partner                   7043 non-null   object 
 6   dependents                7043 non-null   object 
 7   tenure                    7043 non-null   int64  
 8   phone_service             7043 non-null   object 
 9   multiple_lines            7043 non-null   object 
 10  online_security           7043 non-null   object 
 11  online_backup             7043 non-null   object 
 12  device_protection         7043 non-null   object 
 13  tech_support              7043 non-null   object 
 14  streamin

In [13]:
# describe numeric columns
df.describe()

Unnamed: 0,payment_type_id,contract_type_id,internet_service_type_id,senior_citizen,tenure,monthly_charges
count,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0
mean,2.315633,1.690473,1.872923,0.162147,32.371149,64.761692
std,1.148907,0.833755,0.737796,0.368612,24.559481,30.090047
min,1.0,1.0,1.0,0.0,0.0,18.25
25%,1.0,1.0,1.0,0.0,9.0,35.5
50%,2.0,1.0,2.0,0.0,29.0,70.35
75%,3.0,2.0,2.0,0.0,55.0,89.85
max,4.0,3.0,3.0,1.0,72.0,118.75


In [14]:
## look for any missing values. None most likely from the sql database
df.isnull().sum()

payment_type_id             0
contract_type_id            0
internet_service_type_id    0
gender                      0
senior_citizen              0
partner                     0
dependents                  0
tenure                      0
phone_service               0
multiple_lines              0
online_security             0
online_backup               0
device_protection           0
tech_support                0
streaming_tv                0
streaming_movies            0
paperless_billing           0
monthly_charges             0
total_charges               0
churn                       0
internet_service_type       0
contract_type               0
payment_type                0
dtype: int64

In [15]:
# drop duplicates rows...run just in case
df = df.drop_duplicates()

In [16]:
df

Unnamed: 0,payment_type_id,contract_type_id,internet_service_type_id,gender,senior_citizen,partner,dependents,tenure,phone_service,multiple_lines,...,tech_support,streaming_tv,streaming_movies,paperless_billing,monthly_charges,total_charges,churn,internet_service_type,contract_type,payment_type
0,2,1,3,Female,0,No,No,3,Yes,No,...,No internet service,No internet service,No internet service,No,19.85,57.2,No,,Month-to-month,Mailed check
1,2,1,3,Female,0,Yes,Yes,4,Yes,No,...,No internet service,No internet service,No internet service,No,20.35,76.35,Yes,,Month-to-month,Mailed check
2,1,1,3,Male,0,No,No,27,Yes,No,...,No internet service,No internet service,No internet service,Yes,19.40,529.8,No,,Month-to-month,Electronic check
3,1,1,3,Male,0,No,No,1,Yes,No,...,No internet service,No internet service,No internet service,Yes,19.85,19.85,Yes,,Month-to-month,Electronic check
4,3,1,3,Female,0,Yes,No,10,Yes,No,...,No internet service,No internet service,No internet service,Yes,19.95,187.75,No,,Month-to-month,Bank transfer (automatic)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,4,3,1,Male,1,Yes,No,72,Yes,Yes,...,No,Yes,Yes,Yes,84.10,5981.65,No,DSL,Two year,Credit card (automatic)
7039,3,3,1,Female,0,Yes,Yes,72,Yes,Yes,...,No,Yes,Yes,Yes,76.80,5468.45,No,DSL,Two year,Bank transfer (automatic)
7040,2,3,1,Female,0,No,No,12,No,No phone service,...,Yes,No,Yes,No,49.85,552.1,No,DSL,Two year,Mailed check
7041,2,3,1,Male,0,Yes,Yes,67,Yes,No,...,Yes,No,Yes,No,67.85,4627.65,No,DSL,Two year,Mailed check


In [28]:
#rename columns
telco_df = df.rename(columns={'payment_type_id': 'payment_type', 'contract_type_id': 'contract_type','internet_service_type_id': 'internet_service_type'})

AttributeError: 'list' object has no attribute 'rename'