In [5]:
import pandas as pd
import numpy as np

In [6]:
# Read the Telco customer-churn CSV into the working DataFrame.

DATA_FILE = 'telco_customer_churn.csv'
df = pd.read_csv(DATA_FILE)

In [7]:
# Inspect the dtype pandas inferred for every column of the loaded frame.
df.dtypes

customerID           object
gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                object
dtype: object

In [8]:
# Rename the customerID column to CustID, then display the resulting frame.

column_renames = {'customerID': 'CustID'}
df = df.rename(columns=column_renames)
df

Unnamed: 0,CustID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


In [9]:
# Convert every column name to lowercase, then show the updated column index.

df.columns = df.columns.map(str.lower)
df.columns

Index(['custid', 'gender', 'seniorcitizen', 'partner', 'dependents', 'tenure',
       'phoneservice', 'multiplelines', 'internetservice', 'onlinesecurity',
       'onlinebackup', 'deviceprotection', 'techsupport', 'streamingtv',
       'streamingmovies', 'contract', 'paperlessbilling', 'paymentmethod',
       'monthlycharges', 'totalcharges', 'churn'],
      dtype='object')

In [10]:
# Remove the "(" and ")" characters from the payment-method labels.
payment_without_parens = df['paymentmethod'].str.replace('[()]', '', regex=True)
df['paymentmethod'] = payment_without_parens

In [11]:
# Drop the " automatic" suffix from the payment-method labels. The pattern
# contains no regex metacharacters, so a literal replacement is equivalent.
df['paymentmethod'] = df['paymentmethod'].str.replace(
    ' automatic', '', regex=False
)

In [12]:
# List the distinct payment-method labels that remain in the column.
df['paymentmethod'].unique()

array(['Electronic check', 'Mailed check', 'Bank transfer', 'Credit card'],
      dtype=object)

## ONE-HOT ENCODING

In [13]:
from sklearn.preprocessing import OrdinalEncoder

# NOTE(review): ord_enc is instantiated here, but none of the cells visible in
# this section use it; the encoding that follows is done with pd.get_dummies.
ord_enc = OrdinalEncoder()

In [14]:
# Select phoneservice with a list of labels so the result is a one-column
# DataFrame rather than a Series.
phone_service_columns = ['phoneservice']
df_phone_service = df[phone_service_columns]

In [15]:
# Display the one-column phoneservice frame.
df_phone_service

Unnamed: 0,phoneservice
0,No
1,Yes
2,Yes
3,No
4,Yes
...,...
7038,Yes
7039,Yes
7040,No
7041,Yes


In [16]:
# One-hot encode phoneservice into one indicator column per category
# (phoneservice_No / phoneservice_Yes).
# dtype=int pins the indicators to 0/1 integers: pandas >= 2.0 otherwise
# returns boolean columns, which would not match the 0/1 table shown below.
df_phone_service_encode = pd.get_dummies(
    df_phone_service,
    columns=['phoneservice'],
    prefix='phoneservice',
    dtype=int,
)

In [17]:
# Display the indicator columns produced by get_dummies.
df_phone_service_encode

Unnamed: 0,phoneservice_No,phoneservice_Yes
0,1,0
1,0,1
2,0,1
3,1,0
4,0,1
...,...,...
7038,0,1
7039,0,1
7040,1,0
7041,0,1


In [18]:
# Show the original phoneservice column next to its indicator columns.
# The combined frame is only displayed; it is not assigned to a variable.
side_by_side_frames = [df_phone_service, df_phone_service_encode]
pd.concat(objs=side_by_side_frames, axis='columns')

Unnamed: 0,phoneservice,phoneservice_No,phoneservice_Yes
0,No,1,0
1,Yes,0,1
2,Yes,0,1
3,No,1,0
4,Yes,0,1
...,...,...,...
7038,Yes,0,1
7039,Yes,0,1
7040,No,1,0
7041,Yes,0,1


## NORMALIZATION

In [19]:
from sklearn.preprocessing import MinMaxScaler

# Scaler instance with default settings; the next cell fits it on
# monthlycharges to produce monthlycharge_norm.
min_max_scaler = MinMaxScaler()

In [20]:
# Fit the min-max scaler on monthlycharges and store the rescaled values in a
# new column, leaving the original monthlycharges column untouched.
monthly_charges = df[['monthlycharges']]
df['monthlycharge_norm'] = min_max_scaler.fit_transform(monthly_charges)

In [21]:
# Summary statistics of the numeric columns, including monthlycharge_norm.
df.describe()

Unnamed: 0,seniorcitizen,tenure,monthlycharges,monthlycharge_norm
count,7043.0,7043.0,7043.0,7043.0
mean,0.162147,32.371149,64.761692,0.462803
std,0.368612,24.559481,30.090047,0.299403
min,0.0,0.0,18.25,0.0
25%,0.0,9.0,35.5,0.171642
50%,0.0,29.0,70.35,0.518408
75%,0.0,55.0,89.85,0.712438
max,1.0,72.0,118.75,1.0


## STANDARDIZATION

In [29]:
from sklearn.preprocessing import StandardScaler

# Scaler instance with default settings; the next cell fits it on
# totalcharges.
std_scaler = StandardScaler()

In [31]:
# Standardize totalcharges (zero mean, unit variance), overwriting the column.
#
# totalcharges is read from the CSV as `object` dtype (see the df.dtypes
# output above), so it must be converted to numbers before scaling; otherwise
# this cell fails on a fresh kernel. errors='coerce' turns entries that cannot
# be parsed as numbers into NaN instead of raising. StandardScaler disregards
# NaN while fitting and keeps it as NaN in the transformed output.
df['totalcharges'] = pd.to_numeric(df['totalcharges'], errors='coerce')
df['totalcharges'] = std_scaler.fit_transform(df[['totalcharges']])

In [33]:
# Summary statistics again; totalcharges now appears among the numeric columns.
df.describe()

Unnamed: 0,seniorcitizen,tenure,monthlycharges,totalcharges,monthlycharge_norm
count,7043.0,7043.0,7043.0,7032.0,7043.0
mean,0.162147,32.371149,64.761692,9.15713e-18,0.462803
std,0.368612,24.559481,30.090047,1.000071,0.299403
min,0.0,0.0,18.25,-0.9990692,0.0
25%,0.0,9.0,35.5,-0.8302488,0.171642
50%,0.0,29.0,70.35,-0.3908151,0.518408
75%,0.0,55.0,89.85,0.6668271,0.712438
max,1.0,72.0,118.75,2.824261,1.0


In [37]:
# Display the frame after the transformations applied above.
df

Unnamed: 0,custid,gender,seniorcitizen,partner,dependents,tenure,phoneservice,multiplelines,internetservice,onlinesecurity,...,techsupport,streamingtv,streamingmovies,contract,paperlessbilling,paymentmethod,monthlycharges,totalcharges,churn,monthlycharge_norm
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,Month-to-month,Yes,Electronic check,29.85,-0.994194,No,0.115423
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,No,No,One year,No,Mailed check,56.95,-0.173740,No,0.385075
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,Month-to-month,Yes,Mailed check,53.85,-0.959649,Yes,0.354229
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,No,No,One year,No,Bank transfer,42.30,-0.195248,No,0.239303
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Electronic check,70.70,-0.940457,Yes,0.521891
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,-0.129180,No,0.662189
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,No,Yes,Yes,One year,Yes,Credit card,103.20,2.241056,No,0.845274
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,Month-to-month,Yes,Electronic check,29.60,-0.854514,No,0.112935
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,Month-to-month,Yes,Mailed check,74.40,-0.872095,Yes,0.558706
