# Churn rate prediction for the telecom sector

In [1]:
%matplotlib inline
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import warnings
warnings.filterwarnings("ignore")


## Read data from SQL

In [2]:
# Execute the companion loader notebook inside this kernel. Per the message it
# prints, it leaves a DataFrame named `data` in the namespace; every cell below
# depends on that variable.
%run read_telecom_data_from_sql.ipynb

(4617, 21)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4617 entries, 0 to 4616
Data columns (total 21 columns):
State                   4617 non-null object
Account Length          4617 non-null object
Area Code               4617 non-null object
Phone                   4617 non-null object
International Plan      4617 non-null object
VMail Plan              4617 non-null object
VMail Message           4617 non-null object
Day Mins                4617 non-null object
Day Calls               4617 non-null object
Day Charge              4617 non-null object
Eve Mins                4617 non-null object
Eve Calls               4617 non-null object
Eve Charge              4617 non-null object
Night Mins              4617 non-null object
Night Calls             4617 non-null object
Nights Charge           4617 non-null object
International Mins      4617 non-null object
International Calls     4617 non-null object
International Charge    4617 non-null object
CustServ Calls          461

Dataframe named data is available now

In [3]:
# Per-column count of missing values in the freshly loaded frame.
data.isna().sum()

State                   0
Account Length          0
Area Code               0
Phone                   0
International Plan      0
VMail Plan              0
VMail Message           0
Day Mins                0
Day Calls               0
Day Charge              0
Eve Mins                0
Eve Calls               0
Eve Charge              0
Night Mins              0
Night Calls             0
Nights Charge           0
International Mins      0
International Calls     0
International Charge    0
CustServ Calls          0
Churn                   0
dtype: int64

In [4]:
# The loader returns every column as `object`, so cast the numeric columns
# explicitly. The whole step is a single chained expression that builds a new
# frame, which makes the cell safe to re-run: `Phone` is first coerced to str,
# so `.str.replace` still works when the column is already int64 from a
# previous execution (the original attribute-assignment form raised
# AttributeError on the second run).
# NOTE(review): casting Phone to int64 would drop any leading zeros -- confirm
# the column is only used as an opaque identifier.
data = (
    data
    .assign(Phone=data['Phone'].astype(str).str.replace('-', ''))
    .astype({
        'Account Length': 'int64',
        'Area Code': 'int64',
        'Phone': 'int64',
        'VMail Message': 'int64',
        'Day Mins': 'float64',
        'Day Calls': 'int64',
        'Day Charge': 'float64',
        'Eve Mins': 'float64',
        'Eve Calls': 'int64',
        'Eve Charge': 'float64',
        'Night Mins': 'float64',
        'Night Calls': 'int64',
        'Nights Charge': 'float64',
        'International Mins': 'float64',
        'International Calls': 'int64',
        'International Charge': 'float64',
        'CustServ Calls': 'int64',
    })
)


In [5]:
# Print the column dtypes and non-null counts to check the explicit casts applied above.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4617 entries, 0 to 4616
Data columns (total 21 columns):
State                   4617 non-null object
Account Length          4617 non-null int64
Area Code               4617 non-null int64
Phone                   4617 non-null int64
International Plan      4617 non-null object
VMail Plan              4617 non-null object
VMail Message           4617 non-null int64
Day Mins                4617 non-null float64
Day Calls               4617 non-null int64
Day Charge              4617 non-null float64
Eve Mins                4617 non-null float64
Eve Calls               4617 non-null int64
Eve Charge              4617 non-null float64
Night Mins              4617 non-null float64
Night Calls             4617 non-null int64
Nights Charge           4617 non-null float64
International Mins      4617 non-null float64
International Calls     4617 non-null int64
International Charge    4617 non-null float64
CustServ Calls          4617 non-null 

In [6]:
# Trim surrounding whitespace from the four text columns; every other column
# is returned unchanged.
data = data.apply(
    lambda column: column
    if column.name not in ['State', 'International Plan', 'VMail Plan', 'Churn']
    else column.str.strip()
)

In [7]:
# Shape of the subset of rows whose Churn label is 'True.' (rows, columns).
data.loc[data['Churn'].eq('True.')].shape

(656, 21)

In [8]:
# Preview the first two rows of the four text columns.
data.loc[:, ['State', 'International Plan', 'VMail Plan', 'Churn']].head(2)

Unnamed: 0,State,International Plan,VMail Plan,Churn
0,KS,no,yes,False.
1,OH,no,yes,False.


In [9]:
# The five most frequent states and their row counts.
data['State'].value_counts().head()

WV    149
AL    114
MN    112
ID    110
OH    110
Name: State, dtype: int64

In [10]:
# Encode the categorical columns as integers on a copy, so `data` keeps the
# original string labels.
stateencoder = LabelEncoder()

data_encoded = data.copy()
international_plan = 'International Plan'
vmail_plan = 'VMail Plan'

# State: one integer label per distinct state. The fitted encoder is kept so
# the labels can be mapped back with `inverse_transform`.
data_encoded['State'] = stateencoder.fit_transform(data_encoded['State'])

# Binary columns: `.map` turns any value that is missing from the dict into
# NaN, whereas `.replace` would silently leave it as a string and produce an
# object column. The assertion below makes such a value fail loudly.
yes_no = {'yes': 1, 'no': 0}
data_encoded[international_plan] = data_encoded[international_plan].map(yes_no)
data_encoded[vmail_plan] = data_encoded[vmail_plan].map(yes_no)
data_encoded['Churn'] = data_encoded['Churn'].map({'False.': 0, 'True.': 1})

binary_columns = [international_plan, vmail_plan, 'Churn']
unmapped = data_encoded[binary_columns].isnull().sum()
assert not unmapped.any(), (
    "Unexpected category values left unmapped:\n{}".format(unmapped[unmapped > 0])
)

# Round-trip check: decode the integer labels back to state abbreviations.
stateencoder.inverse_transform(data_encoded.State.unique())

array(['KS', 'OH', 'NJ', 'OK', 'AL', 'MA', 'MO', 'LA', 'WV', 'IN', 'RI',
       'IA', 'MT', 'NY', 'ID', 'VT', 'VA', 'TX', 'FL', 'CO', 'AZ', 'SC',
       'NE', 'WY', 'HI', 'IL', 'NH', 'GA', 'AK', 'MD', 'AR', 'WI', 'OR',
       'MI', 'DE', 'UT', 'CA', 'MN', 'SD', 'NC', 'WA', 'NM', 'NV', 'DC',
       'KY', 'ME', 'MS', 'TN', 'PA', 'CT', 'ND'], dtype=object)

In [11]:
# Print the dtypes and non-null counts of the encoded copy to check that the
# encoded columns are now numeric.
data_encoded.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4617 entries, 0 to 4616
Data columns (total 21 columns):
State                   4617 non-null int32
Account Length          4617 non-null int64
Area Code               4617 non-null int64
Phone                   4617 non-null int64
International Plan      4617 non-null int64
VMail Plan              4617 non-null int64
VMail Message           4617 non-null int64
Day Mins                4617 non-null float64
Day Calls               4617 non-null int64
Day Charge              4617 non-null float64
Eve Mins                4617 non-null float64
Eve Calls               4617 non-null int64
Eve Charge              4617 non-null float64
Night Mins              4617 non-null float64
Night Calls             4617 non-null int64
Nights Charge           4617 non-null float64
International Mins      4617 non-null float64
International Calls     4617 non-null int64
International Charge    4617 non-null float64
CustServ Calls          4617 non-null int