In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [3]:
# Attach Google Drive to this Colab runtime so files stored on it can be read
# through the local filesystem.
from google.colab import drive

mount_point = '/content/gdrive'
drive.mount(mount_point)

Mounted at /content/gdrive


**Step 1: Data Gathering**

In [7]:
# Load the raw telecom dataset from the mounted Google Drive.
# The drive is mounted at the absolute path /content/gdrive, so the CSV is
# addressed with an absolute path as well; a path relative to the working
# directory would only resolve while the kernel's cwd happens to be /content.
telcom_csv_path = '/content/gdrive/MyDrive/Semester 7/Deep Learning/Lab/data/telcom.csv'
telcom = pd.read_csv(telcom_csv_path)
telcom

Unnamed: 0,state,account length,area code,phone number,international plan,voice mail plan,number vmail messages,total day minutes,total day calls,total day charge,...,total eve calls,total eve charge,total night minutes,total night calls,total night charge,total intl minutes,total intl calls,total intl charge,customer service calls,churn
0,KS,128,415,382-4657,no,yes,25,265.1,110,45.07,...,99,16.78,244.7,91,11.01,10.0,3,2.70,1,False
1,OH,107,415,371-7191,no,yes,26,161.6,123,27.47,...,103,16.62,254.4,103,11.45,13.7,3,3.70,1,False
2,NJ,137,415,358-1921,no,no,0,243.4,114,41.38,...,110,10.30,162.6,104,7.32,12.2,5,3.29,0,False
3,OH,84,408,375-9999,yes,no,0,299.4,71,50.90,...,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False
4,OK,75,415,330-6626,yes,no,0,166.7,113,28.34,...,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3328,AZ,192,415,414-4276,no,yes,36,156.2,77,26.55,...,126,18.32,279.1,83,12.56,9.9,6,2.67,2,False
3329,WV,68,415,370-3271,no,no,0,231.1,57,39.29,...,55,13.04,191.3,123,8.61,9.6,4,2.59,3,False
3330,RI,28,510,328-8230,no,no,0,180.8,109,30.74,...,58,24.55,191.9,91,8.64,14.1,6,3.81,2,False
3331,CT,184,510,364-6381,yes,no,0,213.8,105,36.35,...,84,13.57,139.2,137,6.26,5.0,10,1.35,2,False


**Step 2: Data Cleaning & Analysis**

In [8]:
# Per-column count of missing values in the loaded frame.
telcom.isna().sum()

state                     0
account length            0
area code                 0
phone number              0
international plan        0
voice mail plan           0
number vmail messages     0
total day minutes         0
total day calls           0
total day charge          0
total eve minutes         0
total eve calls           0
total eve charge          0
total night minutes       0
total night calls         0
total night charge        0
total intl minutes        0
total intl calls          0
total intl charge         0
customer service calls    0
churn                     0
dtype: int64

In [63]:
# Box plot of 'total day charge', one box per churn value.
fig = px.box(data_frame=telcom, x='churn', y='total day charge')

# No facets are requested, so the figure has one subplot at row 1 / column 1.
fig.update_xaxes(title_text='churn', row=1, col=1)
fig.update_yaxes(title_text='Total day charge', row=1, col=1)

# Keyword order is kept as-is: the title font is supplied before the title text.
layout_options = dict(
    autosize=True,
    width=750,
    height=600,
    title_font=dict(size=25, family='Courier'),
    title='<b>total day charge vs Churn</b>',
)
fig.update_layout(**layout_options)

fig.show()

In [64]:
# Keep only the rows whose 'total day charge' lies in the closed interval
# [7.63, 52.77] (both ends included).
# NOTE(review): the two bounds are presumably read off the preceding box plot
# as outlier limits — confirm where they come from.
day_charge_in_range = telcom['total day charge'].between(7.63, 52.77)
telcom = telcom[day_charge_in_range]
telcom.shape

(3285, 21)

In [65]:
# Re-draw the 'total day charge' box plot from the current contents of
# `telcom` (the preceding cell narrows the frame to a charge range).
fig = px.box(data_frame=telcom, x='churn', y='total day charge')

# No facets are requested, so the figure has one subplot at row 1 / column 1.
fig.update_xaxes(title_text='churn', row=1, col=1)
fig.update_yaxes(title_text='Total day charge', row=1, col=1)

# Keyword order is kept as-is: the title font is supplied before the title text.
layout_options = dict(
    autosize=True,
    width=750,
    height=600,
    title_font=dict(size=25, family='Courier'),
    title='<b>total day charge vs Churn</b>',
)
fig.update_layout(**layout_options)

fig.show()

In [95]:
# Three donut charts side by side: international plan, voice mail plan, churn.
#
# The display labels below are written in a fixed order (negative category
# first). `value_counts()` orders its result by frequency, so pairing it
# directly with these labels is only correct while the negative category is
# the more frequent one. Re-indexing each count to an explicit category order
# guarantees every label is attached to the count of its own category.
international_labels = ['No International Plan', 'International Plan']
voice_labels = ['No Voice Plan', 'Voice Plan']
churn_labels = ['False', 'True']

# fill_value=0 keeps a slice (with zero size) even if a category is absent.
international_counts = telcom['international plan'].value_counts().reindex(['no', 'yes'], fill_value=0)
voice_counts = telcom['voice mail plan'].value_counts().reindex(['no', 'yes'], fill_value=0)
churn_counts = telcom['churn'].value_counts().reindex([False, True], fill_value=0)

# 'domain' subplots are required for pie traces.
fig = make_subplots(rows=1, cols=3, specs=[[{'type':'domain'}, {'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=international_labels, values=international_counts, name="International Plan"), 1, 1)
fig.add_trace(go.Pie(labels=voice_labels, values=voice_counts, name="Voice Plan"), 1, 2)
fig.add_trace(go.Pie(labels=churn_labels, values=churn_counts, name="Churn"), 1, 3)

# hole=.4 turns each pie into a donut so the annotation text fits in the centre.
fig.update_traces(hole=.4, hoverinfo="label+percent+name", textfont_size=16)

fig.update_layout(
    title_text="International Plan, Voice Mail Plan and Churn Distributions",
    annotations=[dict(text='International Plan', x=0.05, y=0.5, font_size=20, showarrow=False),
                 dict(text='Voice Plan', x=0.5, y=0.5, font_size=20, showarrow=False),
                 dict(text='Churn', x=0.88, y=0.5, font_size=20, showarrow=False)])
fig.show()

**Step 3: Data Selection**

In [75]:
# Feature frame: `telcom` without the listed columns (the target 'churn' is
# among the columns removed).
excluded_columns = ['state', 'area code', 'phone number', 'customer service calls', 'churn']
x = telcom.drop(columns=excluded_columns)
x

Unnamed: 0,account length,international plan,voice mail plan,number vmail messages,total day minutes,total day calls,total day charge,total eve minutes,total eve calls,total eve charge,total night minutes,total night calls,total night charge,total intl minutes,total intl calls,total intl charge
0,128,no,yes,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.70
1,107,no,yes,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.70
2,137,no,no,0,243.4,114,41.38,121.2,110,10.30,162.6,104,7.32,12.2,5,3.29
3,84,yes,no,0,299.4,71,50.90,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78
4,75,yes,no,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3328,192,no,yes,36,156.2,77,26.55,215.5,126,18.32,279.1,83,12.56,9.9,6,2.67
3329,68,no,no,0,231.1,57,39.29,153.4,55,13.04,191.3,123,8.61,9.6,4,2.59
3330,28,no,no,0,180.8,109,30.74,288.8,58,24.55,191.9,91,8.64,14.1,6,3.81
3331,184,yes,no,0,213.8,105,36.35,159.6,84,13.57,139.2,137,6.26,5.0,10,1.35


In [76]:
# Target series: the 'churn' column of the filtered frame.
y = telcom.loc[:, 'churn']
y

0       False
1       False
2       False
3       False
4       False
        ...  
3328    False
3329    False
3330    False
3331    False
3332    False
Name: churn, Length: 3285, dtype: bool

**Step 4: Data Transformation**

In [77]:
# Encode the two yes/no plan columns as 1/0 so they can be used as numeric
# model inputs.
yes_no_to_int = {'yes': 1, 'no': 0}
for plan_column in ('international plan', 'voice mail plan'):
    x[plan_column] = x[plan_column].map(yes_no_to_int)
x

Unnamed: 0,account length,international plan,voice mail plan,number vmail messages,total day minutes,total day calls,total day charge,total eve minutes,total eve calls,total eve charge,total night minutes,total night calls,total night charge,total intl minutes,total intl calls,total intl charge
0,128,0,1,25,265.1,110,45.07,197.4,99,16.78,244.7,91,11.01,10.0,3,2.70
1,107,0,1,26,161.6,123,27.47,195.5,103,16.62,254.4,103,11.45,13.7,3,3.70
2,137,0,0,0,243.4,114,41.38,121.2,110,10.30,162.6,104,7.32,12.2,5,3.29
3,84,1,0,0,299.4,71,50.90,61.9,88,5.26,196.9,89,8.86,6.6,7,1.78
4,75,1,0,0,166.7,113,28.34,148.3,122,12.61,186.9,121,8.41,10.1,3,2.73
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3328,192,0,1,36,156.2,77,26.55,215.5,126,18.32,279.1,83,12.56,9.9,6,2.67
3329,68,0,0,0,231.1,57,39.29,153.4,55,13.04,191.3,123,8.61,9.6,4,2.59
3330,28,0,0,0,180.8,109,30.74,288.8,58,24.55,191.9,91,8.64,14.1,6,3.81
3331,184,1,0,0,213.8,105,36.35,159.6,84,13.57,139.2,137,6.26,5.0,10,1.35


In [78]:
from sklearn.model_selection import train_test_split

# 70 % / 30 % train/test partition; the fixed random_state makes the shuffle
# repeatable between runs.
split_parts = train_test_split(
    x,
    y,
    train_size=0.7,
    test_size=0.3,
    random_state=100,
)
X_train, X_test, y_train, y_test = split_parts

len(X_train)

2299

In [79]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-feature scaling from the training rows only, then apply that
# same scaling to both partitions.
scaler = MinMaxScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

X_train.shape

(2299, 16)

**Step 5: Data Mining**

In [82]:
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# Logistic regression with balanced class weights and a raised iteration cap;
# `fit` returns the estimator itself, so construction and training are chained.
logreg = LogisticRegression(class_weight='balanced', max_iter=1000).fit(X_train, y_train)

# Predicted churn labels for the held-out rows.
y_pred = logreg.predict(X_test)

In [92]:
from pprint import pprint  # retained so any other cell that uses `pprint` keeps working

# `classification_report` returns a single pre-formatted multi-line string.
# `print` renders it as an aligned table; `pprint` would display the string's
# repr instead (quoted fragments with literal "\n" markers).
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))

('              precision    recall  f1-score   support\n'
 '\n'
 '       False       0.93      0.73      0.82       837\n'
 '        True       0.31      0.68      0.42       149\n'
 '\n'
 '    accuracy                           0.72       986\n'
 '   macro avg       0.62      0.71      0.62       986\n'
 'weighted avg       0.83      0.72      0.76       986\n')


**Step 6: Pattern Evaluation**

In [84]:
# Pair every feature name with its fitted logistic-regression coefficient and
# list them from most negative to most positive.
coefficient_by_feature = {
    "Feature": list(x.columns),
    "Coefficients": logreg.coef_[0],
}
feature_importance = pd.DataFrame(coefficient_by_feature)
feature_importance.sort_values(by='Coefficients')

Unnamed: 0,Feature,Coefficients
2,voice mail plan,-1.387432
14,total intl calls,-1.282416
0,account length,-0.358854
8,total eve calls,-0.14673
11,total night calls,-0.111969
5,total day calls,0.34643
10,total night minutes,0.405027
12,total night charge,0.407099
13,total intl minutes,0.581725
15,total intl charge,0.58419


**Step 7: Knowledge Representation**
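- Based on the patterns evaluated, customers who have an 'international plan' are found to be less likely to churn.
- In addition, the higher the number of vmail messages, total evening minutes, and total day minutes, the fewer customers churn.
- Reading the coefficients: in the fitted logistic regression, a negative coefficient lowers the predicted odds of churn and a positive coefficient raises them, so each statement above should be verified against the sign of the corresponding feature in the Step 6 table.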

