In [216]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn import tree

In [217]:
df = pd.read_csv('Telecommunications.csv')
df.head()

Unnamed: 0,Customer_ID,Monthly_Bill,Network_Usage,Plan_Type,Customer_Churn
0,CUST00001,124.87,815.0,Prepaid,
1,CUST00002,286.2,12.0,Prepaid,0.0
2,CUST00003,224.96,318.0,Postpaid,
3,CUST00004,187.62,,Prepaid,
4,CUST00005,63.69,,Prepaid,


In [218]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Customer_ID     900 non-null    object 
 1   Monthly_Bill    900 non-null    float64
 2   Network_Usage   900 non-null    float64
 3   Plan_Type       900 non-null    object 
 4   Customer_Churn  900 non-null    float64
dtypes: float64(3), object(2)
memory usage: 39.2+ KB


In [219]:
df.isnull().sum()

Unnamed: 0,0
Customer_ID,100
Monthly_Bill,100
Network_Usage,100
Plan_Type,100
Customer_Churn,100


In [220]:
# filling missing values
# Monthly_Bill

mean = df['Monthly_Bill'].mean()
median = df['Monthly_Bill'].median()
print(mean)
print(median)
df['Monthly_Bill'].fillna(mean, inplace=True)

158.2296111111111
160.82


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Monthly_Bill'].fillna(mean, inplace=True)


In [221]:
# filling missing values
# Network_Usage

mean = df['Network_Usage'].mean()
median = df['Network_Usage'].median()
print(mean)
print(median)
df['Network_Usage'].fillna(mean, inplace=True)

508.3177777777778
499.5


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Network_Usage'].fillna(mean, inplace=True)


In [222]:
df['Plan_Type'].value_counts()

Unnamed: 0_level_0,count
Plan_Type,Unnamed: 1_level_1
Postpaid,477
Prepaid,423


In [223]:
# filling missing values
# Plan_Type (Postpaid)

# df['Plan_Type'].fillna('Postpaid', inplace=True)

In [224]:
# filling missing values
# Plan_Type (Prepaid)

df['Plan_Type'].fillna('Prepaid', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Plan_Type'].fillna('Prepaid', inplace=True)


In [225]:
df['Customer_Churn'].value_counts()

Unnamed: 0_level_0,count
Customer_Churn,Unnamed: 1_level_1
0.0,713
1.0,187


In [226]:
# # filling missing values
# # Customer_Churn (0)

df['Customer_Churn'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Customer_Churn'].fillna(0, inplace=True)


In [227]:
# filling missing values
# Customer_Churn (1)

# df['Customer_Churn'].fillna(1, inplace=True)

In [228]:
df.drop('Customer_ID', axis=1, inplace=True)
df.head()

Unnamed: 0,Monthly_Bill,Network_Usage,Plan_Type,Customer_Churn
0,124.87,815.0,Prepaid,0.0
1,286.2,12.0,Prepaid,0.0
2,224.96,318.0,Postpaid,0.0
3,187.62,508.317778,Prepaid,0.0
4,63.69,508.317778,Prepaid,0.0


In [229]:
from sklearn.preprocessing import LabelEncoder
LE = LabelEncoder()

In [230]:
df['Plan_Type'] = LE.fit_transform(df['Plan_Type'])
df

Unnamed: 0,Monthly_Bill,Network_Usage,Plan_Type,Customer_Churn
0,124.87,815.000000,1,0.0
1,286.20,12.000000,1,0.0
2,224.96,318.000000,0,0.0
3,187.62,508.317778,1,0.0
4,63.69,508.317778,1,0.0
...,...,...,...,...
995,45.64,364.000000,0,0.0
996,276.85,891.000000,1,0.0
997,58.31,427.000000,1,0.0
998,286.07,21.000000,1,0.0


In [231]:
df['Customer_Churn'] = df['Customer_Churn'].astype(int)
df.head()

Unnamed: 0,Monthly_Bill,Network_Usage,Plan_Type,Customer_Churn
0,124.87,815.0,1,0
1,286.2,12.0,1,0
2,224.96,318.0,0,0
3,187.62,508.317778,1,0
4,63.69,508.317778,1,0


In [232]:
corr_matrix = df.corrwith(df['Customer_Churn'])

print(corr_matrix)

Monthly_Bill     -0.021102
Network_Usage    -0.064957
Plan_Type         0.016426
Customer_Churn    1.000000
dtype: float64


In [233]:
from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state = 42)
X = df.drop('Customer_Churn', axis=1)
y = df['Customer_Churn']
X_resampled, y_resampled = smote.fit_resample(X, y)

In [234]:
print(X_resampled.shape)
print(y_resampled.shape)

(1626, 3)
(1626,)


In [235]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [236]:
model  = LogisticRegression()
model.fit(X_train, y_train)

In [237]:
y_pred = model.predict(X_test)
y_pred

array([1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,

In [238]:
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.6411042944785276
[[ 94  68]
 [ 49 115]]
              precision    recall  f1-score   support

           0       0.66      0.58      0.62       162
           1       0.63      0.70      0.66       164

    accuracy                           0.64       326
   macro avg       0.64      0.64      0.64       326
weighted avg       0.64      0.64      0.64       326



In [239]:
tree_model = DecisionTreeClassifier()
tree_model.fit(X_train, y_train)

In [240]:
y_tpred = tree_model.predict(X_test)
y_tpred

array([1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1,
       0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0,

In [241]:
print(accuracy_score(y_test, y_tpred))
print(confusion_matrix(y_test, y_tpred))
print(classification_report(y_test, y_tpred))

0.7208588957055214
[[122  40]
 [ 51 113]]
              precision    recall  f1-score   support

           0       0.71      0.75      0.73       162
           1       0.74      0.69      0.71       164

    accuracy                           0.72       326
   macro avg       0.72      0.72      0.72       326
weighted avg       0.72      0.72      0.72       326

