In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('TelcoCustomerChurn.csv')
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [4]:
df['Churn'].value_counts()

Churn
No     5174
Yes    1869
Name: count, dtype: int64

In [5]:
#Average Tenure by Churn
df.groupby('Churn')['tenure'].mean()

Churn
No     37.569965
Yes    17.979133
Name: tenure, dtype: float64

In [6]:
df["tenure_group"] = pd.cut(
    df["tenure"],
    bins=[0, 12, 24, 48, 100],
    labels=["0-1 year", "1-2 year", "2-4 year", "4+ year"]
)


In [7]:
pd.crosstab(df["tenure_group"], df["Churn"])


Churn,No,Yes
tenure_group,Unnamed: 1_level_1,Unnamed: 2_level_1
0-1 year,1138,1037
1-2 year,730,294
2-4 year,1269,325
4+ year,2026,213


In [8]:
pd.crosstab(df['Contract'], df['Churn'])

Churn,No,Yes
Contract,Unnamed: 1_level_1,Unnamed: 2_level_1
Month-to-month,2220,1655
One year,1307,166
Two year,1647,48


In [9]:
pd.crosstab(df["Contract"], df["Churn"], normalize="index")


Churn,No,Yes
Contract,Unnamed: 1_level_1,Unnamed: 2_level_1
Month-to-month,0.572903,0.427097
One year,0.887305,0.112695
Two year,0.971681,0.028319


In [10]:
pd.crosstab(df["InternetService"], df["Churn"])


Churn,No,Yes
InternetService,Unnamed: 1_level_1,Unnamed: 2_level_1
DSL,1962,459
Fiber optic,1799,1297
No,1413,113


In [11]:
pd.crosstab(df["InternetService"], df["Churn"], normalize="index")


Churn,No,Yes
InternetService,Unnamed: 1_level_1,Unnamed: 2_level_1
DSL,0.810409,0.189591
Fiber optic,0.581072,0.418928
No,0.92595,0.07405


In [12]:
pd.crosstab(df["TechSupport"], df["Churn"])


Churn,No,Yes
TechSupport,Unnamed: 1_level_1,Unnamed: 2_level_1
No,2027,1446
No internet service,1413,113
Yes,1734,310


In [13]:
pd.crosstab(df["TechSupport"], df["Churn"], normalize="index")


Churn,No,Yes
TechSupport,Unnamed: 1_level_1,Unnamed: 2_level_1
No,0.583645,0.416355
No internet service,0.92595,0.07405
Yes,0.848337,0.151663


In [14]:
df.groupby("Churn")["MonthlyCharges"].mean()


Churn
No     61.265124
Yes    74.441332
Name: MonthlyCharges, dtype: float64

In [16]:
pd.crosstab(df["PaymentMethod"], df["Churn"])



Churn,No,Yes
PaymentMethod,Unnamed: 1_level_1,Unnamed: 2_level_1
Bank transfer (automatic),1286,258
Credit card (automatic),1290,232
Electronic check,1294,1071
Mailed check,1304,308


In [17]:
pd.crosstab(df["PaymentMethod"], df["Churn"], normalize="index")


Churn,No,Yes
PaymentMethod,Unnamed: 1_level_1,Unnamed: 2_level_1
Bank transfer (automatic),0.832902,0.167098
Credit card (automatic),0.847569,0.152431
Electronic check,0.547146,0.452854
Mailed check,0.808933,0.191067


In [18]:
X = df.drop(columns=["Churn", "customerID"])


In [19]:
y = df["Churn"].map({"Yes": 1, "No": 0})


In [20]:
X_encoded = pd.get_dummies(X, drop_first=True)


In [21]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42, stratify=y
)


In [22]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)


In [23]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

y_prob = model.predict_proba(X_test_scaled)[:, 1]
y_pred = (y_prob >= 0.5).astype(int)


In [24]:
y_prob = model.predict_proba(X_test)[:, 1]




In [25]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

y_pred_05 = (y_prob >= 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred_05))
print("Precision:", precision_score(y_test, y_pred_05))
print("Recall:", recall_score(y_test, y_pred_05))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_05))


Accuracy: 0.7047551454932577
Precision: 0.4675925925925926
Recall: 0.8101604278074866
Confusion Matrix:
 [[690 345]
 [ 71 303]]


In [26]:
y_pred_04 = (y_prob >= 0.4).astype(int)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_04))

Confusion Matrix:
 [[686 349]
 [ 69 305]]


In [28]:
y_pred_03 = (y_prob >= 0.3).astype(int)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_03))

Confusion Matrix:
 [[679 356]
 [ 68 306]]


In [29]:
y_pred_02 = (y_prob >= 0.2).astype(int)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_02))

Confusion Matrix:
 [[671 364]
 [ 67 307]]


In [30]:
y_pred_06 = (y_prob >= 0.6).astype(int)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_06))

Confusion Matrix:
 [[696 339]
 [ 73 301]]
