In [11]:
import pandas as pd
# Load both feature tables in one pass:
#   X_original - the filtered original features
#   X_pca      - the features after dimensionality reduction
X_original, X_pca = (
    pd.read_excel(workbook_path)
    for workbook_path in (
        "./Datasets/Filtered_features.xlsx",
        "./Datasets/PCA_features.xlsx",
    )
)


In [12]:
# Name of the label column; it must never be part of a feature matrix.
target_column = "Churn Value"

# The PCA labels below are taken from X_original and matched to the PCA rows
# purely by position, so the two tables must describe the same rows.
if len(X_pca) != len(X_original):
    raise ValueError(
        "X_pca and X_original must have the same number of rows "
        f"(got {len(X_pca)} and {len(X_original)})"
    )

# Original feature matrix: every column of X_original except the label.
features_original = X_original.drop(columns=[target_column])

# PCA feature matrix. If the PCA sheet was exported together with the label
# column, keeping it would hand the answer to the model (a perfect score on the
# held-out split is the typical symptom), so strip it when present.
# errors="ignore" makes this a no-op when the sheet holds only components.
# NOTE(review): if the components themselves were computed from data that
# included the label, this does not remove the leak - confirm how the PCA
# sheet was produced.
features_pca = X_pca.drop(columns=[target_column], errors="ignore")

# Labels for both experiments come from the original table.
y_original = X_original[target_column]
y_pca = X_original[target_column]



In [13]:
from sklearn.model_selection import train_test_split
# One shared configuration so both feature sets are split with the same
# hold-out fraction and the same seed.
split_settings = dict(test_size=0.33, random_state=2)

# Split of the original feature set
(
    X_train_original,
    X_test_original,
    y_train_original,
    y_test_original,
) = train_test_split(features_original, y_original, **split_settings)

# Split of the PCA feature set
(
    X_train_pca,
    X_test_pca,
    y_train_pca,
    y_test_pca,
) = train_test_split(features_pca, y_pca, **split_settings)

In [14]:
from sklearn.linear_model import LogisticRegression

def _fit_and_predict(train_features, train_labels, test_features):
    """Fit a fresh logistic regression and predict on the held-out features.

    Returns the fitted classifier together with its predictions for
    ``test_features``.
    """
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(train_features, train_labels)
    return classifier, classifier.predict(test_features)


# Classifier trained on the original features
model_original, original_predicted = _fit_and_predict(
    X_train_original, y_train_original, X_test_original
)

# Classifier trained on the PCA features
model_pca, pca_predicted = _fit_and_predict(X_train_pca, y_train_pca, X_test_pca)


In [15]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def _report_scores(rows):
    """Print each heading, then its score on the following line.

    Every row is ``(heading, metric function, true labels, predicted labels,
    trailing strings)``. The trailing strings are passed to ``print`` after
    the score, which produces the blank-line spacing between entries.
    """
    for heading, metric_fn, truth, guess, trailer in rows:
        print(heading)
        print(metric_fn(truth, guess), *trailer)


_single_gap = ("\n",)
# The last entry of the first group gets an extra gap to separate the groups.
_double_gap = ("\n", "\n")

_report_scores(
    (
        # scores for the model trained on the original features
        ("Accuracy Score (with original features): ", accuracy_score, y_test_original, original_predicted, _single_gap),
        ("Precision Score (with original features): ", precision_score, y_test_original, original_predicted, _single_gap),
        ("Recall Score (with original features): ", recall_score, y_test_original, original_predicted, _single_gap),
        ("F1 Score (with original features): ", f1_score, y_test_original, original_predicted, _double_gap),
        # scores for the model trained on the pca features
        ("Accuracy Score (with pca features): ", accuracy_score, y_test_pca, pca_predicted, _single_gap),
        ("Precision Score (with pca features): ", precision_score, y_test_pca, pca_predicted, _single_gap),
        ("Recall Score (with pca features): ", recall_score, y_test_pca, pca_predicted, _single_gap),
        ("F1 Score (with pca features): ", f1_score, y_test_pca, pca_predicted, _single_gap),
    )
)

Accuracy Score (with original features): 
0.8018095648427402 

Precision Score (with original features): 
0.6673228346456693 

Recall Score (with original features): 
0.5380952380952381 

F1 Score (with original features): 
0.5957820738137083 
 

Accuracy Score (with pca features): 
1.0 

Precision Score (with pca features): 
1.0 

Recall Score (with pca features): 
1.0 

F1 Score (with pca features): 
1.0 



In [16]:
from sklearn.metrics import confusion_matrix
# Confusion matrices, PCA model first and original-feature model second.
for truth, guess in (
    (y_test_pca, pca_predicted),
    (y_test_original, original_predicted),
):
    print(confusion_matrix(truth, guess))

[[1691    0]
 [   0  630]]
[[1522  169]
 [ 291  339]]


In [18]:
# Subset of the original columns used for the reduced model; written once and
# applied to both the training and the held-out frame.
selected_columns = [
    'Senior Citizen',
    'Partner',
    'Dependents',
    'Online Security',
    'Online Backup',
    'Device Protection',
    'Tech Support',
    'Streaming TV',
    'Paperless Billing',
    'Monthly Charges',
    'Total Charges',
    'DSL_Service',
    'month-to-month_contract',
    'one-year_contract',
    'two-year_contract',
    'credit-card-auto_paymentmethod',
    'electronic-check_paymentmethod',
]
x = X_train_original[selected_columns]
x_test = X_test_original[selected_columns]

# Fit a fresh logistic regression on the selected columns only.
new_model = LogisticRegression(max_iter=1000)
new_model.fit(x, y_train_original)
new_predicted = new_model.predict(x_test)

# Accuracy, precision, recall and F1 on the held-out rows, one per line.
for metric_fn in (accuracy_score, precision_score, recall_score, f1_score):
    print(metric_fn(y_test_original, new_predicted))


0.8048255062473072
0.6759443339960238
0.5396825396825397
0.6001765225066196


In [21]:
import pickle

# Persist the reduced-feature logistic regression (new_model) to disk with pickle.
with open(file='logreg_model.pkl', mode='wb') as model_file:
    pickle.dump(new_model, file=model_file)
