In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Read the train and test splits from CSV files in the working directory
# (train first, then test).
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

## Full model

In [5]:
# Features (X): every column except 'ID' and 'Customer_Churn'.
# Target (y): the 'Customer_Churn' column of the same split.
X_train, y_train = train.drop(columns=['ID', 'Customer_Churn']), train['Customer_Churn']
X_test, y_test = test.drop(columns=['ID', 'Customer_Churn']), test['Customer_Churn']

# Build the logistic regression and fit it on the training split
# (fit() returns the fitted estimator, so construction and fitting chain).
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predicted labels for the test split
y_pred = model.predict(X_test)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.588

Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.82      0.70       714
           1       0.54      0.27      0.36       536

    accuracy                           0.59      1250
   macro avg       0.57      0.55      0.53      1250
weighted avg       0.57      0.59      0.55      1250



## Subset model

Using features that were selected in the feature selection notebook.
Note: a possible next step is to include only one of 'Satisfaction_with_AI_Services' and 'AI_Personalization_Effectiveness', because their correlation is 0.96.

In [3]:
# Reload both splits so this cell does not depend on earlier cells
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Feature subset used by this model
selected_columns = [
    'Age',
    'AI_Interaction_Level',
    'AI_Response_Time',
    'Overall_Usage_Frequency',
]

# Features (X) are the selected columns; target (y) is 'Customer_Churn'
X_train, X_test = train[selected_columns], test[selected_columns]
y_train, y_test = train['Customer_Churn'], test['Customer_Churn']

# Build the logistic regression and fit it on the training split
# (fit() returns the fitted estimator, so construction and fitting chain).
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predicted labels for the test split
y_pred = model.predict(X_test)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.5816

Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.86      0.70       714
           1       0.53      0.21      0.30       536

    accuracy                           0.58      1250
   macro avg       0.56      0.54      0.50      1250
weighted avg       0.57      0.58      0.53      1250



In [4]:
# Reload both splits so this cell does not depend on earlier cells
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Feature subset used by this model
selected_columns = [
    'Age',
    'AI_Interaction_Level',
    'Satisfaction_with_AI_Services',
    'AI_Personalization_Effectiveness',
    'Change_in_Usage_Patterns',
]

# Features (X) are the selected columns; target (y) is 'Customer_Churn'
X_train, X_test = train[selected_columns], test[selected_columns]
y_train, y_test = train['Customer_Churn'], test['Customer_Churn']

# Build the logistic regression and fit it on the training split
# (fit() returns the fitted estimator, so construction and fitting chain).
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predicted labels for the test split
y_pred = model.predict(X_test)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.5872

Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.83      0.70       714
           1       0.54      0.26      0.35       536

    accuracy                           0.59      1250
   macro avg       0.57      0.55      0.52      1250
weighted avg       0.57      0.59      0.55      1250



In [5]:
# Reload both splits so this cell does not depend on earlier cells
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Feature subset used by this model
selected_columns = [
    'Age',
    'AI_Interaction_Level',
    'Satisfaction_with_AI_Services',
    'AI_Personalization_Effectiveness',
    'Customer_Service_Interactions',
]

# Features (X) are the selected columns; target (y) is 'Customer_Churn'
X_train, X_test = train[selected_columns], test[selected_columns]
y_train, y_test = train['Customer_Churn'], test['Customer_Churn']

# Build the logistic regression and fit it on the training split
# (fit() returns the fitted estimator, so construction and fitting chain).
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predicted labels for the test split
y_pred = model.predict(X_test)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))



Accuracy: 0.5752

Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.83      0.69       714
           1       0.51      0.24      0.33       536

    accuracy                           0.58      1250
   macro avg       0.55      0.53      0.51      1250
weighted avg       0.56      0.58      0.53      1250

