In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib

# Load the dataset
df = pd.read_csv("customer_churn_data.csv")

# Display first few rows to check the data
df.head()

# Balance the dataset by downsampling the majority class.
# The larger class is chosen from the actual row counts instead of being
# assumed: resample(replace=False) raises ValueError when asked for more rows
# than the frame holds, which is what a hard-coded "majority" label triggers
# as soon as it is in fact the smaller class.
df_churn_yes = df[df["Churn"] == "Yes"]
df_churn_no = df[df["Churn"] == "No"]
if df_churn_yes.empty or df_churn_no.empty:
    # Fail early with a clear message; a single-class frame cannot be
    # balanced or used to fit a binary classifier.
    raise ValueError('The "Churn" column must contain both "Yes" and "No" rows.')
if len(df_churn_yes) >= len(df_churn_no):
    df_majority, df_minority = df_churn_yes, df_churn_no
else:
    df_majority, df_minority = df_churn_no, df_churn_yes
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=len(df_minority),
    random_state=42,
)
df_balanced = pd.concat([df_majority_downsampled, df_minority])

# Convert categorical values to numerical:
# 1 for "Female" / "Yes", 0 for every other value.
df_balanced["Gender"] = (df_balanced["Gender"] == "Female").astype("int64")
df_balanced["Churn"] = (df_balanced["Churn"] == "Yes").astype("int64")

# Features and target
X = df_balanced[["Age", "Gender", "Tenure", "MonthlyCharges"]]
y = df_balanced["Churn"]

# Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling (Standardization).
# The scaler is fitted on the training split only and then reused on the test
# split, so test-set statistics do not influence the transformation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Logistic Regression model
log_model = LogisticRegression()
log_model.fit(X_train, y_train)

# Predictions
y_pred = log_model.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Save the model and scaler to files; both are needed later because new
# inputs must be transformed with the same fitted scaler before prediction.
joblib.dump(log_model, "model.pkl")
joblib.dump(scaler, "scaler.pkl")


Model Accuracy: 74.47%


['scaler.pkl']