In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Load Dataset
try:
    data = pd.read_csv('/content/Customer-Churn.csv')  # Adjust path if needed
    print("Dataset loaded successfully!")
except FileNotFoundError:
    print("Error: Dataset file not found. Please check the file path.")
    exit()

# Handle Missing Values
# Replace numerical missing values with median
num_cols = data.select_dtypes(include=['float64', 'int64']).columns
data[num_cols] = data[num_cols].fillna(data[num_cols].median())

# Replace categorical missing values with mode
cat_cols = data.select_dtypes(include=['object']).columns
data[cat_cols] = data[cat_cols].fillna(data[cat_cols].mode().iloc[0])

# Feature Engineering: Create 'TotalCharges' as a new feature
if 'MonthlyCharges' in data.columns and 'tenure' in data.columns:
    data['TotalCharges'] = data['MonthlyCharges'] * data['tenure']
else:
    print("Warning: Columns 'MonthlyCharges' or 'tenure' not found. Skipping 'TotalCharges' calculation.")

# Ensure 'Churn' is binary (convert 'Yes'/'No' to 1/0)
if 'Churn' in data.columns:
    if data['Churn'].dtype == 'object':
        data['Churn'] = data['Churn'].map({'Yes': 1, 'No': 0})
    elif not data['Churn'].isin([0, 1]).all():
        print("Error: 'Churn' column contains invalid values.")
        exit()
else:
    print("Error: 'Churn' column not found.")
    exit()

# Encode Categorical Variables
data = pd.get_dummies(data, drop_first=True)

# Split Data into Features (X) and Target (y)
X = data.drop('Churn', axis=1)
y = data['Churn']

# Split Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression Model
try:
    model = LogisticRegression(max_iter=1000, random_state=42)  # Increase iterations to avoid convergence issues
    model.fit(X_train, y_train)
except Exception as e:
    print(f"Error during model training: {e}")
    exit()

# Predict on Test Data
try:
    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]
except Exception as e:
    print(f"Error during prediction: {e}")
    exit()

# Evaluate the Model
try:
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("ROC-AUC Score:", roc_auc_score(y_test, y_prob))
except Exception as e:
    print(f"Error during evaluation: {e}")
    exit()


Dataset loaded successfully!


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy: 0.8112136266855926
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.89      0.87      1036
           1       0.66      0.59      0.62       373

    accuracy                           0.81      1409
   macro avg       0.76      0.74      0.75      1409
weighted avg       0.81      0.81      0.81      1409

ROC-AUC Score: 0.8598872234931216


In [None]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("ROC-AUC Score:", roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))


Accuracy: 0.8112136266855926
Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.89      0.87      1036
           1       0.66      0.59      0.62       373

    accuracy                           0.81      1409
   macro avg       0.76      0.74      0.75      1409
weighted avg       0.81      0.81      0.81      1409

ROC-AUC Score: 0.8598872234931216


In [None]:
y_pred = model.predict(X_test)
print("Predictions on test set:\n", y_pred[:10])  # Show first 10 predictions

Predictions on test set:
 [1 0 0 1 0 0 0 0 0 0]


In [None]:
try:
    model = LogisticRegression(max_iter=1000, random_state=42)
    model.fit(X_train, y_train)
    print("Model trained successfully!")
except Exception as e:
    print(f"Error during model training: {e}")
    exit()


Model trained successfully!


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
