In [14]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import precision_score, recall_score, f1_score

In [15]:
data = pd.read_csv('telco_customer_churn.csv')

In [16]:
data.isnull().sum()  # Check for missing values
data.fillna(data.mode().iloc[0], inplace=True)  # Example for categorical columns

In [17]:
data = pd.get_dummies(data, drop_first=True)

In [18]:
print(data.columns)


Index(['SeniorCitizen', 'tenure', 'MonthlyCharges', 'customerID_0003-MKNFE',
       'customerID_0004-TLHLJ', 'customerID_0011-IGKFF',
       'customerID_0013-EXCHZ', 'customerID_0013-MHZWF',
       'customerID_0013-SMEOE', 'customerID_0014-BMAQU',
       ...
       'TotalCharges_996.45', 'TotalCharges_996.85', 'TotalCharges_996.95',
       'TotalCharges_997.65', 'TotalCharges_997.75', 'TotalCharges_998.1',
       'TotalCharges_999.45', 'TotalCharges_999.8', 'TotalCharges_999.9',
       'Churn_Yes'],
      dtype='object', length=13602)


In [19]:
data.columns = data.columns.str.strip()  # Remove leading/trailing spaces

In [20]:
X = data.drop('Churn_Yes', axis=1)  # Independent variables
y = data['Churn_Yes']  # Target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [21]:
model_lr = LogisticRegression()
model_lr.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [22]:
model_rf = RandomForestClassifier()
model_rf.fit(X_train, y_train)

In [23]:
model_xgb = xgb.XGBClassifier()
model_xgb.fit(X_train, y_train)

In [24]:

models = {'Logistic Regression': model_lr, 'Random Forest': model_rf, 'XGBoost': model_xgb}

for model_name, model in models.items():
    y_pred = model.predict(X_test)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f"{model_name} - Precision: {precision:.2f}, Recall: {recall:.2f}, F1-score: {f1:.2f}")

Logistic Regression - Precision: 0.65, Recall: 0.60, F1-score: 0.62
Random Forest - Precision: 0.70, Recall: 0.45, F1-score: 0.55
XGBoost - Precision: 0.62, Recall: 0.52, F1-score: 0.56


In [25]:
# Save model
with open('model.pkl', 'wb') as file:
    pickle.dump(model_rf, file)