<a href="https://colab.research.google.com/github/chethan6829/chethan6829/blob/main/CUSTOMER_CHURN_PREDICTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, roc_auc_score
from io import StringIO

# ------------------------------
# STEP 1: CSV DATA (INLINE)
# ------------------------------
# Tiny hand-written sample (8 customers, 4 churned / 4 retained) embedded in
# the notebook so the cell runs without any external file.
csv_data = """customer_id,tenure,monthly_charges,total_charges,contract_type,tech_support,churn
1,1,29.85,29.85,Month-to-month,No,Yes
2,34,56.95,1889.5,One year,Yes,No
3,2,53.85,108.15,Month-to-month,No,Yes
4,45,42.30,1840.75,Two year,Yes,No
5,5,70.70,351.50,Month-to-month,No,Yes
6,60,99.65,5689.65,Two year,Yes,No
7,10,89.10,890.10,Month-to-month,No,Yes
8,55,39.75,2186.25,One year,Yes,No
"""

# Single seed shared by the split and the random forest so a fresh
# "Restart & Run All" reproduces the same numbers.
SEED = 42

# Keep the untouched frame around: the EDA output below describes the raw
# string labels, and encoding happens on a copy.
df_raw = pd.read_csv(StringIO(csv_data))

# ------------------------------
# STEP 2: EDA
# ------------------------------
print("\nDataset Preview:\n", df_raw.head())
print("\nChurn Distribution:\n", df_raw['churn'].value_counts())

# ------------------------------
# STEP 3: ENCODING
# ------------------------------
# One LabelEncoder per column. Re-fitting a single shared encoder would throw
# away the mapping of every column except the last one, making it impossible
# to inspect or invert the integer codes later.
encoders = {}
df = df_raw.copy()
for column in ('contract_type', 'tech_support', 'churn'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# `le` stays bound to the churn encoder (the last one fitted), as before.
le = encoders['churn']

# recall_score and predict_proba(...)[:, 1] below treat label 1 as the
# positive class; make sure that really is "churned".
assert list(le.classes_) == ['No', 'Yes'], "expected churn to encode as No=0, Yes=1"

# ------------------------------
# STEP 4: FEATURE SELECTION
# ------------------------------
# customer_id is an identifier, not a predictor; churn is the target.
X = df.drop(['customer_id', 'churn'], axis=1)
y = df['churn']

# Stratify on the target: with only 8 rows the 3-row test fold could otherwise
# end up containing a single class, which leaves ROC-AUC undefined and recall
# meaningless.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=SEED, stratify=y
)
assert y_test.nunique() == 2, "test fold must contain both classes"

# ------------------------------
# STEP 5: LOGISTIC REGRESSION
# ------------------------------
lr = LogisticRegression()
lr.fit(X_train, y_train)

lr_pred = lr.predict(X_test)
lr_prob = lr.predict_proba(X_test)[:, 1]  # column 1 = probability of churn (label 1)

# ------------------------------
# STEP 6: RANDOM FOREST
# ------------------------------
rf = RandomForestClassifier(n_estimators=100, random_state=SEED)
rf.fit(X_train, y_train)

rf_pred = rf.predict(X_test)
rf_prob = rf.predict_proba(X_test)[:, 1]  # column 1 = probability of churn (label 1)

# ------------------------------
# STEP 7: EVALUATION
# ------------------------------
def print_metrics(title, y_true, y_pred, y_prob):
    """Print accuracy, recall and ROC-AUC for one model, rounded to 2 decimals.

    Parameters
    ----------
    title : str
        Model name shown in the section header.
    y_true : array-like
        Ground-truth labels of the evaluation fold.
    y_pred : array-like
        Hard class predictions for the same rows.
    y_prob : array-like
        Predicted probability of the positive class (label 1), used for ROC-AUC.
    """
    print(f"\n--- {title} ---")
    print("Accuracy:", round(accuracy_score(y_true, y_pred), 2))
    print("Recall:", round(recall_score(y_true, y_pred), 2))
    print("ROC-AUC:", round(roc_auc_score(y_true, y_prob), 2))


# NOTE: the test fold has only 3 rows, so these scores are illustrative only.
print_metrics("Logistic Regression", y_test, lr_pred, lr_prob)
print_metrics("Random Forest", y_test, rf_pred, rf_prob)


Dataset Preview:
    customer_id  tenure  monthly_charges  total_charges   contract_type  \
0            1       1            29.85          29.85  Month-to-month   
1            2      34            56.95        1889.50        One year   
2            3       2            53.85         108.15  Month-to-month   
3            4      45            42.30        1840.75        Two year   
4            5       5            70.70         351.50  Month-to-month   

  tech_support churn  
0           No   Yes  
1          Yes    No  
2           No   Yes  
3          Yes    No  
4           No   Yes  

Churn Distribution:
 churn
Yes    4
No     4
Name: count, dtype: int64

--- Logistic Regression ---
Accuracy: 1.0
Recall: 1.0
ROC-AUC: 1.0

--- Random Forest ---
Accuracy: 1.0
Recall: 1.0
ROC-AUC: 1.0
