In [3]:
# Customer Churn Classification in Google Colab (Improved Version)

# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

from google.colab import files
from io import StringIO

# Step 2: Upload the dataset
# files.upload() opens Colab's file picker and returns a mapping of
# file name -> raw file contents (bytes, decoded below).
print("📤 Please upload your dataset (CSV file)...")
uploaded = files.upload()

# Step 3: Load the dataset
# Fail with a clear message if nothing was uploaded (e.g. the dialog was
# dismissed) instead of an opaque IndexError from indexing an empty list.
if not uploaded:
    raise ValueError("No file was uploaded. Re-run this cell and select a CSV file.")

# Only the first uploaded file is used; any additional files are ignored.
file_name = next(iter(uploaded))
df = pd.read_csv(StringIO(uploaded[file_name].decode('utf-8')))

# Step 4: Display the first few rows
print("\n📊 First 5 rows of the dataset:")
print(df.head())

# Step 5: Check for missing values
# NOTE: this only counts real NaNs. Blank/whitespace strings (as may occur in
# TotalCharges) are not NaN yet, so they are reported separately in Step 7.
print("\n🔍 Checking for missing values...")
print(df.isnull().sum())

# Step 6: Drop unneeded columns (customerID is just an identifier)
# errors='ignore' makes this a no-op when the column is absent.
df.drop(columns=['customerID'], errors='ignore', inplace=True)

# Step 7: Convert TotalCharges to numeric (some may be blanks or spaces)
# errors='coerce' turns anything unparseable into NaN; those entries are then
# filled with the column median so no rows are lost.
total_charges = pd.to_numeric(df['TotalCharges'], errors='coerce')
n_missing_total_charges = int(total_charges.isnull().sum())
print(f"TotalCharges: {n_missing_total_charges} blank/non-numeric value(s) filled with the median")
df['TotalCharges'] = total_charges.fillna(total_charges.median())




# Step 8: Encode categorical variables
print("\n🔁 Encoding categorical variables...")
categorical_cols = list(df.select_dtypes(include='object').columns)

# LabelEncoder is meant for the target: it maps the Churn labels to integer
# class ids (sorted order, so "No" -> 0 and "Yes" -> 1 for a Yes/No column).
le = LabelEncoder()
if 'Churn' in categorical_cols:
    df['Churn'] = le.fit_transform(df['Churn'])
    categorical_cols.remove('Churn')

# One-hot encode the remaining (nominal) feature columns. Integer label codes
# would impose an arbitrary ordering that a linear model treats as meaningful.
# drop_first=True drops one redundant level per feature; dtype=float keeps the
# frame purely numeric for the scaler.
if categorical_cols:
    df = pd.get_dummies(df, columns=categorical_cols, drop_first=True, dtype=float)

# Step 9: Define features and target
X = df.drop('Churn', axis=1)
y = df['Churn']

# Step 10: Split the data
# Split BEFORE scaling so that the test rows never influence the statistics
# the scaler learns (fitting on the full dataset leaks test information).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 11: Scale features
# Fit mean/std on the training rows only, then apply the same transform to
# the held-out test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Kept for backward compatibility with any later cell that reads X_scaled:
# the full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

# Step 12: Train Logistic Regression model
# fit() returns the fitted estimator itself, so construction and training
# can be chained into a single expression.
model = LogisticRegression(solver='lbfgs', max_iter=500).fit(X_train, y_train)

# Step 13: Predict and evaluate
# Predict on the held-out test split, then report overall accuracy followed
# by the per-class precision/recall/F1 table.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("\n✅ Model Accuracy:", accuracy)

report = classification_report(y_test, y_pred)
print("\n📋 Classification Report:\n", report)


📤 Please upload your dataset (CSV file)...


Saving Classify Customer Churn.csv to Classify Customer Churn (2).csv

📊 First 5 rows of the dataset:
   customerID  gender  SeniorCitizen Partner Dependents  tenure PhoneService  \
0  7590-VHVEG  Female              0     Yes         No       1           No   
1  5575-GNVDE    Male              0      No         No      34          Yes   
2  3668-QPYBK    Male              0      No         No       2          Yes   
3  7795-CFOCW    Male              0      No         No      45           No   
4  9237-HQITU  Female              0      No         No       2          Yes   

      MultipleLines InternetService OnlineSecurity  ... DeviceProtection  \
0  No phone service             DSL             No  ...               No   
1                No             DSL            Yes  ...              Yes   
2                No             DSL            Yes  ...               No   
3  No phone service             DSL            Yes  ...              Yes   
4                No     Fiber optic  