In [1]:
# Install required packages (Colab only)
!pip install pandas scikit-learn streamlit joblib

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib


Collecting streamlit
  Downloading streamlit-1.49.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.49.0-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.49.0


In [2]:
from google.colab import files
uploaded=files.upload()

Saving Telco-churn-dataset.csv to Telco-churn-dataset.csv


In [3]:
# ===============================
# Telco Churn ML Pipeline in Colab
# ===============================

# Install dependencies
!pip install scikit-learn pandas joblib

# ---- Imports ----
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score, confusion_matrix
import joblib

# ---- Load dataset ----
# Replace path with your Telco Churn CSV file
df = pd.read_csv("Telco-churn-dataset.csv")

# Target column
TARGET = "Churn"

# Convert target Yes/No -> 1/0
df[TARGET] = df[TARGET].map({"Yes": 1, "No": 0})

# Features
numeric_features = ["tenure", "MonthlyCharges", "TotalCharges"]
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce")

categorical_features = [col for col in df.columns if col not in numeric_features + [TARGET, "customerID"]]

# ---- Preprocessing ----
numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features)
    ]
)

# ---- Pipeline ----
pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression(max_iter=1000))
])

# ---- Train/Test Split ----
X = df[numeric_features + categorical_features]
y = df[TARGET]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# ---- Hyperparameter Tuning ----
param_grid = [
    {
        "classifier": [LogisticRegression(max_iter=1000)],
        "classifier__C": [0.01, 0.1, 1.0, 10]
    },
    {
        "classifier": [RandomForestClassifier(random_state=42)],
        "classifier__n_estimators": [100, 200],
        "classifier__max_depth": [None, 5, 10]
    }
]

grid = GridSearchCV(pipeline, param_grid, cv=5, scoring="roc_auc", n_jobs=-1, verbose=2)
grid.fit(X_train, y_train)

# ---- Evaluation ----
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)
y_proba = best_model.predict_proba(X_test)[:,1]

print("Best Params:", grid.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC AUC:", roc_auc_score(y_test, y_proba))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# ---- Save Model ----
joblib.dump(best_model, "telco_churn_pipeline.joblib")
print("✅ Pipeline saved as telco_churn_pipeline.joblib")


Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best Params: {'classifier': LogisticRegression(max_iter=1000), 'classifier__C': 10}
Accuracy: 0.8055358410220014
ROC AUC: 0.8410912190963341

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.89      0.87      1035
           1       0.66      0.56      0.60       374

    accuracy                           0.81      1409
   macro avg       0.75      0.73      0.74      1409
weighted avg       0.80      0.81      0.80      1409


Confusion Matrix:
 [[926 109]
 [165 209]]
✅ Pipeline saved as telco_churn_pipeline.joblib


In [4]:
!pip install streamlit pandas scikit-learn joblib




In [5]:
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib

# Load pipeline
@st.cache_resource
def load_model():
    return joblib.load("telco_churn_pipeline.joblib")

pipeline = load_model()

st.title("📊 Telco Customer Churn Prediction")

# Upload CSV file
uploaded_file = st.file_uploader("Upload CSV file with customer data", type=["csv"])

if uploaded_file:
    data = pd.read_csv(uploaded_file)
    st.write("Preview of uploaded data:", data.head())

    if st.button("Predict"):
        preds = pipeline.predict(data)
        probs = pipeline.predict_proba(data)[:, 1]

        result = data.copy()
        result["Churn_Prediction"] = preds
        result["Churn_Probability"] = probs

        st.write("Predictions:")
        st.dataframe(result.head(20))
else:
    st.info("Please upload a CSV file to get predictions.")


Writing app.py


In [6]:
!pip install streamlit pyngrok joblib pandas


Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.3.0


In [7]:
!ngrok config add-authtoken 2zPsyvEgfyCsMA10TOBi0dPDRfW_XNVXjsxsgjskLFQdU9kv


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
from pyngrok import ngrok

# Streamlit run
!streamlit run app.py &

# Public URL expose
public_url = ngrok.connect(8501)
print("🌍 Open this Streamlit App URL:", public_url)



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.106.167.36:8501[0m
[0m


In [None]:
from pyngrok import ngrok

# Run Streamlit
!streamlit run app.py &

# Expose URL
public_url = ngrok.connect(8501)
print("🌍 Streamlit App URL:", public_url)



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.91.63.204:8501[0m
[0m


In [None]:
streamlit run app.py


SyntaxError: invalid syntax (ipython-input-507122745.py, line 1)