In [1]:
!pip install streamlit==1.39 joblib scikit-learn



In [3]:
pipe = joblib.load("gbm_pipeline.pkl")

NameError: name 'joblib' is not defined

In [4]:
%%writefile app.py
import joblib
import pandas as pd
import streamlit as st
from sklearn.base import BaseEstimator, TransformerMixin

# --- Custom transformer used in pipeline ---
class Winsorizer(BaseEstimator, TransformerMixin):
    def __init__(self, cols, lower=0.01, upper=0.99):
        self.cols, self.lower, self.upper = cols, lower, upper
        self.bounds_ = {}
    def fit(self, X, y=None):
        X = X.copy()
        for c in self.cols:
            if c in X and pd.api.types.is_numeric_dtype(X[c]):
                lo, hi = X[c].quantile(self.lower), X[c].quantile(self.upper)
                self.bounds_[c] = (lo, hi)
        return self
    def transform(self, X):
        X = X.copy()
        for c,(lo,hi) in self.bounds_.items():
            if c in X and pd.api.types.is_numeric_dtype(X[c]):
                X[c] = X[c].clip(lo, hi)
        return X

# --- Load pipeline ---
pipe = joblib.load("/content/drive/My Drive/FDM Project Files/models/gradiant_boosting/gbm_pipeline.pkl")

st.set_page_config(page_title="Attrition Risk (GBM)", page_icon="🧑‍💼", layout="centered")
st.title("🧑‍💼 Employee Attrition Risk Prediction")

# --- Inputs ---
col1,col2 = st.columns(2)
with col1:
    age = st.number_input("Age",18,60,30)
    years_company = st.number_input("Years at Company",0,60,3)
    monthly_income = st.number_input("Monthly Income",0,20000,6000,step=100)
    num_prom = st.number_input("Number of Promotions",0,20,0)
    dist_home = st.number_input("Distance from Home (miles)",0,300,10)
    tenure = st.number_input("Company Tenure (years)",0,200,10)
    dependents = st.number_input("Number of Dependents",0,10,0)
with col2:
    gender = st.selectbox("Gender",["Male","Female"])
    job_role = st.selectbox("Job Role",["Finance","Healthcare","Technology","Education","Media"])
    edu = st.selectbox("Education Level",["High School","Associate Degree","Bachelor’s Degree","Master’s Degree","PhD"])
    marital = st.selectbox("Marital Status",["Married","Single","Divorced"])
    job_level = st.selectbox("Job Level",["Entry","Mid","Senior"])
    company_size = st.selectbox("Company Size",["Small","Medium","Large"])
    remote = st.selectbox("Remote Work",["No","Yes"])
    leader = st.selectbox("Leadership Opportunities",["No","Yes"])
    innov = st.selectbox("Innovation Opportunities",["No","Yes"])
    overtime = st.selectbox("Overtime",["No","Yes"])

wlb = st.selectbox("Work-Life Balance",["Poor","Below Average","Good","Excellent"])
job_sat = st.selectbox("Job Satisfaction",["Very Low","Low","Medium","High"])
perf = st.selectbox("Performance Rating",["Low","Below Average","Average","High"])
reputation = st.selectbox("Company Reputation",["Very Poor","Poor","Good","Excellent"])
recognition = st.selectbox("Employee Recognition",["Very Low","Low","Medium","High"])
threshold = st.slider("Decision Threshold",0.05,0.95,0.5,0.01)

# --- Prediction ---
if st.button("Predict risk"):
    row = pd.DataFrame([{
        "Age": age, "Years at Company": years_company, "Monthly Income": monthly_income,
        "Number of Promotions": num_prom, "Distance from Home": dist_home,
        "Company Tenure": tenure, "Number of Dependents": dependents,
        "Gender": gender, "Job Role": job_role, "Education Level": edu,
        "Marital Status": marital, "Job Level": job_level, "Company Size": company_size,
        "Remote Work": remote, "Leadership Opportunities": leader, "Innovation Opportunities": innov,
        "Overtime": overtime, "Work-Life Balance": wlb, "Job Satisfaction": job_sat,
        "Performance Rating": perf, "Company Reputation": reputation, "Employee Recognition": recognition
    }])

    # Force numeric only on numerical columns
    numeric_features = [
        "Age","Years at Company","Monthly Income","Number of Promotions",
        "Distance from Home","Company Tenure","Number of Dependents"
    ]
    for col in numeric_features:
        row[col] = pd.to_numeric(row[col], errors="coerce")

    # --- Ensure ordinal categories match training ---
    ordinal_map = {
        "Work-Life Balance": ["Poor","Below Average","Good","Excellent"],
        "Job Satisfaction": ["Very Low","Low","Medium","High"],
        "Performance Rating": ["Low","Below Average","Average","High"],
        "Company Reputation": ["Very Poor","Poor","Good","Excellent"],
        "Employee Recognition": ["Very Low","Low","Medium","High"]
    }
    for col, cats in ordinal_map.items():
        if col in row:
            row[col] = pd.Categorical(row[col], categories=cats, ordered=True)

        # --- Predict ---
    prob = float(pipe.predict_proba(row)[:,1][0])
    pred = int(prob >= threshold)

    # --- Dynamic tone based on probability ---
    if prob >= 0.8:
        st.warning("⚠️ Very high risk of attrition — immediate HR attention recommended.")
    elif prob >= 0.6:
        st.info("🔶 Moderate to high risk — review key employee satisfaction factors.")
    elif prob <= 0.3:
        st.success("✅ Very low attrition risk — employee appears stable and satisfied.")
    else:
        st.info("🟢 Low to moderate risk — maintain positive engagement and monitor occasionally.")

    st.subheader(f"Probability of leaving: **{prob:.2%}**")
    st.write("Prediction:", "🔴 High risk" if pred == 1 else "🟢 Low risk")

    # --- Rule-based recommendations ---
    suggestions = []

    # ----- HIGH RISK CASES -----
    if pred == 1:
        # Job satisfaction
        if row["Job Satisfaction"].iloc[0] in ["Very Low", "Low"]:
            suggestions.append("Improve job satisfaction through recognition, workload management, or career development.")
        elif row["Job Satisfaction"].iloc[0] == "Medium":
            suggestions.append("Consider collecting feedback to identify satisfaction gaps.")

        # Work-life balance
        if row["Work-Life Balance"].iloc[0] in ["Poor", "Below Average"]:
            suggestions.append("Encourage flexible hours or partial remote work to enhance work-life balance.")

        # Compensation fairness
        if row["Monthly Income"].iloc[0] < 4000 and row["Job Level"].iloc[0] in ["Mid", "Senior"]:
            suggestions.append("Review compensation for fairness relative to experience and responsibilities.")
        elif row["Monthly Income"].iloc[0] < 2500 and row["Job Level"].iloc[0] == "Entry":
            suggestions.append("Consider revising entry-level compensation to stay competitive.")

        # Performance & development
        if row["Performance Rating"].iloc[0] in ["Low", "Below Average"]:
            suggestions.append("Provide mentoring or skill-building opportunities to improve performance.")
        elif row["Performance Rating"].iloc[0] == "Average":
            suggestions.append("Encourage training to promote career growth and advancement.")

        # Recognition
        if row["Employee Recognition"].iloc[0] in ["Very Low", "Low"]:
            suggestions.append("Enhance recognition programs — regular appreciation reduces attrition risk.")

        # Company reputation
        if row["Company Reputation"].iloc[0] in ["Very Poor", "Poor"]:
            suggestions.append("Work on strengthening company culture and internal communication.")

        # Education vs income mismatch
        if row["Education Level"].iloc[0] in ["Master’s Degree", "PhD"] and row["Monthly Income"].iloc[0] < 5000:
            suggestions.append("Reevaluate compensation for highly qualified employees to ensure fairness.")

        # Tenure and promotion stagnation
        if row["Years at Company"].iloc[0] > 8 and row["Number of Promotions"].iloc[0] == 0:
            suggestions.append("Consider offering promotions or new responsibilities for long-tenured staff.")

        # Distance and remote options
        if row["Distance from Home"].iloc[0] > 30:
            suggestions.append("Explore hybrid work options — long commutes often drive attrition.")
        if row["Remote Work"].iloc[0] == "No" and row["Work-Life Balance"].iloc[0] in ["Poor", "Below Average"]:
            suggestions.append("Introduce partial remote options to improve balance and satisfaction.")

        # Growth & innovation opportunities
        if row["Leadership Opportunities"].iloc[0] == "No":
            suggestions.append("Offer leadership or project ownership opportunities to enhance engagement.")
        if row["Innovation Opportunities"].iloc[0] == "No":
            suggestions.append("Encourage involvement in innovation initiatives to improve motivation.")

    # ----- LOW RISK CASES -----
    else:
        if prob < 0.2:
            st.success("🌟 Excellent retention indicators — maintain current work environment.")
        elif prob < 0.4:
            st.info("👍 Stable employee — minor improvements can further reduce attrition risk.")

        # Reinforce positives
        if row["Job Satisfaction"].iloc[0] == "High":
            suggestions.append("Continue providing career growth and recognition programs.")
        if row["Work-Life Balance"].iloc[0] in ["Good", "Excellent"]:
            suggestions.append("Maintain balanced workloads and flexible arrangements.")
        if row["Employee Recognition"].iloc[0] == "High":
            suggestions.append("Sustain recognition culture — a key driver of engagement.")

        # Continuous improvement
        if row["Performance Rating"].iloc[0] == "Average":
            suggestions.append("Encourage further performance development with targeted feedback.")
        if row["Years at Company"].iloc[0] > 5 and row["Number of Promotions"].iloc[0] < 1:
            suggestions.append("Plan a career progression roadmap to keep motivation high.")

    # --- Display suggestions ---
    if suggestions:
        st.markdown("### 💡 Recommendations:")
        for s in suggestions:
            st.markdown(f"- {s}")


Overwriting app.py


In [None]:
# Install Streamlit + dependencies
!pip install streamlit==1.39 joblib scikit-learn -q

# Install Cloudflared
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
!dpkg -i cloudflared-linux-amd64.deb

!pkill streamlit
!pkill cloudflared

!kill -9 $(lsof -t -i:8501) 2>/dev/null
!streamlit run app.py --server.port 8501 --server.address 0.0.0.0 >/dev/null 2>&1 &
!cloudflared tunnel --url http://localhost:8501 --no-autoupdate

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.5/65.5 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m94.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m91.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.3/79.3 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-adk 1.15.1 requires watchdog<7.0.0,>=6.0.0, but you have watchdog 5.0.3 which is incompatible.[0m[3