<a href="https://colab.research.google.com/github/ahmedshoiab/Model-Benchmark-Dashboard/blob/main/Model_Benchmark_Dashboard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
# income_classifier.py

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Column names
columns = [
    "age", "workclass", "fnlwgt", "education", "education-num",
    "marital-status", "occupation", "relationship", "race", "sex",
    "capital-gain", "capital-loss", "hours-per-week", "native-country", "income"
]

# Load dataset
url_train = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
url_test = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test"

df_train = pd.read_csv(url_train, header=None, names=columns, na_values=" ?", skipinitialspace=True)
df_test = pd.read_csv(url_test, header=0, names=columns, na_values=" ?", skipinitialspace=True, comment='|')
df_test['income'] = df_test['income'].str.replace('.', '', regex=False)

# Combine and clean
df = pd.concat([df_train, df_test], axis=0).reset_index(drop=True)
df.dropna(inplace=True)

# Encode target
df['income'] = df['income'].apply(lambda x: 1 if x == '>50K' else 0)

# Encode categoricals
categorical_columns = df.select_dtypes(include='object').columns.tolist()
if 'income' in categorical_columns:
    categorical_columns.remove('income')

df_encoded = pd.get_dummies(df, columns=categorical_columns)

# Scale numeric columns
numerical_columns = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
scaler = StandardScaler()
df_encoded[numerical_columns] = scaler.fit_transform(df_encoded[numerical_columns])

# Split
X = df_encoded.drop('income', axis=1)
y = df_encoded['income']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# Train model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Save artifacts
joblib.dump(model, "income_model.pkl")
joblib.dump(scaler, "scaler.pkl")
joblib.dump(X.columns.tolist(), "feature_columns.pkl")  # Only input features (no income!)


Accuracy: 0.8503429214863343
              precision    recall  f1-score   support

           0       0.88      0.93      0.90      7431
           1       0.73      0.60      0.66      2338

    accuracy                           0.85      9769
   macro avg       0.80      0.76      0.78      9769
weighted avg       0.84      0.85      0.85      9769



['feature_columns.pkl']

In [11]:
!pip install streamlit



In [29]:
# app.py
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib
import numpy as np

# Load model, scaler, and feature columns
model = joblib.load("income_model.pkl")
scaler = joblib.load("scaler.pkl")
feature_columns = joblib.load("feature_columns.pkl")

st.title("💰 Income Prediction App")
st.write("Enter personal data to predict whether income is >50K or <=50K")

# Input fields
age = st.number_input("Age", min_value=18, max_value=100, value=30)
fnlwgt = st.number_input("Final Weight", value=150000)
education_num = st.slider("Education Level (numeric)", 1, 16, 10)
capital_gain = st.number_input("Capital Gain", value=0)
capital_loss = st.number_input("Capital Loss", value=0)
hours_per_week = st.slider("Hours Worked per Week", 1, 100, 40)

workclass = st.selectbox("Workclass", [
    "Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov",
    "Local-gov", "State-gov", "Without-pay", "Never-worked"
])

education = st.selectbox("Education", [
    "Bachelors", "Some-college", "11th", "HS-grad", "Prof-school",
    "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters"
])

marital_status = st.selectbox("Marital Status", [
    "Married-civ-spouse", "Divorced", "Never-married", "Separated", "Widowed", "Married-spouse-absent"
])

occupation = st.selectbox("Occupation", [
    "Tech-support", "Craft-repair", "Other-service", "Sales", "Exec-managerial",
    "Prof-specialty", "Handlers-cleaners", "Machine-op-inspct", "Adm-clerical", "Farming-fishing"
])

relationship = st.selectbox("Relationship", ["Wife", "Own-child", "Husband", "Not-in-family", "Other-relative"])
race = st.selectbox("Race", ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"])
sex = st.selectbox("Sex", ["Male", "Female"])
native_country = st.selectbox("Native Country", ["United-States", "Mexico", "Philippines", "Germany", "Canada"])

# Assemble input
user_input = {
    'age': age,
    'fnlwgt': fnlwgt,
    'education-num': education_num,
    'capital-gain': capital_gain,
    'capital-loss': capital_loss,
    'hours-per-week': hours_per_week,
    'workclass_' + workclass: 1,
    'education_' + education: 1,
    'marital-status_' + marital_status: 1,
    'occupation_' + occupation: 1,
    'relationship_' + relationship: 1,
    'race_' + race: 1,
    'sex_' + sex: 1,
    'native-country_' + native_country: 1
}

# Create full row with all 0s
input_vector = pd.DataFrame(np.zeros((1, len(feature_columns))), columns=feature_columns)

# Set numerical fields
input_vector['age'] = age
input_vector['fnlwgt'] = fnlwgt
input_vector['education-num'] = education_num
input_vector['capital-gain'] = capital_gain
input_vector['capital-loss'] = capital_loss
input_vector['hours-per-week'] = hours_per_week

# Set one-hot encoded fields
for key in user_input:
    if key in input_vector.columns:
        input_vector.at[0, key] = user_input[key]

# Scale numeric
numeric_cols = ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
input_vector[numeric_cols] = scaler.transform(input_vector[numeric_cols])

# Predict
if st.button("Predict Income"):
    pred = model.predict(input_vector)[0]
    if pred == 1:
        st.success("✅ This person is likely to earn **>50K**.")
    else:
        st.warning("❌ This person is likely to earn **<=50K**.")


Overwriting app.py


In [12]:
import joblib

# Save the trained model
joblib.dump(model, 'income_model.pkl')

['income_model.pkl']

In [13]:
# @title Default title text
model = joblib.load('income_model.pkl')

In [14]:
!pip install pyngrok



In [15]:
!pip install streamlit pyngrok joblib catboost



In [26]:
from pyngrok import ngrok
# Authenticate ngrok
!ngrok config add-authtoken 30QBDmM2N9S1LgUks0BRvk75KfJ_h2ZQmkNjNsfFP6iDxHYg  # Your actual token

# Run the Streamlit app
get_ipython().system_raw('streamlit run app.py &')
#ak_30Kkion9vkeKQhT1pEu9sdbA3mB
# Create tunnel
from pyngrok import ngrok
public_url = ngrok.connect(8501)
print("Your app is live at:", public_url)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml




PyngrokNgrokHTTPError: ngrok client exception, API returned 502: {"error_code":103,"status_code":502,"msg":"failed to start tunnel","details":{"err":"failed to start tunnel: Your account may not run more than 3 tunnels over a single ngrok agent session.\nThe tunnels already running on this session are:\ntn_30QDDv2F02vRyv2P6BEYI8ENSly, tn_30QDQeqO4TwazWtcPkNnYOBqZ6w, tn_30QDboq8XRVJKSPXFa2TxFKvDUU\n\r\n\r\nERR_NGROK_324\r\n"}}
