<a href="https://colab.research.google.com/github/jahnaviakveti/Employeesalaryprediction/blob/main/Employee_salary_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import pandas as pd

# Location of the input CSV that the rest of the notebook works from.
DATA_PATH = "/content/adult 3.csv"

df = pd.read_csv(DATA_PATH)
df.head()


Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
4,18,?,103497,Some-college,10,Never-married,?,Own-child,White,Female,0,0,30,United-States,<=50K


In [15]:
# Clean the raw frame in one chained expression instead of three in-place
# mutations:
#   1. treat the "?" placeholder as a missing value,
#   2. drop every row that contains a missing value,
#   3. rename the "gender" column to "sex" (the name app.py uses).
# Each step returns a new frame, so `df` is rebound rather than mutated.
df = (
    df.replace("?", pd.NA)
    .dropna()
    .rename(columns={"gender": "sex"})
)

In [16]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-dtype column (the target "income" included) and
# keep the fitted encoder for each one, keyed by column name.
# NOTE: running this cell a second time finds no object columns left in `df`
# and therefore resets `label_encoders` to an empty dict.
label_encoders = {}
categorical_columns = df.select_dtypes(include='object').columns
for column in categorical_columns:
    encoder = LabelEncoder().fit(df[column])
    df[column] = encoder.transform(df[column])
    label_encoders[column] = encoder


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Target is the encoded "income" column; every other column is a predictor.
y = df["income"]
X = df.drop(columns="income")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Accuracy on the held-out rows is the cell's displayed value.
y_pred = model.predict(X_test)
accuracy_score(y_test, y_pred)

0.8560530679933664

In [17]:
import joblib

# Persist the fitted classifier and the per-column encoders under the file
# names that app.py loads.
joblib.dump(value=model, filename="salary_model.pkl")
joblib.dump(value=label_encoders, filename="label_encoders.pkl")

['label_encoders.pkl']

In [18]:
%%writefile app.py
import html

import joblib
import pandas as pd
import streamlit as st

# Page-wide CSS: a fixed, non-repeating, full-cover background image applied
# to the .stApp container.
BACKGROUND_CSS = """
    <style>
    .stApp {
        background-image: url("https://img.freepik.com/premium-photo/stack-dollar-blue-background-with-earning-profit_903752-4857.jpg");
        background-size: cover;
        background-repeat: no-repeat;
        background-attachment: fixed;
    }
    </style>
    """
st.markdown(BACKGROUND_CSS, unsafe_allow_html=True)

@st.cache_resource
def load_artifacts():
    """Load the trained classifier and the fitted label encoders from disk.

    Streamlit re-executes this script on every widget interaction. Caching
    the loaded objects avoids re-reading both pickle files on each rerun.
    The cache lives for the lifetime of the Streamlit server process, so
    replaced .pkl files are picked up only after the server is restarted.

    Returns:
        tuple: ``(model, label_encoders)`` as stored in "salary_model.pkl"
        and "label_encoders.pkl".
    """
    # NOTE: joblib.load unpickles arbitrary Python objects; only load files
    # that come from a trusted source.
    return joblib.load("salary_model.pkl"), joblib.load("label_encoders.pkl")


model, label_encoders = load_artifacts()

# Centered black page heading, rendered as raw HTML.
TITLE_HTML = "<h1 style='color: black; text-align: center;'>💻Employee Salary Predictor💰</h1>"
st.markdown(TITLE_HTML, unsafe_allow_html=True)

def user_input():
    """Collect feature values from sidebar widgets as a one-row DataFrame.

    Categorical choices are offered from each fitted encoder's ``classes_``
    and converted to integer codes with that same encoder. Numeric values
    come straight from sliders and number inputs.

    Returns:
        pd.DataFrame: a single row whose columns are, in order: age,
        workclass, fnlwgt, education, educational-num, marital-status,
        occupation, relationship, race, sex, capital-gain, capital-loss,
        hours-per-week, native-country.
    """
    sidebar = st.sidebar
    sidebar.header("Input")

    def encoded_choice(title, column):
        # Show a dropdown of the encoder's known categories and return the
        # integer code of the selected one.
        encoder = label_encoders[column]
        selected = sidebar.selectbox(title, encoder.classes_)
        return encoder.transform([selected])[0]

    # Widgets are created in the order they appear in the sidebar.
    age = sidebar.slider("Age", 18, 90, 30)
    workclass_code = encoded_choice("Workclass", "workclass")
    education_code = encoded_choice("Education", "education")
    marital_code = encoded_choice("Marital Status", "marital-status")
    occupation_code = encoded_choice("Occupation", "occupation")
    relationship_code = encoded_choice("Relationship", "relationship")
    race_code = encoded_choice("Race", "race")
    sex_code = encoded_choice("Sex", "sex")
    capital_gain = sidebar.number_input("Capital Gain", 0, 100000, 0)
    capital_loss = sidebar.number_input("Capital Loss", 0, 5000, 0)
    hours = sidebar.slider("Hours per Week", 1, 99, 40)
    country_code = encoded_choice("Native Country", "native-country")
    edu_num = sidebar.slider("Education Number", 1, 16, 10)
    fnlwgt = sidebar.number_input("Final Weight", 10000, 1000000, 300000)

    # Key order fixes the column order of the returned frame; it differs
    # from the widget order above.
    feature_row = {
        "age": age,
        "workclass": workclass_code,
        "fnlwgt": fnlwgt,
        "education": education_code,
        "educational-num": edu_num,
        "marital-status": marital_code,
        "occupation": occupation_code,
        "relationship": relationship_code,
        "race": race_code,
        "sex": sex_code,
        "capital-gain": capital_gain,
        "capital-loss": capital_loss,
        "hours-per-week": hours,
        "native-country": country_code,
    }

    return pd.DataFrame([feature_row])

input_df = user_input()

# Run the forest once. predict_proba returns one column per entry of
# model.classes_, so the winning class is found by *position* and then mapped
# back to its class value. Indexing the probability row with the predicted
# class value itself only works when class values happen to equal their
# column positions.
class_probabilities = model.predict_proba(input_df)[0]
best_index = class_probabilities.argmax()
pred = model.classes_[best_index]
proba = class_probabilities[best_index]

# Turn the encoded class back into the original income text.
label = label_encoders["income"].inverse_transform([pred])[0]

# The income label text contains "<" / ">" characters and is spliced into raw
# HTML below (unsafe_allow_html=True), so escape it first to keep the markup
# well-formed.
safe_label = html.escape(str(label))

# Result card: scoped CSS followed by the predicted label and its confidence
# as a percentage with two decimals. Doubled braces are literal CSS braces
# inside the f-string.
st.markdown(
    f"""
    <style>
    .result-box {{
        background: linear-gradient(145deg, #ffffff, #e6e6e6);
        border: 2px solid #ccc;
        padding: 30px;
        border-radius: 20px;
        width: 60%;
        margin: 40px auto;
        text-align: center;
        box-shadow: 8px 8px 20px rgba(0, 0, 0, 0.1);
        transition: transform 0.3s ease;
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }}

    .result-box:hover {{
        transform: scale(1.02);
        box-shadow: 10px 10px 25px rgba(0, 0, 0, 0.2);
    }}

    .result-title {{
        font-size: 28px;
        font-weight: 800;
        color: #222;
        margin-bottom: 25px;
    }}

    .result-text {{
        font-size: 20px;
        color: #222;
        margin: 10px 0;
    }}

    .result-tag {{
        display: inline-block;
        background-color: #111;
        color: #00FF88;
        padding: 6px 12px;
        border-radius: 8px;
        font-size: 18px;
        font-weight: bold;
        margin-left: 10px;
    }}
    </style>

    <div class="result-box">
        <div class="result-title">📜 Result 📜</div>
        <div class="result-text">
            Predicted Income: <span class="result-tag">{safe_label}</span>
        </div>
        <div class="result-text">
            Confidence: <span class="result-tag">{proba*100:.2f}%</span>
        </div>
    </div>
    """,
    unsafe_allow_html=True
)

Overwriting app.py


In [19]:
!pip install streamlit pyngrok -q


In [20]:
from pyngrok import ngrok
# Terminate any ngrok process started through pyngrok -- presumably so the
# next cell can open a fresh tunnel.
ngrok.kill()
# Signal any running process named "streamlit" to stop (left over from an
# earlier launch of app.py).
!pkill streamlit

In [21]:
from pyngrok import ngrok
from google.colab import userdata
ngrok.set_auth_token(userdata.get('NGROK_AUTH_TOKEN'))
public_url = ngrok.connect(8501)
print("Streamlit URL:", public_url)
!streamlit run app.py &>/content/log.txt &

Streamlit URL: NgrokTunnel: "https://9c7efff24785.ngrok-free.app" -> "http://localhost:8501"
