<a href="https://colab.research.google.com/github/arunak451/project/blob/main/salary_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
!pip install gradio
import gradio as gr

# Load data
df = pd.read_csv('/content/Salary Data.csv')

# Clean data - remove empty rows and fix obvious errors
df = df.dropna()
df = df[df['Salary'] > 1000]  # Remove the row with salary=350

# Preprocessing
categorical_features = ['Gender', 'Education Level', 'Job Title']
numeric_features = ['Age', 'Years of Experience']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Split data
X = df.drop('Salary', axis=1)
y = df['Salary']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Train model
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred)

print(f"R2 Score: {r2:.4f}")
print(f"RMSE: {rmse:.2f}")

R2 Score: 0.8843
RMSE: 294663360.59


In [5]:
def predict_salary(age, gender, education_level, job_title, years_experience):
    input_data = pd.DataFrame({
        'Age': [age],
        'Gender': [gender],
        'Education Level': [education_level],
        'Job Title': [job_title],
        'Years of Experience': [years_experience]
    })

    prediction = model.predict(input_data)[0]
    return f"Predicted Salary: ${prediction:,.2f}"

# Get unique values for categorical features
job_titles = sorted(df['Job Title'].unique())
education_levels = sorted(df['Education Level'].unique())
genders = sorted(df['Gender'].unique())

# Create interface
iface = gr.Interface(
    fn=predict_salary,
    inputs=[
        gr.Number(label="Age", minimum=18, maximum=70),
        gr.Dropdown(label="Gender", choices=genders),
        gr.Dropdown(label="Education Level", choices=education_levels),
        gr.Dropdown(label="Job Title", choices=job_titles),
        gr.Number(label="Years of Experience", minimum=0, maximum=50)
    ],
    outputs="text",
    title="Salary Prediction Tool",
    description="Predict salary based on demographic and professional factors"
)

iface.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c39510397c7c76ae6f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


