#Installing Gradio for Interface Deployment

In [1]:
!pip install gradio --quiet


#Library Imports for Machine Learning Pipeline with Gradio Interface

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor

# Load the dataset

In [4]:

df = pd.read_csv('/content/sample_data/SALARY DATA.csv')
df.dropna(inplace=True)

#Displaying the Salary DataFrame

In [5]:
df

Unnamed: 0,Age,Gender,Education Level,Job Title,Years of Experience,Salary
0,32.0,Male,Bachelor's,Software Engineer,5.0,90000.0
1,28.0,Female,Master's,Data Analyst,3.0,65000.0
2,45.0,Male,PhD,Senior Manager,15.0,150000.0
3,36.0,Female,Bachelor's,Sales Associate,7.0,60000.0
4,52.0,Male,Master's,Director,20.0,200000.0
...,...,...,...,...,...,...
370,35.0,Female,Bachelor's,Senior Marketing Analyst,8.0,85000.0
371,43.0,Male,Master's,Director of Operations,19.0,170000.0
372,29.0,Female,Bachelor's,Junior Project Manager,2.0,40000.0
373,34.0,Male,Bachelor's,Senior Operations Coordinator,7.0,90000.0


#Identifying Null Values in the Dataset

In [6]:
df.isnull().sum()

Unnamed: 0,0
Age,0
Gender,0
Education Level,0
Job Title,0
Years of Experience,0
Salary,0


# Features and target

In [7]:

X = df.drop('Salary', axis=1)
y = df['Salary']

#Categorical and numerical features

In [8]:

categorical_cols = ['Gender', 'Education Level', 'Job Title']
numerical_cols = ['Age', 'Years of Experience']


#Preprocessing and model pipeline

In [9]:

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ],
    remainder='passthrough'
)

model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])


#Train model

In [10]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model_pipeline.fit(X_train, y_train)


The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [11]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(y_test, model_pipeline.predict(X_test))
r2 = r2_score(y_test, model_pipeline.predict(X_test))

#as r2 value is 87% which model is performing good we can use this model for deployment purpose

In [12]:
print("Model Evaluation Parameters:")
print(f"Mean Absolute Error (MAE): {mse:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R²): {r2:.2f}")

Model Evaluation Parameters:
Mean Absolute Error (MAE): 315878566.67
Mean Squared Error (MSE): 315878566.67
R-squared (R²): 0.87


# Define prediction function

In [13]:

def predict_salary(age, gender, education, job_title, experience):
    input_df = pd.DataFrame([{
        'Age': age,
        'Gender': gender,
        'Education Level': education,
        'Job Title': job_title,
        'Years of Experience': experience
    }])
    prediction = model_pipeline.predict(input_df)[0]
    return f"Predicted Salary: Rs{prediction:,.2f}"



# Get unique values for dropdowns

In [14]:

genders = df['Gender'].unique().tolist()
educations = df['Education Level'].unique().tolist()
job_titles = df['Job Title'].unique().tolist()


# Create Gradio UI

In [15]:

with gr.Blocks() as demo:
    gr.Markdown("## 💼 Salary Prediction For New Joinee in Netcraftz")

    with gr.Row():
        age = gr.Slider(18, 65, label="Age")
        experience = gr.Slider(0, 40, label="Years of Experience")

    gender = gr.Dropdown(choices=genders, label="Gender")
    education = gr.Dropdown(choices=educations, label="Education Level")
    job_title = gr.Dropdown(choices=job_titles, label="Job Title")

    predict_button = gr.Button("Predict Salary")
    output = gr.Textbox(label="Result")

    predict_button.click(fn=predict_salary, inputs=[age, gender, education, job_title, experience], outputs=output)


# Launch app

In [16]:
# Launch app
demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://281e8fd23829cead38.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


