# 💼 Employee Salary Prediction using Linear Regression

### **Step 1: Import Required Libraries** ¶


In [31]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib


In [32]:
# Load the dataset in this cell so it also works on a fresh kernel
# (Restart & Run All): `data` was otherwise only defined by a later cell,
# which made this line raise NameError when the notebook is run top to bottom.
data = pd.read_csv("adult 3.csv")

# List the column names to confirm the feature/target columns used below exist.
print(data.columns.tolist())

['age', 'workclass', 'fnlwgt', 'education', 'educational-num', 'marital-status', 'occupation', 'relationship', 'race', 'gender', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income', 'YearsExperience', 'Salary']


### **Step 2: Load and Explore the Dataset** ¶


In [33]:

# Read the CSV file (path is relative to the notebook's working directory)
data = pd.read_csv(filepath_or_buffer="adult 3.csv")

# Preview the first five rows as a quick sanity check of the parsed columns
data.head(n=5)


Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income,YearsExperience,Salary
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K,3,
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K,0,
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K,11,
3,44,Private,160323,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K,17,
4,18,?,103497,Some-college,10,Never-married,?,Own-child,White,Female,0,0,30,United-States,<=50K,18,


In [34]:

# Count the missing (NaN) entries in every column
data.isna().sum()


age                    0
workclass              0
fnlwgt                 0
education              0
educational-num        0
marital-status         0
occupation             0
relationship           0
race                   0
gender                 0
capital-gain           0
capital-loss           0
hours-per-week         0
native-country         0
income                 0
YearsExperience        0
Salary             48842
dtype: int64

In [35]:

# Dataset shape as a (number of rows, number of columns) tuple
data.shape


(48842, 17)

### **Step 3: Select Features and Target** ¶


In [36]:
# Build a synthetic Salary target: 10000 per year of experience plus integer
# noise drawn from [-3000, 3000). The seed is fixed so the generated values
# (and every metric computed from them) are reproducible between runs.
np.random.seed(42)
noise = np.random.randint(-3000, 3000, size=len(data))
data["Salary"] = data["YearsExperience"] * 10000 + noise

# Confirm the freshly generated column has no gaps
print("Missing Salary values:", data["Salary"].isna().sum())  # Should be 0

Missing Salary values: 0


In [37]:
# Predictor columns (edit this list if the column is named differently in your CSV)
feature_columns = ["YearsExperience"]

# X stays a DataFrame because it is selected with a list of columns;
# y is the single target column, i.e. a Series.
X = data[feature_columns]
y = data["Salary"]

### **Step 4: Split Data into Training and Testing Sets** ¶


In [38]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
# (`train_test_split` is already imported in the imports cell at the top of
# the notebook, so it is not re-imported here.)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Sanity check: the two partitions together account for every row.
assert len(X_train) + len(X_test) == len(X)



### **Step 5: Train the Regression Model** ¶


In [39]:

# Fit a linear regression on the training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

# Leave the fitted estimator as the cell's final expression so it is displayed
model


### **Step 6: Evaluate the Model** ¶


In [40]:
# Predict on the held-out split and score it with mean squared error
# (the average of the squared differences between y_test and y_pred).
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 3046267.6577280383


### **Step 7: Save the Trained Model** ¶


In [41]:

# Serialize the fitted estimator to disk so it can be reloaded without retraining
joblib.dump(value=model, filename="model.pkl")
print("Model saved as model.pkl")


Model saved as model.pkl


### **Step 8: Load and Use Your Trained Model** ¶

In [42]:
# `joblib` is already imported in the imports cell at the top of the notebook,
# so it is not re-imported here.

# Load the saved model
loaded_model = joblib.load("model.pkl")

# Predict salary for a specific experience level
years = 6  # Example: 6 years

# The model was fitted on a DataFrame with a "YearsExperience" column, so
# predict on a DataFrame with the same column name. Passing a bare nested list
# makes scikit-learn warn that the input lacks the feature names seen during fit.
years_exp = pd.DataFrame({"YearsExperience": [years]})
predicted_salary = loaded_model.predict(years_exp)

# The message uses `years` so it cannot drift from the value actually predicted;
# int() truncates the fractional part of the prediction for display.
print(f"📈 Predicted Salary for {years} years of experience: ₹{int(predicted_salary[0])}")

📈 Predicted Salary for 6 years of experience: ₹59998


