# Salary Prediction Project
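This notebook demonstrates an end-to-end data science workflow in Python for predicting salaries: loading the dataset, exploratory data analysis, preprocessing, and training and evaluating Linear Regression and Random Forest models.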

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

## Load Dataset

In [None]:
# Read the salary data from the CSV file in the working directory.
df = pd.read_csv('salaries.csv')
# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)

## Exploratory Data Analysis (EDA)

In [None]:
# Distribution of the target, with a kernel density estimate overlaid.
sns.histplot(data=df['Salary'], kde=True)
plt.show()

# Salary spread within each education level.
sns.boxplot(data=df, x='Education', y='Salary')
plt.show()

# Pairwise correlations between the numeric columns, annotated with values.
sns.heatmap(data=df.corr(numeric_only=True), cmap='coolwarm', annot=True)
plt.show()

## Data Preprocessing

In [None]:
# Encode categorical features as integer codes. Each fitted encoder is kept in
# `label_encoders` (keyed by column name) so the category -> code mapping can
# be inspected via `classes_`, inverted, or re-applied to new data later;
# without this the mapping is lost as soon as the loop moves on.
# NOTE(review): every object-dtype column is encoded, which would include
# 'Salary' if it were read as text -- confirm the target is numeric.
label_encoders = {}
cat_cols = df.select_dtypes(include=['object']).columns
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Separate the feature columns from the target column.
X = df.drop(columns='Salary')
y = df['Salary']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply that same transform to
# the test split, so test-set statistics do not influence the fitted scaling.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Model Training & Evaluation

In [None]:
def evaluate(y_test, y_pred, model_name):
    """Print a short regression report for one model.

    Reports the R2 score, mean absolute error and root mean squared error
    of the predictions, followed by a 40-character separator line.

    Args:
        y_test: True target values.
        y_pred: Predicted target values to compare against ``y_test``.
        model_name: Label shown in the report header.
    """
    print(f'📌 {model_name} Results')

    r2 = r2_score(y_test, y_pred)
    print('R2 Score:', r2)

    mae = mean_absolute_error(y_test, y_pred)
    print('MAE:', mae)

    # RMSE is the square root of the mean squared error.
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    print('RMSE:', rmse)

    print('-' * 40)

# Baseline model: linear regression fitted on the scaled training features.
lin_reg = LinearRegression().fit(X_train, y_train)
y_pred_lin = lin_reg.predict(X_test)
evaluate(y_test, y_pred_lin, model_name='Linear Regression')

# Ensemble model: random forest with 200 trees and a fixed seed.
rf = RandomForestRegressor(random_state=42, n_estimators=200).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
evaluate(y_test, y_pred_rf, model_name='Random Forest')

## Predictions Visualization

In [None]:
# Actual vs. predicted salaries for the random forest on the test split;
# semi-transparent markers keep overlapping points visible.
plt.scatter(x=y_test, y=y_pred_rf, alpha=0.5)
plt.title('Random Forest Salary Prediction')
plt.xlabel('Actual Salary')
plt.ylabel('Predicted Salary')
plt.show()