In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('Salary_dataset.csv')

# Clean the dataset by removing unnecessary columns
# ('Unnamed: 0' is presumably a row index saved into the CSV -- it is not a feature)
data_clean = data.drop(columns=['Unnamed: 0'])

# Define the features (YearsExperience) and the target variable (Salary).
# X is kept as a one-column DataFrame (double brackets) so that scikit-learn
# receives a 2-D input and records the feature name at fit time.
X = data_clean[['YearsExperience']]
y = data_clean['Salary']

# Split the data into training and test sets (80% train, 20% test).
# random_state pins the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Linear Regression model
model = LinearRegression()

# Train the model using the training data
model.fit(X_train, y_train)

# Make predictions using the test data
y_pred = model.predict(X_test)

# Evaluate the model performance on the held-out test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the results
print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R²) score: {r2}')

# Predict salary for a specific input (example: 5 years of experience).
# The query is wrapped in a DataFrame that reuses the training column names:
# the model was fitted with feature names, and passing a bare nested list
# makes scikit-learn warn "X does not have valid feature names".
years_experience = 5
X_new = pd.DataFrame([[years_experience]], columns=X.columns)
predicted_salary = model.predict(X_new)[0]
print(f'Predicted Salary for {years_experience} years of experience: ${predicted_salary:,.2f}')


Mean Squared Error (MSE): 49830096.855908394
R-squared (R²) score: 0.9024461774180497
Predicted Salary for 5 years of experience: $71,499.28




In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Load the dataset
data = pd.read_csv('Salary_dataset.csv')

# Clean the dataset by removing unnecessary columns
# ('Unnamed: 0' is presumably a row index saved into the CSV -- it is not a feature)
data_clean = data.drop(columns=['Unnamed: 0'])

# Define the features (YearsExperience) and the target variable (Salary).
# X is kept as a one-column DataFrame (double brackets) so that scikit-learn
# receives a 2-D input and records the feature name at fit time.
X = data_clean[['YearsExperience']]
y = data_clean['Salary']

# Split the data into training and test sets (80% train, 20% test).
# random_state pins the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Linear Regression model
model = LinearRegression()

# Train the model using the training data
model.fit(X_train, y_train)

# Save the trained model to a file
model_filename = 'salary_prediction_model.pkl'
joblib.dump(model, model_filename)
print(f"Model saved to {model_filename}")

# Load the model from the file (to test saving/loading).
# NOTE: joblib.load is pickle-based and can execute arbitrary code; it is safe
# here because the file was written just above, but never load untrusted files.
loaded_model = joblib.load(model_filename)

# Predict salary for a specific input (example: 5 years of experience) using the loaded model.
# The query is wrapped in a DataFrame that reuses the training column names:
# the model was fitted with feature names, and passing a bare nested list
# makes scikit-learn warn "X does not have valid feature names".
years_experience = 5
X_new = pd.DataFrame([[years_experience]], columns=X.columns)
predicted_salary = loaded_model.predict(X_new)[0]
print(f'Predicted Salary for {years_experience} years of experience: ${predicted_salary:,.2f}')


Model saved to salary_prediction_model.pkl
Predicted Salary for 5 years of experience: $71,499.28


