## **Importing Libraries**

In [None]:
#Importing Libraries for Dataset and Visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#Importing Machine Learning Libraries
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_squared_error

## **Reading CSV**

In [None]:
#Reading CSV
# Dataset location in the Kaggle input directory; kept in one named constant
# so the notebook can be pointed at a different copy by editing a single line.
DATA_PATH = "/kaggle/input/random-salary-data-of-employes-age-wise/Salary_Data.csv"

df = pd.read_csv(DATA_PATH)
# Show the first rows as a quick sanity check of the load.
df.head()

## **Performing EDA**

In [None]:
#Data Description
# Summary statistics of df, shown as this cell's output.
df.describe()

In [None]:
#Data Information
# Prints a structural summary of df (columns, non-null counts, dtypes).
df.info()

In [None]:
#Checking Nulls
# Number of missing values in each column of df.
df.isna().sum()

In [None]:
#Checking Duplicates
# Number of rows in df that repeat an earlier row exactly.
df.duplicated().sum()

In [None]:
#Scatter plot of salary against years of experience
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['YearsExperience'], df['Salary'])

#Axis labels
ax.set_xlabel("Years of Experience")
ax.set_ylabel("Salary")
plt.show()

In [None]:
#Checking Outliers
# Pass the column by keyword: seaborn's handling of a single positional vector
# differs between releases (older versions treat it as `x` with a deprecation
# warning, newer ones as `data`), so `x=` keeps the plot consistent.
sns.boxplot(x=df['YearsExperience'])
plt.title("Distribution of Years of Experience")
plt.show()

## **Applying Linear Regression** 

In [None]:
#Preparing Data
# Selecting with a list of column names keeps each result two-dimensional,
# giving one column per array: X holds the feature, y holds the target.
X = df[['YearsExperience']].to_numpy()
y = df[['Salary']].to_numpy()

In [None]:
#Splitting data into train and test sets
# 20% of the rows are held out for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
#Fitting a linear regression on the training split
# fit() returns the estimator itself, so construction and training are chained.
lr = LinearRegression().fit(X_train, y_train)
# Bare expression so the fitted estimator is still shown as the cell output.
lr

In [None]:
#Predicting on the held-out test features
y_hat = lr.predict(X_test)
# Display the predictions as the cell output.
y_hat

In [None]:
#Training data together with the fitted regression line
plt.figure(figsize=(10, 6))
plt.scatter(X_train, y_train, color="b", label="Training data")
# The model's predictions on the training inputs trace out the fitted line.
plt.plot(X_train, lr.predict(X_train), c="orange", label="Fitted line")
# Same x-axis wording as the exploratory scatter plot, for consistency.
plt.xlabel("Years of Experience")
plt.ylabel("Salary")
plt.title("Linear Regression Fit on Training Data")
plt.legend()
plt.show()

In [None]:
#Real vs. predicted salary for each test sample
# 1-based position of each test sample, used only as the x-axis.
index_list = list(range(1, len(y_test) + 1))

plt.figure(figsize=(10, 6))
plt.plot(index_list, y_test, color='b', linestyle='-', label="Real Values")
plt.plot(index_list, y_hat, color='r', linestyle='-', label="Predicted Values")
plt.title("Comparison Between Real Test Values and Predicted Values")
plt.xlabel("Test Sample")
plt.ylabel("Salary")
# The legend distinguishes the real and predicted lines.
plt.legend()
plt.show()

## **Evaluation of Linear Regression**

In [None]:
#Mean squared error between the true test targets and the predictions
mse = mean_squared_error(y_true=y_test, y_pred=y_hat)

In [None]:
#Displaying MSE
# MSE is in squared units of the target, so it is not directly comparable to a salary value.
print('Mean Squared Error :',mse)