## Importing required libraries 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import warnings

## Load the Dataset

In [None]:
# Location of the Position_Salaries CSV inside the Kaggle input directory.
salaries_csv = '/kaggle/input/position-salaries/Position_Salaries.csv'

# Read the file into a DataFrame and preview the first rows.
df_sal = pd.read_csv(salaries_csv)
df_sal.head()

## Data Analysis

In [None]:
# Show pandas' descriptive summary of the DataFrame as the cell output.
df_sal.describe()

In [None]:

# Suppress FutureWarning messages so they do not clutter the cell output.
warnings.filterwarnings("ignore", category=FutureWarning)

# Histogram of the Salary column with a kernel-density estimate overlaid.
plt.title('Salary Distribution Plot')
plt.xlabel('Salary')
plt.ylabel('Frequency')
# `kde` is a boolean flag. The string 'True' only worked because any
# non-empty string is truthy ('False' would have enabled it as well).
sns.histplot(df_sal['Salary'], kde=True)
plt.show()

In [None]:
# Count missing and infinite entries in the Salary column before modelling.
salary_col = df_sal['Salary']
nan_count = salary_col.isnull().sum()
inf_count = np.isinf(salary_col).sum()

print('salary column contains NaN values ', nan_count)
print('Salary column contains inf values ', inf_count)
df_sal.columns
# Output recorded from the original run:
#   salary column contains NaN values  0
#   Salary column contains inf values  0
# i.e. the Salary column is free from NaN and inf values.

## Relationship between Salary & Level

In [None]:
# Scatter plot of the raw data: position level on x, salary on y.
levels = df_sal['Level']
salaries = df_sal['Salary']

plt.scatter(levels, salaries, color='violet')
# Set title and axis labels on the current axes in one call.
plt.gca().set(title='Level vs Salary', xlabel='Level', ylabel='Salary')
plt.box(False)  # hide the frame around the axes
plt.show()

## Splitting the dataset into dependent & independent variables
Salary is the dependent variable (target);
Level is the independent variable (feature).

In [None]:
# Features: every column except the first and the last, kept 2-D.
# Target: the last column, 1-D.
# NOTE(review): presumably this selects Level as X and Salary as y - confirm
# against the CSV column order.
feature_frame = df_sal.iloc[:, 1:-1]
target_series = df_sal.iloc[:, -1]

X = feature_frame.values
y = target_series.values

## Training 

In [None]:
# Both models are trained on the whole dataset (no train/test split)
# because the dataset is small.

# Plain linear regression; `fit` returns the fitted estimator itself.
lr = LinearRegression().fit(X, y)

# Polynomial regression: expand X into degree-4 polynomial terms, then fit
# an ordinary linear regression on the expanded features.
pr = PolynomialFeatures(degree=4)
X_poly = pr.fit_transform(X)

lr_2 = LinearRegression()
lr_2.fit(X_poly, y)

## Predict the results 

In [None]:
# In-sample predictions from both models: the linear model sees the raw
# features, the polynomial model sees the expanded features.
y_pred_lr, y_pred_poly = lr.predict(X), lr_2.predict(X_poly)

## Visualize predictions

In [None]:
# Observed data (scatter) against the linear-regression fit (line).
# Labels are attached to each artist with `label=` instead of being passed
# positionally to `plt.legend`, so the legend text cannot end up on the wrong
# artist, whatever order matplotlib collects the handles in.
plt.scatter(X, y, color='violet', label='X/y')
plt.plot(X, y_pred_lr, color='firebrick', label='X/y_pred_lr')
plt.title('Real Data(Linear Regression)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.legend(title='Salary/Level', loc='best', facecolor='white')
plt.box(False)  # hide the frame around the axes
plt.show()

In [None]:
# Dense grid of levels (step 0.1) so the polynomial fit is drawn as a smooth
# curve rather than straight segments between the training points.
# `X.min()` / `X.max()` yield scalars; the builtin min()/max() on a 2-D array
# return 1-element arrays, which np.arange should not be given as bounds.
X_grid = np.arange(X.min(), X.max(), 0.1).reshape(-1, 1)

# Evaluate the polynomial model on the grid. `pr` is already fitted, so use
# `transform` rather than re-fitting it.
y_grid_poly = lr_2.predict(pr.transform(X_grid))

# Labels are bound to each artist so the legend cannot be mismatched.
plt.scatter(X, y, color='pink', label='X/y')
plt.plot(X_grid, y_grid_poly, color='firebrick', label='X/y_pred_poly')
plt.title('Real Data (Polynomial Regression)')
plt.xlabel('Position Level')
plt.ylabel('Salary')
plt.legend(title='Salary/Level', loc='best')
plt.box(False)  # hide the frame around the axes
plt.show()

In [None]:
# Predict the salary for position level 7.5 with both models.
print(f'LR :{lr.predict([[7.5]])}')

# `pr` was fitted during training; apply it with `transform` instead of
# re-fitting the transformer on the query point.
print(f'Polynomial Regression:{lr_2.predict(pr.transform([[7.5]]))}')