## Regression Model Evaluation

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Load the data

In [3]:
# read the salary dataset; the path is relative to the notebook's working directory
df = pd.read_csv('Salary_Data.csv')

# preview the first rows; leaving the frame as the cell's last expression lets
# Jupyter render it as a table instead of the plain-text dump print() produces
df.head()

   YearsExperience  Salary
0              1.1   39343
1              1.3   46205
2              1.5   37731
3              2.0   43525
4              2.2   39891


### Prepare the dataset

In [4]:
# target: the Salary column; features: every remaining column of df
y = df['Salary']
x = df.drop(columns='Salary')

# hold out part of the rows for evaluation
from sklearn.model_selection import train_test_split

# 80% of the rows go to training; the fixed random_state makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=123456,
)

### Model

In [5]:
from sklearn.linear_model import LinearRegression

# create the linear regression model and train it on the training split;
# fit() returns the fitted estimator, so it can be bound to `model` directly
model = LinearRegression().fit(x_train, y_train)

# echo the fitted estimator as the cell output
model

LinearRegression()

### Evaluation

In [8]:
# predict the target for the held-out test features (x_test) with the fitted model;
# y_prediction is what the metric cells below compare against y_test
y_prediction = model.predict(x_test)

In [9]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)

# MAE: mean of the absolute differences between the true and predicted values
MAE = mean_absolute_error(y_true=y_test, y_pred=y_prediction)
print(f"mean absolute error = {MAE}")

mean absolute error = 2734.304777539491


In [10]:
# MSE: mean of the squared differences between the true and predicted values
MSE = mean_squared_error(y_true=y_test, y_pred=y_prediction)
print(f"mean squared error = {MSE}")

mean squared error = 9601570.176354375


In [11]:
# RMSE: square root of the MSE computed in the previous cell, which expresses
# the error in the units of the target instead of squared units
RMSE = np.sqrt(MSE)
print(f"root mean squared error = {RMSE}")

root mean squared error = 3098.640052725449


In [12]:
# R2: coefficient of determination of the predictions against the true test values
R2 = r2_score(y_true=y_test, y_pred=y_prediction)
print(f"R2 score = {R2}")

R2 score = 0.9932427848341637


In [14]:
# Adjusted R2 corrects R2 for the number of predictors:
#   adj_R2 = 1 - (1 - R2) * (n - 1) / (n - k - 1)
n = len(y_test)        # number of samples R2 was computed on
k = x_test.shape[1]    # number of predictors, taken from the feature matrix rather than hard-coded

# the denominator n - k - 1 must be positive; otherwise the formula divides by
# zero or flips sign and yields a meaningless score
if n - k - 1 <= 0:
    raise ValueError(
        f"adjusted R2 is undefined for n={n} samples and k={k} predictors; need n > k + 1"
    )

ADJ_R2 = 1 - ((1 - R2) * ((n - 1)/(n - (k + 1))))
print(f"adjusted R2 = {ADJ_R2}")

adjusted R2 = 0.9918913418009965
