In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Global formatting helpers reused by the printed headers in every cell.
separator = "_" * 20
# Built by concatenation rather than an f-string: reusing double quotes inside
# a double-quoted f-string is a SyntaxError on Python versions before 3.12.
divider = "\n" + "-" * 80 + "\n"

# Given dataset; np.nan marks a missing observation in either column.
data = {
    'YearsExperience': [1.1, 2.0, 3.2, 4.5, np.nan, 6.8, 7.5, 8.3, 9.0, 10.5],
    'Salary': [39343, 46205, np.nan, 60000, 65200, 72500, np.nan, 83000, 88000, 95000],
}

df = pd.DataFrame(data)

# Remove rows with a missing value in either column. The raw frame `df` is
# left untouched and the surviving rows keep their original index labels.
df_clean = df.dropna()

# Double brackets keep the feature as a 2-D (n_samples, 1) array; the target
# is a plain 1-D array.
x = df_clean[['YearsExperience']].values
y = df_clean['Salary'].values
print(f"{separator} Cleaned DataFrame: {separator}")
print(df_clean)

____________________ Cleaned DataFrame: ____________________
   YearsExperience   Salary
0              1.1  39343.0
1              2.0  46205.0
3              4.5  60000.0
5              6.8  72500.0
7              8.3  83000.0
8              9.0  88000.0
9             10.5  95000.0


In [2]:
# Hold out 20% of the cleaned rows for evaluation; the fixed random_state
# makes the shuffle (and therefore the split) reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Show the training features and targets, each under its own header.
print(
    f"{separator} x_train: {separator}",
    x_train,
    divider,
    f"{separator} y_train: {separator}",
    y_train,
    sep="\n",
)

____________________ x_train: ____________________
[[ 9. ]
 [ 4.5]
 [ 8.3]
 [ 6.8]
 [10.5]]

--------------------------------------------------------------------------------

____________________ y_train: ____________________
[88000. 60000. 83000. 72500. 95000.]


In [3]:
# Fit a simple linear regression on the training split.
# fit() returns the estimator itself, so construction and training can chain.
regressor = LinearRegression().fit(x_train, y_train)

print(f"{separator} Model trained! {separator}")

____________________ Model trained! ____________________


In [4]:
# Show the fitted line's parameters: the slope of the single feature first,
# then the intercept, each under its own header.
for label, value in (
    ("Slope (Coefficient)", regressor.coef_[0]),
    ("Intercept", regressor.intercept_),
):
    print(f"{separator} {label}: {separator}")
    print(value)

____________________ Slope (Coefficient): ____________________
5996.262219666475
____________________ Intercept: ____________________
32809.22944220816


In [5]:
# Predict salaries for the held-out test rows.
y_pred = regressor.predict(x_test)

# Print predictions and ground truth one after the other for comparison.
print(f"{separator} Predicted values: {separator}")
print(y_pred)
print(divider)
# Header is closed with a trailing separator, matching the other headers
# printed in this notebook.
print(f"{separator} Actual values: {separator}")
print(y_test)

____________________ Predicted values: ____________________
[39405.11788384 44801.75388154]

--------------------------------------------------------------------------------

____________________ Actual values: 
[39343. 46205.]


In [6]:
# Mean squared error between the held-out targets and the predictions.
mse = mean_squared_error(y_test, y_pred)
print(f"{separator} Mean Squared Error: {separator}", mse, sep="\n")

____________________ Mean Squared Error: ____________________
986479.1502314303


In [7]:
# Square root of the MSE: puts the error back in the same units as Salary,
# which is easier to interpret.
rmse = np.sqrt(mse)
print(f"{separator} Root Mean Squared Error: {separator}", rmse, sep="\n")

____________________ Root Mean Squared Error: ____________________
993.216567638413


In [8]:
# Keep the fitted parameters under short names so later cells can reuse them.
slope, intercept = regressor.coef_[0], regressor.intercept_

# Print both values, with a divider between the two sections.
print(
    f"{separator} Slope (Coefficient): {separator}",
    slope,
    divider,
    f"{separator} Intercept: {separator}",
    intercept,
    sep="\n",
)


____________________ Slope (Coefficient): ____________________
5996.262219666475

--------------------------------------------------------------------------------

____________________ Intercept: ____________________
32809.22944220816


In [9]:
# Interpreting the coefficient: the sign of the slope gives the direction of
# the fitted relationship. The matching verb is remembered so the summary
# sentence below cannot contradict the branch that was taken.
if slope > 0:
    print(">> Positive coefficient: Salary increases as YearsExperience increases.")
    direction = "increases"
elif slope < 0:
    print(">> Negative coefficient: Salary decreases as YearsExperience increases.")
    direction = "decreases"
else:
    print(">> Zero coefficient: No relationship between YearsExperience and Salary.")
    direction = "changes"

print(divider)
print(f"{separator} Interpretation: {separator}")
# abs() because the direction is already carried by the verb; for a positive
# slope this prints exactly the same sentence as before.
print(f">> For each additional year of experience, salary {direction} by about {abs(slope):.2f}.")
# The intercept is the fitted line's value at YearsExperience == 0.
print(f">> When experience is zero, predicted salary is about {intercept:.2f}.")

>> Positive coefficient: Salary increases as YearsExperience increases.

--------------------------------------------------------------------------------

____________________ Interpretation: ____________________
>> For each additional year of experience, salary increases by about 5996.26.
>> When experience is zero, predicted salary is about 32809.23.
