In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [112]:
data = {
    'YearsExperience':[1.1, 1.5, 2.0, 2.2, 3.0, 3.2, 3.9, 4.5, 5.1, 6.0],
    'Salary': [20000, 30000, 45000, 49525, 60150, 65400, 75000, 81111, 87938, 93088]
}


In [113]:
df = pd.DataFrame(data)

In [114]:
df.head()

Unnamed: 0,YearsExperience,Salary
0,1.1,20000
1,1.5,30000
2,2.0,45000
3,2.2,49525
4,3.0,60150


In [115]:
df.info

<bound method DataFrame.info of    YearsExperience  Salary
0              1.1   20000
1              1.5   30000
2              2.0   45000
3              2.2   49525
4              3.0   60150
5              3.2   65400
6              3.9   75000
7              4.5   81111
8              5.1   87938
9              6.0   93088>

In [116]:
df.describe()

Unnamed: 0,YearsExperience,Salary
count,10.0,10.0
mean,3.25,60721.2
std,1.611935,24505.62008
min,1.1,20000.0
25%,2.05,46131.25
50%,3.1,62775.0
75%,4.35,79583.25
max,6.0,93088.0


In [117]:
df

Unnamed: 0,YearsExperience,Salary
0,1.1,20000
1,1.5,30000
2,2.0,45000
3,2.2,49525
4,3.0,60150
5,3.2,65400
6,3.9,75000
7,4.5,81111
8,5.1,87938
9,6.0,93088


In [118]:
X = df[['YearsExperience']]
Y = df[['Salary']]

In [119]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.4, random_state=42)

In [120]:
print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)

X_train shape: (6, 1)
Y_train shape: (6, 1)


In [121]:
model = LinearRegression()
model.fit(X_train, Y_train)

In [122]:
y_pred = model.predict(X_test)

In [123]:
Y_pred = model.predict(X)
r2 = r2_score(Y, Y_pred)

In [125]:
n = len(Y)
k = X.shape[1]
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)

In [126]:
print("R²:", r2)
print("Adjusted R²:", adj_r2)

R²: 0.9130350753280897
Adjusted R²: 0.9021644597441009


In [135]:
new_exp = pd.DataFrame({'YearsExperience': [2]})

# Predict
predicted_salary_value = model.predict(new_exp).item()  # get single number

# Format with comma and 1 decimal
print("Predicted Salary:", f"{predicted_salary_value:,.1f}")

Predicted Salary: 47,695.5


In [137]:
df

Unnamed: 0,YearsExperience,Salary
0,1.1,20000
1,1.5,30000
2,2.0,45000
3,2.2,49525
4,3.0,60150
5,3.2,65400
6,3.9,75000
7,4.5,81111
8,5.1,87938
9,6.0,93088


In [None]:
new_exp = pd.DataFrame({'YearsExperience': [4.5]})

# Predict
predicted_salary_value = model.predict(new_exp).item()  # get single number

# Format with comma and 1 decimal
print("Predicted Salary:", f"{predicted_salary_value:,.1f}")

**Accuracy**

In [148]:
n = len(Y)
k = X.shape[1]
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - k - 1)

print("R²:", r2)
print("Adjusted R²:", adj_r2)

R²: 0.9130350753280897
Adjusted R²: 0.9021644597441009
