### Linear Regression

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
# Synthetic regression data: 100 points around the line y = 4 + 3x,
# perturbed by standard-normal noise. The global seed makes the draws repeatable.
np.random.seed(0)
X = np.random.rand(100, 1)
y = np.random.randn(*X.shape) + (3 * X + 4)

In [None]:
# Hold out 20% of the rows for evaluation. No random_state is passed, so the
# shuffle depends on NumPy's global RNG state (seeded in the data cell).
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.2)

(X_train.shape, X_test.shape)

((80, 1), (20, 1))

In [None]:
# Fit scikit-learn's LinearRegression on the training split.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Predict targets for the held-out test rows with the fitted linear model.
y_pred = model.predict(X_test)

In [None]:
# Test-set error: mean squared error and its square root (RMSE).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

(mse, rmse)

(0.8111606286982094, 0.9006445629093696)

#### Linear regression from scratch (batch gradient descent on a toy dataset)

In [None]:
# Toy feature matrix: four samples (rows) with two integer features (columns).
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])

In [None]:
# Targets for the four rows of X. They satisfy y = 1*x1 + 2*x2 + 3 exactly,
# so a correct fit should recover weights [1, 2] and bias 3.
y = np.array([6, 8, 9, 11])

In [None]:
# Sanity check: X is (n_samples, n_features) and y holds one target per sample.
X.shape, y.shape

((4, 2), (4,))

In [None]:
# Hyperparameters for the gradient-descent cell.
learning_rate = 0.05      # step size applied to each gradient
num_iteration = 10_000    # upper bound on the number of update steps
eps = 1e-12               # loss-change threshold used for early stopping

# Problem dimensions taken from the feature matrix.
no_data_points = X.shape[0]
no_features = X.shape[1]

# Mutable training state; the gradient-descent loop updates these.
weights = np.zeros(no_features)
bias = 0
mse = 0

In [None]:
# Model output for every row of X at the current parameters
# (all zeros while the weights and bias are still at their initial values).
sample = X @ weights + bias
sample

array([0., 0., 0., 0.])

In [None]:
# Scratch check of the gradient expression: yields one entry per feature.
# NOTE(review): this projects the raw predictions (`sample`), not the residual
# `sample - y` that the training loop uses, so it is not the loss gradient — confirm intent.
(1 / no_data_points) * (X.T @ sample)

array([0., 0.])

In [None]:
# Batch gradient descent for the linear model y ≈ X·weights + bias.
#
# Each step moves `weights` and `bias` against the gradient of the squared-error
# loss. Every 250th step the loss is re-evaluated on the updated parameters and
# the loop stops early once it has changed by less than `eps` since the
# previous check.

for i in range(num_iteration):
  linear_model = np.dot(X, weights) + bias

  # Residuals of the current fit, and their projection onto each feature column.
  error = linear_model - y
  error_applied_to_features = np.dot(X.T, error)

  # Per-sample-averaged gradients with respect to the weights and the bias.
  gradient_weight = (1 / no_data_points) * error_applied_to_features
  gradient_bias = (1 / no_data_points) * np.sum(error)

  weights -= learning_rate * gradient_weight
  bias -= learning_rate * gradient_bias

  # Modulo, not bitwise AND: `i & 250 == 0` is true for i = 0, 1, 4, 5, 256, ...
  # which made the convergence test compare consecutive iterations and stop
  # long before the parameters had settled.
  if i % 250 == 0:
    linear_model = np.dot(X, weights) + bias
    prev = mse
    mse = np.mean((linear_model - y)**2)
    if abs(prev - mse) < eps:
      print(f'stopping in weights : {weights}, bias: {bias}')
      break
    print("Loss - ", mse)

stopping in weights : [1.00001699 1.99999766], bias: 2.9999775795521044


In [None]:
def predict(X):
  """Return the linear model's output for the samples in `X`.

  Reads the notebook-level `weights` and `bias` as they stand at call time
  and computes `np.dot(X, weights) + bias`.
  """
  weighted_sum = np.dot(X, weights)
  return weighted_sum + bias

### Decision Tree Regression

In [None]:
from sklearn.tree import DecisionTreeRegressor

In [None]:
# Display the feature matrix that the decision tree will be fitted on.
X

array([[1, 1],
       [1, 2],
       [2, 2],
       [2, 3]])

In [None]:
# Display the target vector that the decision tree will be fitted on.
y

array([ 6,  8,  9, 11])

In [None]:
# Fit a regression tree (depth capped at 5) on the full toy dataset.
regressor = DecisionTreeRegressor(max_depth=5)
regressor.fit(X=X, y=y)

In [None]:
# Training-set MSE of the tree: it is scored on the same rows it was fitted on.
y_pred = regressor.predict(X)
mse = np.square(y - y_pred).mean()
mse

0.0

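The tree reproduces the training targets exactly (MSE = 0.0), but it was scored on the same four points it was fitted on. A perfect training score like this means the tree has memorized the data, which is overfitting; it says nothing about how well the model would predict unseen samples.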