In [1]:
import numpy as np
import pandas as pd

# Linear Regression

### Mathematical Representation

$$ \hat{y} = X \cdot w + b $$

 - **X** is the input matrix with n_samples (rows) and n_features (columns)
 - **w** (weights) is the coefficient vector of length n_features
 - **b** (bias) is the intercept
 - $\hat{y}$ is the vector of predicted values



 ## Loss Function (Mean Squared Error)
 - Our main goal is to minimize this function

$$ \text{Loss} = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 $$





### Gradient with respect to weights (**w**):

$$ \frac{\partial \text{Loss}}{\partial w_j} = -\frac{2}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i) \cdot X_{ij}
 $$

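- Vectorized form used for the update step (the constant factor 2 that comes from differentiating the square only rescales the step, so it is absorbed into the learning rate):

$$ \mathbf{dw} = \frac{1}{n} X^\top (\hat{\mathbf{y}} - \mathbf{y})
 $$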

### Gradient with respect to bias (**b**):

 $$ \frac{\partial \text{Loss}}{\partial b} = -\frac{2}{n} \sum_{i=1}^n (y_i - \hat{y}_i)
 $$

In [2]:
class LinearRegression:
    """Linear regression fitted with full-batch gradient descent.

    The model is ``y_hat = X @ weights + bias``. ``fit`` repeatedly steps the
    parameters against ``X.T @ (y_hat - y) / n_samples`` (weights) and
    ``mean(y_hat - y)`` (bias), i.e. the mean-squared-error gradient with its
    constant factor 2 folded into the learning rate.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to every parameter update.
    epochs : int, default=1000
        Number of gradient-descent iterations performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until ``fit`` has been called.
    bias : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, epochs=1000) -> None:
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def predict(self, X):
        """Return predictions ``X @ weights + bias`` for the given samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Input samples; converted to a float array before use.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted target values.
        """
        return np.dot(np.asarray(X, dtype=float), self.weights) + self.bias

    def fit(self, X, y):
        """Fit weights and bias to ``(X, y)`` by gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Target values; flattened to one dimension before training.

        Returns
        -------
        tuple
            ``(weights, bias)`` after the final epoch.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, contains no samples, or its row
            count does not match the number of targets.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the targets: a column vector of shape (n_samples, 1) would
        # otherwise broadcast `y_pred - y` into an (n_samples, n_samples)
        # matrix and break the weight update.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features), got ndim={X.ndim}"
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        # Start from the zero model on every call so refitting is idempotent.
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.epochs):
            residual = self.predict(X) - y

            dw = np.dot(X.T, residual) / n_samples
            db = np.mean(residual)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

        return self.weights, self.bias


In [5]:
# Seed NumPy's global generator so the synthetic dataset is reproducible.
np.random.seed(42)
n = 50  # number of samples

# Two noisy linear ramps and one integer feature drawn from [10, 50).
# The random draws are made in this exact order.
X1 = np.linspace(start=1, stop=100, num=n) + np.random.normal(loc=0, scale=10, size=n)
X2 = np.linspace(start=50, stop=200, num=n) + np.random.normal(loc=0, scale=15, size=n)
X3 = np.random.randint(low=10, high=50, size=n)

# Target: known linear combination of the features plus Gaussian noise.
y = (
    3 * X1
    + 2 * X2
    + 5 * X3
    + 10
    + np.random.normal(loc=0, scale=20, size=n)
)

# Gather the features and the target into a single frame.
df = pd.DataFrame(dict(Feature1=X1, Feature2=X2, Feature3=X3, Target=y))

In [6]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Separate the target column from the feature columns, both as NumPy arrays.
y = df["Target"].to_numpy()
X = df.drop(columns="Target").to_numpy()

# Fit the scaler on the features, then transform those same features.
X_scal = scaler.fit(X).transform(X)

In [12]:
from sklearn.metrics import r2_score

# Train the gradient-descent model on the scaled features.
lr = LinearRegression(epochs=1000, learning_rate=0.01)
lr.fit(X_scal, y)

# Predict on the same data the model was trained on.
y_pred = lr.predict(X_scal)

# R^2 of those predictions; shown as the cell's output.
r2_score(y_true=y, y_pred=y_pred)

0.9865711634640058