# Exercise 04

## Linear regression with gradient descent

In [1]:
import numpy as np
import sklearn.datasets
import sklearn.metrics
from numpy.linalg import inv
import sklearn.model_selection

Implement the linear regression model with full-batch gradient descent. You can copy parts of the code from last week's exercise.

In [None]:
class LinearRegressorGD():
    def __init__(self) -> None:
        self.w = None

    def _add_constant(self, X: np.ndarray):
        return np.hstack((X, np.ones((len(X), 1))))

    def fit(
        self,
        X: np.ndarray,
        y: np.ndarray,
        learning_rate: float = 1e-3,
        n_epochs: int = 500,
        random_state: int = 42,
    ) -> None:
        """Fit the parameters of the model to the data with gradient descent.

        Args:
            X (np.ndarray): features
            y (np.ndarray): targets
            learning_rate (float): step size of gradient descent
            n_epochs (int): number of parameter updates
            random_state (int): seed for reproducibility
        """
        # modify the features such that a bias can be learned easily
        X = self._add_constant(X)
        # initialize randomly
        rng = np.random.default_rng(random_state)
        self.w = rng.standard_normal(size=(X.shape[1], ))
        
        # gradient descent
        for _ in range(n_epochs):
            self.w = self.w - learning_rate * self._gradient_descent(X, y)

    def _gradient_descent(self, X,  y: np.ndarray):
            return X.T @ X @ self.w -  X.T @ y

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Use parameters to predict values

        Args:
            X (np.ndarray): features

        Returns:
            np.ndarray: predicted targets
        """
        X = self._add_constant(X)
        return X @ self.w

X, y, true_coefs = sklearn.datasets.make_regression(n_samples=100, n_features=50, n_informative=3, random_state=0, coef=True, noise=10)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, random_state=0, train_size=0.7)

model = LinearRegressorGD()
model.fit(X_train, y_train, learning_rate=1e-3, n_epochs=100)

y_pred = model.predict(X_train)
print("R-squared (train)", sklearn.metrics.r2_score(y_train, y_pred))

y_pred = model.predict(X_test)
print("R-squared (test)", sklearn.metrics.r2_score(y_test, y_pred))

Now implement the linear regression model with mini-batch gradient descent.

In [27]:
class LinearRegressorSGD(LinearRegressorGD):
    def fit(
        self,
        X: np.ndarray, 
        y: np.ndarray, 
        batch_size: int,
        learning_rate: float=1e-3, 
        n_epochs: int=500, 
        random_state: int=42,
        ) -> None:
        """Fit the parameters of the model to the data with stochastic gradient descent.

        Args:
            X (np.ndarray): features
            y (np.ndarray): targets
            batch_size (int): number of examples in a batch
            learning_rate (float): step size of gradient descent
            n_epochs (int): number of parameter updates
            random_state (int): seed for reproducibility 
        """
        # modify the features such that a bias can be learned easily
        X = self._add_constant(X)
        
        # initialize randomly
        rng = np.random.default_rng(random_state)
        self.w = rng.standard_normal(size=(X.shape[1], ))
        # stochastic gradient descent
        for _ in range(n_epochs):
            indices = np.arange(len(X))
            rng.shuffle(indices)
            batch_indices = np.array_split(indices, np.ceil(len(X) / batch_size))
        
            for step, idx in enumerate(batch_indices):
                print("1",idx)
                X_batch = X[idx, :]
                print(X_batch)
                y_batch = y[idx]
                self.w = self.w - learning_rate * self._gradient_descent(X_batch, y_batch)
            

model = LinearRegressorSGD()
model.fit(X_train, y_train, batch_size=50, learning_rate=1e-2, n_epochs=10)

y_pred = model.predict(X_train)
print("R-squared (train)", sklearn.metrics.r2_score(y_train, y_pred))

TypeError: only integer scalar arrays can be converted to a scalar index