# Task 0

In [6]:
import pandas as pd
import numpy as np

from typing import List, Union, Optional, Tuple

# Read both CSV splits from the working directory, training file first.
train: pd.DataFrame
test: pd.DataFrame
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [2]:
# Preview the first five rows of the training frame.
train.head(n=5)

Unnamed: 0,Id,y,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,0,738.023171,1764.052346,400.157208,978.737984,2240.893199,1867.55799,-977.27788,950.088418,-151.357208,-103.218852,410.598502
1,1,400.646015,144.043571,1454.273507,761.037725,121.675016,443.863233,333.674327,1494.079073,-205.158264,313.067702,-854.095739
2,2,189.900156,-2552.989816,653.618595,864.436199,-742.16502,2269.754624,-1454.365675,45.758517,-187.18385,1532.779214,1469.35877
3,3,-78.399449,154.947426,378.16252,-887.785748,-1980.796468,-347.912149,156.348969,1230.290681,1202.379849,-387.326817,-302.302751
4,4,-547.373531,-1048.552965,-1420.017937,-1706.270191,1950.775395,-509.652182,-438.074302,-1252.79536,777.490356,-1613.897848,-212.74028


In [3]:
# Work on a plain 2-D array view of the training frame.
train_mat: np.ndarray = train.to_numpy()
# Column 0 is left out, column 1 becomes the regression target and every
# column from index 2 onward becomes a feature.
# NOTE(review): this relies on the column order shown in the preview
# (Id, y, x1..x10) - confirm if the CSV layout ever changes.
train_x: np.ndarray = train_mat[..., 2:]
train_y: np.ndarray = train_mat[..., 1]

In [14]:
class Objective:
    """Interface for an objective that can be evaluated and differentiated.

    Subclasses are expected to override both methods; the base
    implementations only raise ``NotImplementedError``.
    """

    def __call__(self, w):
        """Return the objective value at the weight vector ``w``."""
        raise NotImplementedError()

    def grad(self, w):
        """Return the gradient of the objective at the weight vector ``w``."""
        raise NotImplementedError()


class ObjFunc(Objective):
    """Mean squared error of the linear model ``phi @ w`` against ``y``.

    The value is ``||y - phi @ w||^2 / n`` and the gradient is
    ``2 * phi.T @ (phi @ w - y) / n``, where ``n`` is the number of targets.
    """

    def __init__(self, y: Union[np.ndarray, float], phi: np.ndarray):
        """Store float64 copies of the targets and the feature matrix.

        Args:
            y: Targets. An array, any array-like, or a single scalar
                (treated as one sample).
            phi: Feature matrix with one row per target. A 1-D input is
                treated as a single row.

        Raises:
            ValueError: If ``y`` is empty, or the number of rows of ``phi``
                differs from the number of targets.
        """
        # np.array (unlike .astype) accepts scalars and lists as advertised by
        # the annotation, and still copies so later caller-side mutation of the
        # inputs cannot change the objective.
        self.y = np.atleast_1d(np.array(y, dtype=np.float64))
        self.phi = np.atleast_2d(np.array(phi, dtype=np.float64))  # feature input
        self.n = self.y.shape[0]

        if self.n == 0:
            raise ValueError("y must contain at least one target value")
        if self.phi.shape[0] != self.n:
            # Fail here with a clear message instead of a late matmul error or
            # a silent broadcast inside __call__/grad.
            raise ValueError(
                f"phi has {self.phi.shape[0]} rows but y has {self.n} targets"
            )

    def __call__(self, w: np.ndarray) -> Union[float, np.ndarray]:
        """Return the mean squared residual of the linear model at ``w``."""
        residual = self.y - self.phi @ w
        return np.linalg.norm(residual) ** 2 / self.n

    def grad(self, w: np.ndarray) -> np.ndarray:
        """Return the gradient of the mean squared residual with respect to ``w``."""
        return 2 * self.phi.T @ (self.phi @ w - self.y) / self.n

In [7]:
def gradient_descent(obj_func: "Objective", w_init: np.ndarray, lr: float, tol: float, max_steps: int) -> Tuple[np.ndarray, List, List]:
    """Minimise ``obj_func`` with fixed-step gradient descent.

    Each step computes ``w_next = w_curr - lr * obj_func.grad(w_curr)``.
    The loop stops when one of the following happens:

    * ``np.allclose(w_next, w_curr, rtol=tol)`` holds, i.e. every component
      moved by at most ``1e-8 + tol * |w_curr|`` (NumPy's default ``atol``
      is kept). The step that satisfied the check is not applied.
    * ``w_next`` contains a non-finite value (the iteration diverged). A
      ``RuntimeWarning`` is emitted and the non-finite step is not applied.
    * ``max_steps`` updates have been applied.

    Args:
        obj_func: Object that is callable on a weight vector and exposes
            ``grad(w)``.
        w_init: Starting weight vector; it is not modified.
        lr: Step size multiplied with the gradient.
        tol: Relative tolerance used by the convergence check.
        max_steps: Maximum number of updates.

    Returns:
        A tuple ``(w, w_hist, obj_hist)``: the last accepted weights, the
        list of all accepted weights (starting with ``w_init``) and the
        objective value at each of them.
    """
    # Local import so the function does not depend on a module-level import.
    import warnings

    w_curr = w_init
    w_hist: List[np.ndarray] = [w_init]
    obj_hist: List[Union[float, np.ndarray]] = [obj_func(w_curr)]
    for _ in range(max_steps):
        w_next = w_curr - lr * obj_func.grad(w_curr)
        if not np.all(np.isfinite(w_next)):
            # Once an iterate is inf/NaN every later one is too, and allclose
            # never reports convergence on NaN; stop instead of burning the
            # remaining steps and returning NaN weights.
            warnings.warn(
                "gradient descent diverged (non-finite weights); "
                "returning the last finite iterate - try a smaller lr",
                RuntimeWarning,
                stacklevel=2,
            )
            break
        # tol is a *relative* tolerance; pass it by keyword to make that explicit.
        if np.allclose(w_next, w_curr, rtol=tol):
            break
        w_curr = w_next
        w_hist.append(w_curr)
        obj_hist.append(obj_func(w_curr))
    return w_curr, w_hist, obj_hist

In [12]:
# Hyperparameters for the gradient-descent run.
# NOTE(review): the previewed feature values are on the order of 1e3; with a
# squared-error objective a step size of 1e-3 may be too large for the
# iteration to converge - confirm, or rescale the features.

n_steps = 10_000      # upper bound on the number of update steps
tol = 1e-6            # tolerance handed to the convergence check
learning_rate = 1e-3  # step size applied to each gradient

In [17]:
# Design matrix: a float64 copy of the features with a constant column of
# ones inserted at position 0, so the first weight acts as the bias term.
phi = np.insert(arr=train_x.astype(np.float64), obj=0, values=1, axis=1)

In [None]:
# One weight per column of phi (the bias column included). Sizing the vector
# from phi keeps it aligned with the design matrix; a hard-coded length of 10
# does not match phi's 11 columns and makes `phi @ w` fail.
w_init = np.random.random(phi.shape[1])

# Squared-error objective of the linear model on the training data.
fitting = ObjFunc(y=train_y, phi=phi)

# Only the final weights are kept; the two history lists are discarded.
w_res, _, _ = gradient_descent(fitting, w_init, lr=learning_rate, tol=tol, max_steps=n_steps)

In [20]:
# use scikit-learn to compare the results
from sklearn.linear_model import LinearRegression

# phi already carries a leading column of ones, so the estimator must not fit
# an intercept of its own: with the default fit_intercept=True the ones column
# always gets coefficient 0 and the real bias ends up in reg.intercept_.
# With fit_intercept=False, reg.coef_ has one entry per column of phi, bias
# first, and solves the same least-squares problem as the objective above.
reg = LinearRegression(fit_intercept=False).fit(phi, train_y)
reg.coef_

array([0. , 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])