# Part a)

In [1]:
import os
import sys

import numpy as np

# Make the parent of the current working directory importable so that the
# project-local `src` package (presumably located at the project root) resolves.
# The membership check keeps the cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
project_root = os.path.dirname(os.path.abspath('.'))
if project_root not in sys.path:
    sys.path.append(project_root)

from src.data.generate_data import FrankeFunction
from src.models.models import OLS
from src.features.polynomial import PolynomialFeatures
from src.evaluation.evaluation import mse

In [2]:
# One axis of 20 evenly spaced points (0, 0.05, ..., 0.95), used for both inputs.
grid_axis = np.arange(0, 1, 0.05)
# Build the 20 x 20 grid and flatten each coordinate array to 400 samples.
x, y = (coords.ravel() for coords in np.meshgrid(grid_axis, grid_axis))
# Target values on the grid, generated with a fixed seed.
z = FrankeFunction(x, y, noise=0.01, seed=43)

We now have the predictors $x$ and $y$ and the target values $z$. We proceed by building a feature matrix.

In [3]:
# Polynomial feature expansion of the two predictors (the argument 4 is
# presumably the maximum polynomial degree).
pf = PolynomialFeatures(4)
# Stack x and y as the two columns of an (n_samples, 2) input array.
xy_points = np.column_stack((x, y))
X = pf.fit_transform(xy_points)
print(X)

[[1.         0.         0.         ... 0.         0.         0.        ]
 [1.         0.05       0.         ... 0.         0.         0.        ]
 [1.         0.1        0.         ... 0.         0.         0.        ]
 ...
 [1.         0.85       0.95       ... 0.65205625 0.72876875 0.81450625]
 [1.         0.9        0.95       ... 0.731025   0.7716375  0.81450625]
 [1.         0.95       0.95       ... 0.81450625 0.81450625 0.81450625]]


In [4]:
# Fit the project's OLS model on the full feature matrix X against the targets z.
# No hold-out split is made here: the model sees every sample.
ols = OLS()
ols.fit(X, z)

In [5]:
# Predict on the same feature matrix the model was fitted on.
predictions = ols.predict(X)

# Spot-check: show a handful of predictions next to the matching target values.
sample = slice(34, 40)
print(predictions[sample])
print(z[sample])

[0.27985113 0.26550947 0.25611674 0.24548076 0.22586564 0.18799174]
[0.30502795 0.30526578 0.24190182 0.2435697  0.18566786 0.17013158]


In [7]:
# Compute the training MSE for increasing noise levels.
# Each iteration uses its own target vector, model and predictions, so the
# `z`, `ols` and `predictions` defined by the earlier cells are left untouched
# and those cells give the same results when re-run after this one.
noises = [0.0, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0]
for noise in noises:
    # Same grid and seed every time; only the noise level changes.
    z_noisy = FrankeFunction(x, y, noise=noise, seed=43)
    ols_noisy = OLS()
    ols_noisy.fit(X, z_noisy)
    predictions_noisy = ols_noisy.predict(X)
    # The error is measured on the same points the model was fitted on.
    mean_squared_error = mse(z_noisy, predictions_noisy)
    print(mean_squared_error)

Number of observations: 400
0.004793879073370269
Number of observations: 400
0.004841074330285955
Number of observations: 400
0.00508242856359797
Number of observations: 400
0.006971445121911941
Number of observations: 400
0.014002985580361641
Number of observations: 400
0.04262798972698505
Number of observations: 400
0.2449983880046477
Number of observations: 400
0.9706003379267272
Number of observations: 400
3.877996560743292


We clearly see how an increase in noise makes fitting harder: the MSE grows from about 0.005 without noise to about 3.9 at a noise level of 2.0.