# Test the functions in Project 1

This is one of three mandatory projects of the course FYS-STK4155 (University of Oslo). In this file, we test the performance of our own functions by comparing them with the corresponding functions in the scikit-learn Python package.

The Franke function was given for this project, so we are not going to test it.

In [1]:
def franke_function(x, y):
    """Evaluate the Franke test function at the point(s) (x, y).

    The value is a weighted sum of four exponential terms in 9*x and 9*y.
    The arguments may be scalars or NumPy arrays; arrays are combined
    elementwise, so the result has the broadcast shape of x and y.
    """
    sx = 9*x
    sy = 9*y
    # One weight and one exponent per term, in the order they are summed.
    weights = (0.75, 0.75, 0.5, -0.2)
    exponents = (
        -(0.25*(sx-2)**2) - 0.25*((sy-2)**2),
        -((sx+1)**2) / 49.0 - 0.1*(sy+1),
        -(sx-7)**2/4.0 - 0.25*((sy-3)**2),
        -(sx-4)**2 - (sy-7)**2,
    )
    return sum(w * np.exp(e) for w, e in zip(weights, exponents))

Import packages

In [2]:
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.linear_model import LinearRegression 
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score, mean_squared_log_error, mean_absolute_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import algorithms
from model_selection import GridSearch #use Gridsearch.fit(x,y)
from model_comparison import model_comparison
#from functions import generateDesignmatrix

#  1) Test the function generateDesignmatrix in functions.py

This function generates a design matrix for the arrays x and y and a given polynomial order p. The arrays x and y are the ones that define the meshgrid for the calculation of z with the Franke function.

In [3]:
def generateDesignmatrix(p, x, y):
    """Build the design matrix of a two-variable polynomial of order p.

    Columns are ordered by total degree: 1, x, y, x^2, x*y, y^2, ...
    Within degree i they run x^i, x^(i-1)*y, ..., y^i, which gives
    (p + 1)*(p + 2)/2 columns in total.

    Parameters
    ----------
    p : int
        Polynomial order; must be >= 0.
    x, y : array_like
        Sample coordinates. Lists are accepted, and multi-dimensional
        input (e.g. meshgrid output) is flattened in row-major order.

    Returns
    -------
    numpy.ndarray
        Float array with one row per sample and (p + 1)*(p + 2)/2 columns.

    Raises
    ------
    ValueError
        If p is negative, or if the flattened x and y cannot be combined
        elementwise into a column of len(x) entries.
    """
    if p < 0:
        # A negative order would otherwise yield a wrong column count
        # (e.g. p = -3 silently returned a single column of ones).
        raise ValueError("polynomial order p must be non-negative")

    x = np.ravel(np.asarray(x, dtype=float))
    y = np.ravel(np.asarray(y, dtype=float))

    # Number of monomials x^a * y^b with a + b <= p, in exact integer math.
    n_cols = (p + 1) * (p + 2) // 2
    X = np.ones((len(x), n_cols))  # column 0 is the intercept

    col = 1
    for degree in range(1, p + 1):
        for j in range(degree + 1):
            X[:, col] = x**(degree - j) * y**j
            col += 1
    return X

In [4]:
# Fix the seed so the random samples (and the outputs shown below) are reproducible.
np.random.seed(1000)
# x and y must have the same length: each (x[i], y[i]) pair is one sample point.
m = 5  # m is the number of sample points.
x = np.random.rand(m, )
y = np.random.rand(m, )
z = franke_function(x, y)

# x, y, and z are all 1-D arrays of length m: franke_function is evaluated
# elementwise at the m (x, y) pairs, so z is not an m x m grid here.

In [5]:
# Show the sampled coordinates and the Franke values computed from them.
for _label, _values in (('array x:', x), ('array y:', y), ('array z:', z)):
    print(_label, _values)

array x: [0.65358959 0.11500694 0.95028286 0.4821914  0.87247454]
array y: [0.21233268 0.04070962 0.39719446 0.2331322  0.84174072]
array z: [0.50251014 0.90614694 0.32573166 0.56740664 0.0665042 ]


In [6]:
# Test generateDesignmatrix for p=0.
# When p=0 we expect the function to generate a matrix X which consists only of
# the term that corresponds to the intercept.
# This means that X has a single column whose length equals the length of x and y, and all its elements are 1.
p = 0 # polynomial order
X = generateDesignmatrix(p,x,y)
print('Design matrix:', X) 

Design matrix: [[1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [7]:
# Test generateDesignmatrix for p=1.
# When p=1 we expect the function to generate a matrix X with three columns: the first filled with ones,
# as we have seen before, and the second and third with the values of x and y.
p = 1 # polynomial order
X = generateDesignmatrix(p,x,y)
print('Design matrix:', X) 

Design matrix: [[1.         0.65358959 0.21233268]
 [1.         0.11500694 0.04070962]
 [1.         0.95028286 0.39719446]
 [1.         0.4821914  0.2331322 ]
 [1.         0.87247454 0.84174072]]


In [8]:
# More generally, the columns of the design matrix will be:
# ones, x, y, x^2, xy, y^2, ..., x^p y^0, ..., x^(p-i) y^i, ..., x^0 y^p.
# Thus, for a given p, we have t = (p^2+3p+2)/2 number of columns in the design matrix X.

In [9]:
# Now, let's try with a higher-order polynomial.
p = 3

X = generateDesignmatrix(p,x,y)
print('Design matrix:', X) 


Design matrix: [[1.00000000e+00 6.53589585e-01 2.12332681e-01 4.27179346e-01
  1.38778429e-01 4.50851674e-02 2.79199972e-01 9.07041358e-02
  2.94671959e-02 9.57305446e-03]
 [1.00000000e+00 1.15006943e-01 4.07096248e-02 1.32265970e-02
  4.68188950e-03 1.65727355e-03 1.52115049e-03 5.38449799e-04
  1.90597965e-04 6.74669843e-05]
 [1.00000000e+00 9.50282864e-01 3.97194461e-01 9.03037522e-01
  3.77447090e-01 1.57763440e-01 8.58141083e-01 3.58681502e-01
  1.49919894e-01 6.26627646e-02]
 [1.00000000e+00 4.82191401e-01 2.33132197e-01 2.32508548e-01
  1.12414341e-01 5.43506214e-02 1.12113622e-01 5.42052286e-02
  2.62074023e-02 1.26708798e-02]
 [1.00000000e+00 8.72474535e-01 8.41740724e-01 7.61211815e-01
  7.34397347e-01 7.08527447e-01 6.64137924e-01 6.40742984e-01
  6.18172155e-01 5.96396406e-01]]


In [10]:
# As expected, X has 10 columns: t = (3^2 + 3*3 + 2)/2 = 10.
# The first column consists of ones, the second and the third are the vectors x and y.

# Let's check whether column number 9 (index 8) is equal to x*y^2.
# NOTE: == compares the floats elementwise for exact equality; np.allclose
# would be the tolerant alternative if the two sides were computed differently.
y2 = y**2
col9 = X[:, 8]

print('Has the ninth column in X the values we expect:', col9==x*y2)

Has the ninth column in X the values we expect: [ True  True  True  True  True]
