In [4]:
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split
# Put the parent of the current working directory on the import path so the
# `src.*` imports below can be resolved when the notebook is run from its own
# folder.  `sys` is imported directly instead of being reached through the
# undocumented `os.sys` attribute, and the membership check keeps repeated
# executions of this cell from stacking duplicate entries on `sys.path`.
import sys

_parent_dir = os.path.dirname(os.path.abspath('.'))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# Import local modules
from src.data.generate_data import FrankeFunction
from src.models.models import Ridge
from src.evaluation.evaluation import mse, r_squared
from src.resampling.resampling import K_fold_splitter

In [5]:
# Design matrix: the first CSV column is used as the row index, every
# remaining column is kept as data.
df_X = pd.read_csv('../data/generated/X.csv', index_col=0)
X = df_X.to_numpy(copy=True)

# Targets: only the CSV column at position 1 is read from each file, and the
# resulting single-column frame is flattened to a 1-D array.
df_z_no_noise = pd.read_csv('../data/generated/no_noise.csv', usecols=[1])
z_no_noise = df_z_no_noise.to_numpy(copy=True).ravel()

df_z_some_noise = pd.read_csv('../data/generated/some_noise.csv', usecols=[1])
z_some_noise = df_z_some_noise.to_numpy(copy=True).ravel()

df_z_noisy = pd.read_csv('../data/generated/noisy.csv', usecols=[1])
z_noisy = df_z_noisy.to_numpy(copy=True).ravel()

In [8]:
# Ridge hyperparameter sweep: for every target vector and every value in
# `lambda_values`, fit `Ridge` on each cross-validation training split, score
# it with `mse` on the matching test split, and report the mean over the folds.
k = 10
lambda_values = [0, 0.1, 0.2, 0.5, 6.0]
targets = [{
    'name': 'No noise',
    'values': z_no_noise
},
{
    'name': 'Some noise (sigma 0.1)',
    'values': z_some_noise
},
{
    'name': 'Noisy (sigma 0.4)',
    'values': z_noisy
}]

col_names = ['MSE']
print('%-30s|%-10s' %('', 'MSE'))

print('-'*50)

# Results are gathered in plain lists and turned into a DataFrame once, after
# the loops.  `DataFrame.append` was deprecated in pandas 1.4 and removed in
# pandas 2.0, and growing a frame row by row copies it on every iteration.
row_labels = []
mse_rows = []

for target in targets:
    # The target vector does not depend on lambda, so look it up once.
    z = target['values']

    for lambda_value in lambda_values:
        print('Lambda', lambda_value)
        # One slot per fold; assumes the splitter yields exactly k folds
        # -- TODO confirm against K_fold_splitter.
        mse_values = np.zeros(k)
        ridge = Ridge(lambda_value)

        # NOTE(review): a new splitter is built for every (target, lambda)
        # pair.  If K_fold_splitter shuffles without a fixed seed, the lambdas
        # are scored on different folds -- confirm this is intended.
        kfs = K_fold_splitter(X.shape[0], k)
        fold_indices = kfs.cross_val_split()

        for j, fold in enumerate(fold_indices):
            # Each fold provides row indices for the train and test parts.
            X_train = X[fold['train_indices']]
            z_train = z[fold['train_indices']]
            X_test = X[fold['test_indices']]
            z_test = z[fold['test_indices']]

            ridge.fit(X_train, z_train)
            predictions = ridge.predict(X_test)
            mean_squared_error = mse(z_test, predictions)

            mse_values[j] = mean_squared_error

        # Cross-validated score: mean of the per-fold test MSEs.
        mse_value = np.mean(mse_values)
        print('%-30s|%-10f' %(target['name'], mse_value))
        row_labels.append(target['name'])
        mse_rows.append([mse_value])

# Same layout as before: one row per (target, lambda) pair in loop order,
# indexed by target name, with a single 'MSE' column.
# NOTE(review): the lambda value is not stored, so rows that share a target
# name can only be told apart by their position.
output_df = pd.DataFrame(data=mse_rows, columns=col_names, index=row_labels)

                              |MSE       
--------------------------------------------------
Lambda 0
No noise                      |0.002643  
Lambda 0.1
No noise                      |0.011181  
Lambda 0.2
No noise                      |0.012377  
Lambda 0.5
No noise                      |0.014915  
Lambda 6.0
No noise                      |0.025436  
Lambda 0
Some noise (sigma 0.1)        |0.012347  
Lambda 0.1
Some noise (sigma 0.1)        |0.020400  
Lambda 0.2
Some noise (sigma 0.1)        |0.021518  
Lambda 0.5
Some noise (sigma 0.1)        |0.024007  
Lambda 6.0
Some noise (sigma 0.1)        |0.034638  
Lambda 0
Noisy (sigma 0.4)             |0.171564  
Lambda 0.1
Noisy (sigma 0.4)             |0.170981  
Lambda 0.2
Noisy (sigma 0.4)             |0.171532  
Lambda 0.5
Noisy (sigma 0.4)             |0.173467  
Lambda 6.0
Noisy (sigma 0.4)             |0.183391  
