# Part 2: Resampling and confidence intervall

In [1]:
import numpy as np
import algorithms
from model_selection0 import GridSearchNew #use Gridsearch.fit(x,y)
from model_comparison0 import model_comparison0
from model_resample0 import model_resample
from utils import generateDesignmatrix, franke_function, ci, plotCI, clean_reg_coeff
from scipy import stats
import scipy.stats as st
import matplotlib.pyplot as plt
from imageio import imread
%matplotlib notebook 
# creates the toolbars with zoom buttons on plots.

## Franke function 

In [2]:
np.random.seed(1000)
x = np.random.rand(1000, )
y = np.random.rand(1000, )
z = franke_function(x, y)
p = 5 
X = generateDesignmatrix(p,x,y)

In [3]:
# Experimental setup
models = {
    'ridge': algorithms.Ridge, 
    "ols": algorithms.OLS, 
    "lasso": algorithms.Lasso
}

lmd = {
    'ridge': [0.01], 
    'ols': [0], 
    'lasso': [0.01]
}
nboots = 100

In [4]:
mse_avg_test, r2_avg_test, reg_coeffs, bias_model_test, mv_test, mse_avg_train, r2_avg_train,  bias_model_train,  mv_train = model_resample(models, lmd, X, z, nboots, split_size = 0.2)

In [5]:
a,b,c, d, e, f, g, h, i = clean_reg_coeff(X, reg_coeffs, nboots)
plotCI(X, a,b,c, d, e, f, g, h, i)
plt.title("Confidence intervals of test data for Ridge, OLS and Lasso - Franke function", fontsize = 15)
plt.savefig("Confidenceintervall_Franke.png")
plt.show()

<IPython.core.display.Javascript object>

In [6]:
mse_avg_test, mse_avg_train
# train has a lower value than test. OK!

({'lasso': 0.023208715114153927,
  'ols': 0.0021909392120651335,
  'ridge': 0.005335309668846603},
 {'lasso': 0.022750957177343652,
  'ols': 0.001987775137188555,
  'ridge': 0.005074728779967853})

In [7]:
r2_avg_test, r2_avg_train 
# train is higher than test. OK!

({'lasso': 0.7085971200800156,
  'ols': 0.9723086658976361,
  'ridge': 0.9328711076745759},
 {'lasso': 0.7184912702727699,
  'ols': 0.9754061543783944,
  'ridge': 0.937209172239495})

In [8]:
bias_model_test, bias_model_train

({'lasso': 0.0008033542564284679,
  'ols': 0.0023643402561452365,
  'ridge': 0.002401087833078297},
 {'lasso': 0.00014606926816390642,
  'ols': 0.00014606926772792184,
  'ridge': 0.00015750839551870177})

In [9]:
mv_test, mv_train

({'lasso': 7.005507285384737e-16,
  'ols': -9.925393840148899e-16,
  'ridge': -3.241851231905457e-16},
 {'lasso': -1.0880185641326535e-15,
  'ols': 3.0686564400639327e-15,
  'ridge': -2.415845301584341e-15})

## Real data: Flekkefjord 

In [10]:
# Experimental setup
models = {
    'ridge': algorithms.Ridge, 
    "ols": algorithms.OLS, 
    "lasso": algorithms.Lasso
}

lmd = {
    'ridge': [0.01], 
    'ols': [0], 
    'lasso': [0.01]
}
nboots = 100

In [11]:
terrain_F = imread("Flekkefjord.tif")

lengthx = 100
lengthy = 100
x = np.linspace(1, lengthx, num=lengthx)
y = np.linspace(1+80, lengthy+80, num=lengthy)

#x = np.arange(lengthx)
#y = np.arange(lengthy)

x = np.squeeze(np.tile(x, [1,100]))
y = np.squeeze(np.tile(y, [1,100]))
z = np.ndarray.flatten(terrain_F)
p = 3 
X = generateDesignmatrix(p, x, y)
print(X.shape)

(10000, 10)


  Functionality might be degraded or be slow.

  Functionality might be degraded or be slow.

  Functionality might be degraded or be slow.

  Functionality might be degraded or be slow.



In [12]:
mse_avg_test, r2_avg_test, reg_coeffs, bias_model_test, mv_test, mse_avg_train, r2_avg_train,  bias_model_train,  mv_train   = model_resample(
    models, lmd, X, z, nboots, split_size = 0.2)



In [13]:
a,b,c, d, e, f, g, h, i = clean_reg_coeff(X, reg_coeffs, nboots)

In [14]:
%matplotlib notebook
plotCI(X, a,b,c, d, e, f, g, h, i)
plt.title("Confidence intervals Ridge, OLS and Lasso - Flekkefjord", fontsize = 15)
plt.savefig("Confidenceintervall_F.png")
plt.show()

<IPython.core.display.Javascript object>

In [15]:
mse_avg_test, mse_avg_train

({'lasso': 139090.26546921756,
  'ols': 10449585928.069939,
  'ridge': 145536.83097848695},
 {'lasso': 139010.1237790425,
  'ols': 10619482337.115227,
  'ridge': 145416.78112083027})

In [16]:
r2_avg_test, r2_avg_train

({'lasso': 0.002708320093550517,
  'ols': -76301.46998084764,
  'ridge': -0.0435631280532197},
 {'lasso': 0.003920859095109963,
  'ols': -76147.27333451819,
  'ridge': -0.04189591206909495})

In [17]:
bias_model_test, bias_model_train

({'lasso': 272.7195798842065,
  'ols': 6657.491140173527,
  'ridge': 276.55624156402826},
 {'lasso': 272.78751302307774,
  'ols': 6781.212203432955,
  'ridge': 276.6188770004451})

In [18]:
# model variance
mv_test, mv_train

({'lasso': 1.539319782750681e-12,
  'ols': 1.739244908094406e-09,
  'ridge': -9.046061677508987e-12},
 {'lasso': -5.027004590374418e-11,
  'ols': -1.0739313438534737e-08,
  'ridge': 6.48765308142174e-11})

## Real data: Montevideo

In [19]:
terrain_M = imread("Montevideo.tif")

x = np.linspace(1, lengthx, num=lengthx)
y = np.linspace(1+80, lengthy+80, num=lengthy)

#x = np.arange(lengthx)
#y = np.arange(lengthy)

x = np.squeeze(np.tile(x, [1,100]))
y = np.squeeze(np.tile(y, [1,100]))
z = np.ndarray.flatten(terrain_F)
p = 3
X = generateDesignmatrix(p, x, y)
z = np.ndarray.flatten(terrain_M)

#mse_M, r2_M, reg_coeffs_M, bias_M, model_variance_M

mse_avg_test, r2_avg_test, reg_coeffs, bias_model_test, mv_test, mse_avg_train, r2_avg_train,  bias_model_train,  mv_train   = model_resample(
    models, lmd, X, z, nboots, split_size = 0.2)



In [20]:
a,b,c, d, e, f, g, h, i = clean_reg_coeff(X, reg_coeffs, nboots)
"""
a - mean ridge 
b - ci high 
c - ci lox

2. ols
3. lasso
"""

%matplotlib notebook
plotCI(X, a,b,c, d, e, f, g, h, i)
plt.title("Confidence intervals Ridge, OLS and Lasso - Montevideo", fontsize = 15)
plt.savefig("Confidenceintervall_M.png")
plt.show()

<IPython.core.display.Javascript object>

In [21]:
mse_avg_test, mse_avg_train

({'lasso': 475.4106863969924,
  'ols': 582976897006.3656,
  'ridge': 592.9868514696104},
 {'lasso': 474.45556723218743,
  'ols': 580166898312.6655,
  'ridge': 591.1154039905294})

In [22]:
r2_avg_test, r2_avg_train

({'lasso': -0.0002574973270527714,
  'ols': -1225495370.2216156,
  'ridge': -0.24836112334381488},
 {'lasso': 0.0010104224294194097,
  'ols': -1207486755.2930934,
  'ridge': -0.2458997069830339})

In [23]:
bias_model_test, bias_model_train

({'lasso': 60.263720830115915,
  'ols': 73587.43514767394,
  'ridge': 59.63183338577136},
 {'lasso': 60.266237495093705,
  'ols': 73348.59449791668,
  'ridge': 59.65740858821469})

In [24]:
mv_test, mv_train

({'lasso': 8.468248324788874e-13,
  'ols': -1.6253479770966805e-08,
  'ridge': -2.0307311388023662e-13},
 {'lasso': -5.87561999054742e-12,
  'ols': -2.5120752980001272e-08,
  'ridge': -8.941469786805101e-13})