In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
%matplotlib notebook

In [3]:
def dataset_generator(init, end, sample, mean, var, function):
    """
    Build a noisy 1-D regression dataset on an evenly spaced grid.

    Parameters
    ----------
    init, end : float
        First and last input value (both included, as in ``np.linspace``).
    sample : int or float
        Number of points; it is cast to ``int``.
    mean : float
        Mean of the additive Gaussian noise.
    var : float
        Variance of the additive Gaussian noise (must be >= 0).
    function : callable
        Deterministic target; called once with the ``(sample, 1)`` input column.

    Returns
    -------
    x : ndarray of shape (sample, 1), the inputs
    y : ``function(x)``, the deterministic output
    t : ``y + e``, the noisy data
    e : ndarray of shape (sample, 1), the noise that was added

    Raises
    ------
    ValueError
        If ``var`` is negative.
    """
    if var < 0:
        raise ValueError("var must be non-negative")

    sample = int(sample)
    x = np.linspace(init, end, sample).reshape(-1, 1)
    y = function(x)
    # np.random.normal expects the standard deviation as its scale argument,
    # so the variance has to be converted before sampling.
    e = np.random.normal(mean, np.sqrt(var), sample).reshape(-1, 1)
    t = y + e

    return x, y, t, e

In [4]:
def linear_regressor(x,t,function,m):
    """
    Fit a linear-in-the-parameters model by least squares.

    ``m`` basis-function centres are spread evenly between the smallest and
    the largest input, the design matrix ``phi[i, j] = function(x_i, mu_j)``
    is built, and the weights are obtained with the pseudo-inverse.

    Parameters
    ----------
    x : array-like
        Inputs, either 1-D of length n or a column of shape (n, 1).
    t : ndarray
        Targets with n entries along the first axis.
    function : callable
        Basis function, called as ``function(x_i, mu_j)`` with two scalars.
    m : int
        Number of basis functions.

    Returns
    -------
    y_pred : ``phi @ w``, the predictions at the inputs ``x``
    w : ``pinv(phi) @ t``, the fitted weights
    """
    x_flat = np.asarray(x).ravel()

    # Use scalar bounds: the built-in min()/max() on an (n, 1) column return
    # 1-element arrays, which makes np.linspace produce an (m, 1) result and
    # breaks the per-centre indexing below.
    mu = np.linspace(x_flat.min(), x_flat.max(), m)

    phi = np.zeros((x_flat.size, m))
    for i, x_i in enumerate(x_flat):
        for j, mu_j in enumerate(mu):
            phi[i, j] = function(x_i, mu_j)

    w = np.linalg.pinv(phi) @ t
    y_pred = phi @ w

    return y_pred, w

In [5]:
def split_data(x,t,y_pred):
    """
    Split inputs and targets into train, validation and test parts.

    The parts are contiguous and keep the original order (no shuffling).
    The train and validation parts hold ``len(x) // 3`` samples each; the
    test part takes everything that is left, so no sample is dropped.

    Parameters
    ----------
    x, t : sequences supporting slicing
        Inputs and targets, split at the same positions.
    y_pred :
        Unused; kept so that existing calls keep working.

    Returns
    -------
    x_train, x_valid, x_test, t_train, t_valid, t_test
    """
    n = len(x) // 3
    # Slice ends are exclusive, so each part starts exactly where the
    # previous one stopped.
    x_train, x_valid, x_test = x[:n], x[n:2 * n], x[2 * n:]
    t_train, t_valid, t_test = t[:n], t[n:2 * n], t[2 * n:]

    return x_train, x_valid, x_test, t_train, t_valid, t_test
    

In [6]:
def f_deterministic(x):
    """
    Noise-free target: a sinusoid of period 1 and amplitude 2 on top of
    a linear trend of slope 2.
    """
    oscillation = 2*np.sin(2*np.pi*x)
    trend = 2*x
    return oscillation + trend

In [7]:
def f_basis(x,mu,s=1):
    """
    Gaussian-shaped basis function centred at mu: e ** (-(x - mu)**2 / s**2).

    s sets the width of the bump (default 1). There is no factor 2 in the
    denominator, so s is not the standard deviation of a normal density.
    """
    squared_distance = (x - mu) ** 2
    exponent = -squared_distance / s ** 2
    return np.e ** exponent

#### Initializing

In [8]:
# Seed NumPy's global generator so that Restart & Run All draws the same noise
# (and therefore produces the same figures) every time.
np.random.seed(0)
# 100 points on [0, 12]; zero-mean Gaussian noise with noise parameter var = 0.8**2.
x,y,t,e = dataset_generator(0,12,100,0,0.8**2,f_deterministic)

In [9]:
# Show the noise-free curve together with the noisy samples built from it.
plt.figure();
plt.plot(x,y,label='Deterministic');
plt.plot(x,t,'o',label='Data',markerfacecolor="None");
# Label the figure so that it can be read on its own.
plt.xlabel('x');
plt.ylabel('t');
plt.title('Deterministic function and noisy data');
plt.legend();

<IPython.core.display.Javascript object>

m = 500;
phi = np.zeros((len(x),m))
mu = np.array([np.linspace(min(x),max(x),m)]).T
for i in range(len(x)):
    for j in range(m):
        phi[i][j] = f_basis(x[i],mu[j])
w = np.linalg.pinv(phi)@t
y_pred = phi@w

print(x.shape,t.shape,y_pred.shape)

In [10]:
# Fit the model on the complete dataset using 100 basis functions.
y_pred, w = linear_regressor(x=x, t=t, function=f_basis, m=100)

In [11]:
# Overlay the model predictions on the deterministic curve and the noisy data.
plt.figure();
plt.plot(x,y,label='Deterministic');
plt.plot(x,t,'o',label='Data',markerfacecolor="None");
plt.plot(x,y_pred,'+',label='Prediction',markerfacecolor="None");
# Label the figure so that it can be read on its own.
plt.xlabel('x');
plt.ylabel('t');
plt.title('Model fitted on all data');
plt.legend();

<IPython.core.display.Javascript object>

In [12]:
# Partition inputs and targets into train / validation / test parts.
(x_train, x_valid, x_test,
 t_train, t_valid, t_test) = split_data(x=x, t=t, y_pred=y_pred)

In [13]:
# Fit a model with 10 basis functions on the training part only, then draw its
# predictions over the full deterministic curve and data in a small figure.
y_train, w_train = linear_regressor(x=x_train, t=t_train, function=f_basis, m=10)

plt.figure(figsize=(3, 2));
plt.plot(x, y, label='Deterministic');
plt.plot(x, t, 'o', markerfacecolor="None", label='Data');
plt.plot(x_train, y_train, '+', markerfacecolor="None", label='Prediction');
plt.legend();

<IPython.core.display.Javascript object>

In [55]:
W_ml = []

# The fitting set is the same for every model size, so build it once.
# np.append flattens its inputs, so both arrays are 1-D.
x_fit = np.append(x_train, x_valid)
t_fit = np.append(t_train, t_valid)

# NOTE(review): every model is fitted AND scored on train + validation, so
# despite its name this is an in-sample error, not a held-out validation error.
rms_per_size = []
for i in range(len(x)):
    # i is the number of basis functions of the model being fitted.
    y_train, w_train = linear_regressor(x_fit, t_fit, f_basis, i)
    # RMS = sqrt(mean squared residual). y_train already covers train +
    # validation, so the mean is taken over len(y_train) samples only.
    E_rms = np.sqrt(np.mean((y_train - t_fit) ** 2))
    rms_per_size.append(E_rms)

# Converted to an array once, after the loop, so later cells can do arithmetic on it.
Error_valid = np.array(rms_per_size)

In [56]:
W_ml = []
Error = []
# Training RMS error for every number of basis functions i = 0 .. len(x) - 1.
for i in range(len(x)):
    y_train, w_train = linear_regressor(x=x_train, t=t_train, function=f_basis, m=i)
    squared_residuals = (y_train - t_train) ** 2
    mean_squared_error = sum(squared_residuals) / len(y_train)
    E_rms = mean_squared_error ** 0.5
    Error = np.append(Error, E_rms)

In [58]:
# Compare the two RMS-error curves; position k on the x axis corresponds to
# the model fitted with k basis functions.
plt.figure()
plt.plot(np.arange(len(Error_valid)),Error_valid,label="Validation");
plt.plot(np.arange(len(Error)),Error,label="Train");
# Label the figure so that it can be read on its own.
plt.xlabel('Number of basis functions');
plt.ylabel('RMS error');
plt.title('RMS error vs. model size');
plt.legend();

<IPython.core.display.Javascript object>

In [98]:
# Gap between the two error curves for each model size.
plt.figure();
plt.plot(Error_valid - Error);
# Label the figure so that it can be read on its own.
plt.xlabel('Number of basis functions');
plt.ylabel('Error_valid - Error');
plt.title('Difference between the RMS error curves');

<IPython.core.display.Javascript object>

In [100]:
# Put the three parts back together (order: train, test, validation) and fit
# a model with 30 basis functions on the result.
x_all = np.append(x_train, np.append(x_test, x_valid))
t_all = np.append(t_train, np.append(t_test, t_valid))
y_test, w_test = linear_regressor(x_all, t_all, f_basis, 30)

plt.figure();
plt.plot(x, y, label='Deterministic');
plt.plot(x, t, 'o', markerfacecolor="None", label='Data');
plt.plot(x_all, y_test, '+', markerfacecolor="None", label='Prediction');
plt.legend();

<IPython.core.display.Javascript object>