# Machine learning using EKF

In [1]:
from IPython import display
import os
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
from load import load_abalone_data,load_bikes_data
# Importing Pytorch libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
from  sklearn.datasets import make_regression
from sklearn.datasets import load_boston
from tqdm import tqdm 

In [2]:
# Select device which you are going to use for training
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
print(device)

cpu


### Import Data Sets
Testing using a toy sine data

In [3]:
X,y = load_bikes_data()
print(y.shape)
print(X.shape)

(17379,)
(17379, 14)


## Data Partition


In [4]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

X_scaled = scaler.fit_transform(X)

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_scaled,y,test_size=.2)
print(X_train.shape)
print(X_test.shape)

# x_train_scaled = scaler.fit_transform(x_train)
# x_test_scaled = scaler.transform(x_test)

(13903, 14)
(3476, 14)


## Define Neural network

In [6]:
class MLP(nn.Module):
    def __init__(self, n_inputs, n_hidden_layer, n_outputs,bias=True):
        super(MLP, self).__init__()
        # YOUR CODE HERE
        #raise NotImplementedError()
        self.fc1 = nn.Linear(n_inputs, n_hidden_layer, bias)
        self.fc2 = nn.Linear(n_hidden_layer, n_hidden_layer, bias)
        #self.fc3 = nn.Linear(n_hidden_layer, n_hidden_layer, bias)
        self.fc4 = nn.Linear(n_hidden_layer, n_outputs, bias)
        

    def forward(self, x):
        # YOUR CODE HERE
        #raise NotImplementedError()
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        #x = torch.tanh(self.fc3(x))
        x = self.fc4(x)
        return x    

## Using EKF for learning 

In [7]:
  
def getWeights(net):
    weight_mat = []
    for name,param in net.named_parameters():
        if (len(list(param.data.shape)) == 2):
            weight_mat.append(param.data.flatten())
        
    weight_mat = torch.cat(weight_mat, dim=0)       
    return weight_mat.view(-1, 1)

def getWeightsgrad(net):
    weight_grad_mat = []
    for name,param in net.named_parameters():
        if (len(list(param.grad.shape)) == 2):
            weight_grad_mat.append(param.grad.flatten())
    weight_grad_mat = torch.cat(weight_grad_mat)       
    return weight_grad_mat.view(-1, 1)

def setWeights(net, weight_mat):
    mem_ind = 0;
    for name,param in net.named_parameters():
        if (len(list(param.data.shape)) == 2):
            param.data = weight_mat[mem_ind:mem_ind+torch.numel(param.data)].view(param.data.shape)
            mem_ind = torch.numel(param.data)
    


In [14]:
# Define number of Input and Output layers
torch.set_default_dtype(torch.float64)
n_inputs = X_train.shape[1]
n_outputs = 1
n_hidden_layer = 20

mlp_EKF = MLP(n_inputs,n_hidden_layer, n_outputs, bias = False)
mlp_EKF = mlp_EKF.to(device)
n_epochs = 1

# Define EKF covariances
weight_mat = getWeights(mlp_EKF).to(device)
print(f"Shape of W:{weight_mat.shape}")
# System Noise or also known as training  noise  
Q = 1e-16*torch.eye(weight_mat.shape[0],device=device, dtype=torch.float64)
# Measurement noise or noise in targets 
R = 10*torch.eye(n_outputs,device=device, dtype=torch.float64)
#Covariance Matrix
P = 100*torch.eye(weight_mat.shape[0],device=device, dtype=torch.float64)
print(f"Shape of P:{P.shape}")

print(f"network {mlp_EKF}")
print(weight_mat.shape)



Shape of W:torch.Size([700, 1])
Shape of P:torch.Size([700, 700])
network MLP(
  (fc1): Linear(in_features=14, out_features=20, bias=False)
  (fc2): Linear(in_features=20, out_features=20, bias=False)
  (fc4): Linear(in_features=20, out_features=1, bias=False)
)
torch.Size([700, 1])


In [15]:
x_tensor = torch.tensor(X_train, device=device, dtype=torch.float64)
y_tensor = torch.tensor(y_train, device=device, dtype=torch.float64)


for epoch in range(n_epochs):
    
    outputs = [] 
    #Calling Backward for each sample
    for i in tqdm(range(x_tensor.shape[0])):
        output = mlp_EKF(x_tensor[i])
        outputs.append(output)

        mlp_EKF.zero_grad()
        output.backward(torch.ones_like(output))
        #calculate loss
        loss = (y_tensor[i]-output).view(1,-1)
        H = getWeightsgrad(mlp_EKF).to(device).view(1,-1)
        
        #update weights using EKF filter Update
        intermediate = torch.mm(torch.mm(H, P), torch.t(H))

        Ak = torch.inverse(R + intermediate)

        Kk = torch.mm(torch.mm(P, torch.t(H)), Ak)

        weight_mat = weight_mat + torch.mm(Kk, loss)
        P = P + Q - torch.mm(torch.mm(Kk,H),P)
        setWeights(mlp_EKF,weight_mat)


100%|████████████████████████████████████████████████████████████████████████████| 13903/13903 [03:06<00:00, 78.08it/s]


In [16]:
from sklearn.metrics import mean_absolute_error,mean_squared_error
with torch.no_grad():
    x_test = torch.tensor(X_test, device=device, dtype=torch.float64)
    y_pred = mlp_EKF.forward(x_test)
    y_pred = y_pred.cpu().data.numpy()
    error = mean_squared_error(y_test,y_pred)
    print(np.sqrt(error))

40.99807593684918
