# Parametric Regression  

In this notebook, we attempt to find optimal parameters for the model using various nonlinear parametric regression methods.

In [14]:
import os
import time
import shutil
import contextlib
import numpy as np
import matplotlib.pyplot as plt


from helper import *
from models import *
from solvers.euler import *
from data_generator import *
from solvers.gn_solver import *
from solvers.rk_solvers import *


# Show how many CPUs `os.cpu_count()` reports on this machine.
print(f"Your machine has {os.cpu_count()} cores.")


Your machine has 8 cores.


## 1 Gauss Newton

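First, obtain some data as a list of tuples `total`. In each tuple, the first component holds the initial conditions, of shape `(201, 2)`, and the second holds the model output, of shape `(1000, 201, 2)`. With `flattened=True`, as used below, the output is stored as a flat vector of length $1000 \cdot 201 \cdot 2 = 402000$.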

In [15]:
# Number of prey; also read as a global by `fModel1` further down.
N = 100

# Time the generation of 10 samples from `model4`.
start = time.time()
total_model_4 = generateData(
    model4,
    num_data=10,
    init_sty='random',
    times=(0, 20),
    params={
        'no. of prey': N,
        'kappa for prey': 0.5,
        'attraction of prey a': 0,
        'repulsion of prey b_1': 0.15,
        'repulsion of pred b_2': 0.55,
        'p1 spotted': 0.8,
        'p2 not spotted': 0.4,
        'angle_prey': np.cos(np.pi / 3),
        'angle_pred': np.cos(np.pi / 2.5),
        'num_neighbours': 5,
        'attraction of pred c': 10,
        'exponent of dist pred p': 3,
    },
    steps=1000,
    second_order=False,
    method='rk2',
    return_vel=False,
    cores=1,
    flattened=True,
)
end = time.time()
print(f"Time taken: {end-start} seconds.")

Trying to use multiprocessing...
Multiprocessing successful.
Time taken: 28.752002954483032 seconds.


Alternatively, we generate data using `model1`, the simplest of all models.

In [16]:
# Same procedure with the simpler `model1`: 100 samples, timed end to end.
start = time.time()
total = generateData(
    model1,
    num_data=100,
    init_sty='random',
    times=(0, 20),
    params={
        'no. of prey': N,
        'kappa for prey': 0.5,
        'attraction of prey a': 1,
        'repulsion of prey b_1': 1,
        'repulsion of pred b_2': 0.07,
        'attraction of pred c': 10,
        'exponent of dist pred p': 1.2,
    },
    steps=1000,
    second_order=False,
    method='rk2',
    return_vel=False,
    cores=1,
    flattened=True,
)
end = time.time()
print(f"Time taken: {end-start} seconds.")

Trying to use multiprocessing...
Multiprocessing successful.
Time taken: 211.06027483940125 seconds.


For regression, we still need a function that takes a parameter vector instead of a dictionary. We take `attraction of prey a`, `repulsion of prey b_1`, and `repulsion of pred b_2` as the parameters to regress on.

In [17]:
def fModel4(x, vec):
    """
    Run `model4` forward from the initial conditions `x` via `rk2` and
    return the result flattened.

    Only three entries of the parameter dictionary come from `vec`; every
    other entry is fixed to the value used when generating `total_model_4`.

    Args:
        - x: ndarray, initial conditions; reshaped to two columns before use
        - vec: ndarray, parameters of length 3, in the order
          (`attraction of prey a`, `repulsion of prey b_1`,
          `repulsion of pred b_2`)

    Returns:
        - 1-D ndarray: `np.ravel` of the value returned by `rk2`

    Note:
        `N` is the notebook-level prey count, read as a global exactly as in
        `fModel1`. It must NOT be assigned inside this function: any
        assignment makes `N` local to the whole body, so the read in the
        `params` dictionary would raise `UnboundLocalError`.
    """
    params = {
        'no. of prey': N,
        'kappa for prey': 0.5,
        'attraction of prey a': vec[0],
        'repulsion of prey b_1': vec[1],
        'repulsion of pred b_2': vec[2],
        'p1 spotted': 0.8,
        'p2 not spotted': 0.4,
        'angle_prey': np.cos(np.pi / 3),
        'angle_pred': np.cos(np.pi / 2.5),
        'num_neighbours': 5,
        'attraction of pred c': 10,
        'exponent of dist pred p': 3,
    }

    # Same time window and step count as the `generateData` call for model4.
    times = (0, 20)
    steps = 1000
    start, end = times
    h = (end - start) / steps  # step size handed to the solver

    x = x.reshape(-1, 2)  # reshape to coordinate form of initial conditions
    y = rk2(model4, x, h, 2*N+1, times, params, return_vel=False, verbose=False)

    return np.ravel(y)

For `model1`, we try to recover just one parameter, `attraction of prey a`.

Feed in one sample and try the `GaussNewtonSolver`. Note that it might be better to drop the zero values in the input and to remove some of the configurations of $y$ at some time steps.

In [23]:
def fModel1(x, vec):
    """
    Run `model1` forward from `x` via `rk2` and return a subsampled,
    flattened result.

    Args:
        - x: ndarray, initial conditions; reshaped to two columns before use
        - vec: ndarray, parameters of length 1; `vec[0]` is used as
          `attraction of prey a`, all other parameters are fixed

    Returns:
        - 1-D ndarray: every 100th entry of `np.ravel` applied to the value
          returned by `rk2`
    """
    t_span = (0, 20)
    n_steps = 1000
    t_first, t_last = t_span
    dt = (t_last - t_first) / n_steps

    model_params = {
        'no. of prey': N,
        'kappa for prey': 0.5,
        'attraction of prey a': vec[0],
        'repulsion of prey b_1': 1,
        'repulsion of pred b_2': 0.07,
        'attraction of pred c': 10,
        'exponent of dist pred p': 1.2,
    }

    # Initial conditions arrive flat; the solver is given them as (-1, 2).
    init_coords = x.reshape(-1, 2)
    result = rk2(
        model1,
        init_coords,
        dt,
        2*N+1,
        t_span,
        model_params,
        return_vel=False,
        verbose=False,
    )

    # Stride of 100 over the flattened output, matching the data subsampling.
    flat = np.ravel(result)
    return flat[::100]

We also apply the same subsampling (keeping every 100th entry) to the data.

In [19]:
# Use the first (initial conditions, output) pair as a single-sample test case.
x0, y0 = total[0][0], total[0][1]
print(y0.shape)
# `y0` is 1-D here, so this keeps every 100th entry of the flattened output,
# the same stride that `fModel1` applies to its own result.
y0 = y0[::100]
print(len(y0))

(402000,)
4020


In [20]:
def reduceTimesteps(xy_pair, timesteps=100):
    """
    Subsample the second component of an (x, y) pair with stride `timesteps`.

    Args:
        - xy_pair: indexable pair; element 0 is passed through untouched,
          element 1 must support slicing
        - timesteps: int, slice stride applied to element 1 (default 100)

    Returns:
        - tuple `(xy_pair[0], xy_pair[1][::timesteps])`

    Note:
        The stride is applied along the first axis of `y`. If `y` has been
        flattened to 1-D, this keeps every `timesteps`-th scalar entry rather
        than every `timesteps`-th time step.
    """
    inputs, outputs = xy_pair[0], xy_pair[1]
    return (inputs, outputs[::timesteps])

# NOTE: this re-binds `total`, so re-running the cell subsamples the data again.
# Run it exactly once after generating `total`.
total = list(map(reduceTimesteps, total))

In [21]:
# Sanity check: shape of the first sample's output after subsampling.
print(total[0][1].shape)

(4020,)


Ideally, the solver should return a value close to `1` for `attraction of prey a` (the value used to generate the data), up to some acceptable error.

In [None]:
# Single-sample fit, starting from an initial guess of 2 for the parameter.
guess = np.array([2])
gnsolver = GaussNewtonSolver(
    fModel1,
    max_solver_iter=10,
    step=1e-3,
    max_line_search_iter=5,
    verbose=True,
)
# The third argument (-5) is presumably the initial descent step size, which
# the multi-sample cell names `t`. Confirm against `GaussNewtonSolver.fit`.
gnsolver.fit(x0, y0, -5, guess, alpha=0.1, beta=0.8)

To take this a step further, we run the Gauss-Newton algorithm over multiple samples, using more of the generated data.

In [None]:
from regression.regressor import *  

# Split the (x, y) pairs into parallel lists of inputs and outputs.
# NOTE: this re-binds `x0` / `y0` from single arrays to lists of arrays.
x0 = [xy[0] for xy in total]
y0 = [xy[1] for xy in total]

t = -5  # initial descent step size
init_guess = np.array([2])  # initial guess for parameters
solver = "GaussNewton"

# Settings passed to `regressor` for the solver and for the fit step.
solver_params = dict(
    max_solver_iter=10,
    step=1e-3,
    max_line_search_iter=5,
    verbose=True,
)
fit_params = dict(alpha=0.1, beta=0.8)

params = regressor(
    x0,
    y0,
    t,
    fModel1,
    init_guess,
    solver,
    solver_params,
    fit_params,
)

In [25]:
# Display the value returned by `regressor`; the data were generated with
# `attraction of prey a` = 1.
params

array([0.99999993])