In [1]:
import numpy as np
import pandas as pd
import math
from tqdm import tqdm
from time import time
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE
from sklearn.svm import SVR
from sklearn.linear_model import Lasso, LassoLars, LassoLarsCV, Ridge, LassoCV, RidgeCV, HuberRegressor
from sklearn.linear_model import LinearRegression as lin_reg
from sklearn import preprocessing
import seaborn as sns
import scipy.interpolate as interpolate

def err1(x, y):
    """Return the square root of the mean absolute error between ``x`` and ``y``."""
    return np.sqrt(MAE(x, y))


def err2(x, y):
    """Return the square root of the mean squared error between ``x`` and ``y``."""
    return np.sqrt(MSE(x, y))


def Error1(x, y):
    """Return ``err1(x, y)`` divided by ``err1`` of an all-zero array against ``y``."""
    numerator = err1(x, y)
    baseline = err1(np.zeros(y.shape), y)
    return numerator / baseline


def Error2(x, y):
    """Return ``err2(x, y)`` divided by ``err2`` of an all-zero array against ``y``."""
    numerator = err2(x, y)
    baseline = err2(np.zeros(y.shape), y)
    return numerator / baseline

%run SPDEs.ipynb
%run Rule.ipynb
%run Model.ipynb

In [4]:
# Class for performing Algorithm 2 

class IML():
    """Driver for "Algorithm 2": fit one regression per space point on model
    features built from observed solutions, then roll predictions forward in time.

    Typical call order, as far as the methods below show:
    ``set_train_test`` -> ``set_trees`` -> ``training_models`` ->
    ``fit_training_model`` -> ``learn_with_fitted`` -> ``to_df_list`` / ``to_df_df``.

    ``SPDE`` and ``Model`` are expected to be defined in the notebook namespace
    (they are brought in by the ``%run`` cells); their exact contracts are not
    visible here.
    """

    def __init__(self, Solutions, Rule, height, deg, eps = 1, step = 1, Noise = None, diff = True, train = None, test = None, trees = None, T = None, X = None):
        """Store the solutions, grids and model hyper-parameters.

        Parameters
        ----------
        Solutions : pandas.DataFrame or array-like
            Either a DataFrame with two-level columns whose first level is
            'S1', 'S2', ... (index = time grid, second level = space grid), or
            an array-like indexable as ``[solution, time, space]``.
        Rule, height, deg :
            Passed through unchanged to ``Model``.
        eps : viscosity, passed to ``SPDE``.
        step : number of time-grid steps covered by one model interval.
        Noise : stored as ``self.noise``; not used by the methods of this class.
        diff : True if derivatives are present in the model.
        train, test : indices of the train / test solutions.
        trees : the model's feature set (keys of the model dictionaries).
        T, X : time / space grids; required when ``Solutions`` is not a DataFrame.

        Raises
        ------
        ValueError
            If ``Solutions`` is not a DataFrame and ``T`` or ``X`` is missing.
        """
        if isinstance(Solutions, pd.DataFrame):
            n = Solutions.columns.levshape[0]
            frames = [Solutions['S'+str(i+1)] for i in range(n)]
            self.T = np.array(frames[0].index).astype(np.float64) # time grid (O_T)
            self.X = np.array(frames[0].columns).astype(np.float64) # space grid (O_X)
            # Every method indexes self.Sol as Sol[indices, time, :], which needs a
            # 3-D array; a list of DataFrames cannot be indexed that way.
            self.Sol = np.array([np.asarray(frame, dtype=np.float64) for frame in frames])
        else:
            if T is None or X is None:
                raise ValueError("T and X must be given when Solutions is not a DataFrame")
            self.Sol = np.asarray(Solutions) # set the solutions
            self.T = T
            self.X = X

        self.R = Rule
        self.H = height
        self.deg = deg
        self.step = step # Discretization of the time step [0,\delta]. Set to be 1 by default
        self.diff = diff # True if derivatives are present in the model.
        self.train = train # Indices of the train set (U^{obs})
        self.test = test # Indices of the test set (U^{pr})
        self.trees = trees # Model's feature set
        self.Pred = None # u^{pr}
        self.cut_off_size = 3 # If the prediction starts to blow up, cut it off at 3*maximum(|Solutions|)
        self.max = np.abs(self.Sol).max()
        self.M_train = [] # placeholder for the models for the train set. (All time points simultaneously)
        self.M_test = [] # placeholder for the models for the test set
        self.regression_fit = {x: None for x in self.X} # placeholder for linear fit at each space point
        self.scalers = {} # per-space-point scalers fitted in fit_training_model when prepro=True
        self.Type = "Parabolic" # type of PDE. Set to Parabolic by default
        self.BC = "P" # Boundary conditions. Set to Periodic by default
        self.eps = eps # viscosity
        self.noise = Noise

    def set_train_test(self, train = None, test = None, test_size = 0.3):
        """Set the train / test indices.

        If only one of ``train`` / ``test`` is given, the other is its complement
        among all solution indices. If neither is given, a random split is drawn
        with ``train_test_split`` using ``test_size``.
        """
        n_solutions = len(self.Sol)

        if train is not None:
            self.train = train
            if test is None:
                excluded = set(train)
                self.test = [a for a in range(n_solutions) if a not in excluded]
            else:
                self.test = test
        elif test is not None:
            self.test = test
            excluded = set(test)
            self.train = [a for a in range(n_solutions) if a not in excluded]
        else:
            # test_size belongs to train_test_split, not to np.arange.
            self.train, self.test = train_test_split(np.arange(n_solutions), test_size = test_size)

    def set_trees(self, trees = None):
        """Set the feature set; if ``trees`` is None, derive it from a small toy model.

        Raises
        ------
        ValueError
            If ``trees`` is None and ``self.Type`` is not one of
            'P', 'Parabolic', 'W', 'Wave'.
        """
        if trees is not None:
            self.trees = trees
            return

        # If the set of trees is not given, create a small toy model and extract trees from there
        W_toy = np.zeros((1, 2, 2))
        if self.Type in {'P', 'Parabolic'}:
            I = SPDE(X = [0,1], T = [0,1]).Integrate_Parabolic_trees
        elif self.Type in {'W','Wave'}:
            I = SPDE(X = [0,1], T = [0,1]).Integrate_Wave_trees
        else:
            raise ValueError("Unknown equation type: {!r}".format(self.Type))
        M_toy = Model(I, self.R, self.H, self.deg, derivative = self.diff)
        # In this experiment the only extra trees that are allowed are derivatives of I_c[u_0]
        # One should generalize this part in order to apply Algorithm 2 to a Wave Equation with zero forcing and "random" initial conditions
        if self.diff:
            M_toy.create_model_list(W_toy, dt = 1, lollipops = W_toy, diff = False, extra_planted = W_toy, extra_deg = -0.5, key = "I'[xi]")
        else:
            M_toy.create_model_list(W_toy, dt = 1, lollipops = W_toy, diff = False)
        self.trees = list(M_toy.models[0].keys())

    def initialize_Predictions(self):
        """Allocate ``self.Pred`` with shape (len(T), len(X), len(test)) and copy
        the initial condition of every test solution into ``self.Pred[0]``.

        Raises
        ------
        ValueError
            If the test indices have not been set.
        """
        if self.test is None:
            raise ValueError("test indices are not set; call set_train_test first")
        test_idx = np.asarray(self.test, dtype = int)
        self.Pred = np.zeros((len(self.T), len(self.X), len(test_idx)))
        # Set initial condition of the predictions to be the known values of the solutions from the test set
        self.Pred[0] = self.Sol[test_idx, 0, :].T

    def cut_off(self, vec):
        """Clamp ``vec`` element-wise to ``[-Max, Max]`` with
        ``Max = cut_off_size * max|Solutions|``; kills too-big values if the
        algorithm blows up.

        Infinite entries are clamped to ``+/-Max`` and empty input is accepted.
        """
        Max = self.cut_off_size*self.max
        return np.clip(np.asarray(vec, dtype = np.float64), -Max, Max)

    def new_model(self, start, Prediction = None, T = None, X = None, test = None, train = (), time_grid_num = 10, space_interpolation = 1):
        """Create models for the initial conditions at time index ``start``.

        With an empty ``train`` (the default) only the current predictions
        (u^pr_{t_k}) of the test set are used as initial conditions. With a
        non-empty ``train`` the observed train solutions at ``start`` are used as
        well; ``train=None`` means ``self.train``.

        Returns ``M.models``; when ``space_interpolation > 1`` every tree is
        subsampled back to the original space grid.
        """
        if Prediction is None: Prediction = self.Pred
        if T is None: T = self.T
        if X is None: X = self.X
        if test is None: test = self.test
        if train is None: train = self.train

        # Index arrays: a tuple would be read by numpy as a multi-dimensional index.
        test_idx = np.asarray(test, dtype = int)
        train_idx = np.asarray(train, dtype = int)

        X_ = np.linspace(X[0], X[-1], space_interpolation*(len(X)-1)+1)
        l, dx, dt = len(test_idx) + len(train_idx), X_[1]-X_[0], T[1]-T[0]
        predicted_IC = interpolate.interp1d(X, self.cut_off(Prediction[start].T))(X_)
        if len(train_idx) == 0: # By default only compute models for the initial conditions (u^pr_{t_k}) from the test set
            IC = predicted_IC
        else: # Compute models for both train and test sets at these initial conditions
            IC = np.zeros((l, len(X_)))
            IC[train_idx] = interpolate.interp1d(X, self.Sol[train_idx, start, :])(X_)
            IC[test_idx] = predicted_IC

        time_grid = np.linspace(T[start], T[start+self.step], time_grid_num+1)
        W_ = np.zeros((l, time_grid_num+1, len(X_))) # No noise is present.

        # Compute I_c[u^pr_{t_k}]. Symbolically these will still be denoted by I[xi] in the model.
        # NOTE(review): .Parabolic is called regardless of self.Type — confirm this is intended.
        lollipop = SPDE(Type = self.Type, BC = self.BC, eps = self.eps, IC = IC).Parabolic(W_, time_grid, X_)

        # Add \partial_c I_c[u^pr_{t_k}] which will be symbolically denoted by I'[xi]
        if self.diff:
            lollipop_diff = [SPDE().discrete_diff(lol, N = len(X_), flatten=False, higher = False)/dx for lol in lollipop]

        # Initialize integration map I
        I = SPDE(eps = self.eps, BC = self.BC, X = X_, T = time_grid).Integrate_FFT_trees

        # initialize model class
        M = Model(I, self.R, self.H, self.deg, derivative = self.diff)
        # create models
        if self.diff:
            M.create_model_list(W_, dt = dt/time_grid_num, diff=False, lollipops = lollipop, extra_planted = lollipop_diff, extra_deg = -0.5, key = "I'[xi]")
        else:
            M.create_model_list(W_, dt = dt/time_grid_num, diff=False, lollipops = lollipop)

        if space_interpolation > 1:
            return [{tree: model[tree][:,::space_interpolation] for tree in model} for model in M.models]
        else:
            return M.models


    # Create models for training. (Step 1 of Algorithm 2)
    def training_models(self, T = None, X = None, train = None, time_grid_num = 10):
        """Build the models for every (train solution, time point) pair and store
        them in ``self.M_train``.

        Initial conditions are u^1_{t_0}, ..., u^1_{t_{N-1}}, u^2_{t_0}, ...,
        u^m_{t_{N-1}} where m is the number of train solutions and N = len(T),
        i.e. m*(N-1) initial conditions overall.
        """
        if T is None: T = self.T
        if X is None: X = self.X
        if train is None: train = self.train

        dt, dx = T[1]-T[0], X[1]-X[0]
        time_grid = np.linspace(0,dt,time_grid_num+1)

        # Initializing IC
        IC = np.array([self.Sol[u, i, :] for u in train for i in range(len(T)-1)]).astype('float32')

        W_ = np.zeros((IC.shape[0], time_grid_num+1, len(X))).astype('float32') # No noise is present.

        # Compute I_c[u_{t_k}]. Symbolically these will still be denoted by I[xi] in the model.
        lollipop = SPDE(Type = self.Type, BC = self.BC, eps = self.eps, IC = IC).Parabolic(W_, time_grid, X)

        # Initialize integration map I (on the same space grid X as everything else in this method)
        I = SPDE(eps = self.eps, BC = self.BC, X = X, T = time_grid).Integrate_FFT_trees

        # Add \partial_c I_c[u_{t_k}] which will be symbolically denoted by I'[xi]
        if self.diff:
            lollipop_diff = [SPDE().discrete_diff(lol, N = len(X), flatten=False, higher = False).astype('float32')/dx for lol in lollipop]

        M = Model(I, self.R, self.H, self.deg, derivative = self.diff)

        print("Creating Model")

        if self.diff:
            M.create_model_list(W_, dt = dt/time_grid_num, diff=False, lollipops = lollipop, extra_planted = lollipop_diff, extra_deg = -0.5, key = "I'[xi]")
        else:
            M.create_model_list(W_, dt = dt/time_grid_num, diff=False, lollipops = lollipop)

        self.M_train = M.models

    # Fit linear regression for each space point. (Step 2 of Algorithm 2)
    def fit_training_model(self, T = None, X = None, train = None, trees = None, alg = None, prepro = False):
        """Fit one regressor per space point on the features in ``self.M_train``.

        Parameters
        ----------
        alg : estimator with ``fit`` / ``predict``, optional
            Template estimator; an independent deep copy is fitted for every
            space point. Defaults to a fresh ``LinearRegression``.
        prepro : bool
            If True, standardise the features at each space point and keep the
            fitted scaler in ``self.scalers`` for use at prediction time.
        """
        from copy import deepcopy

        if T is None: T = self.T
        if X is None: X = self.X
        if train is None: train = self.train
        if trees is None: trees = self.trees
        if alg is None: alg = lin_reg()

        # Labels
        y = np.array([self.Sol[u, i, :] for u in train for i in range(1,len(T))])
        # Features
        x = np.array([np.array([Mu[m][-1] for m in trees]) for Mu in self.M_train])

        for i in tqdm(range(len(X))):
            x_ = x[:,:,i]
            y_ = y[:,i]
            if prepro: # normalise data if needed
                scaler = preprocessing.StandardScaler().fit(x_)
                x_ = scaler.transform(x_)
                self.scalers[X[i]] = scaler
            else:
                self.scalers.pop(X[i], None)
            # fit() returns the estimator itself, so fitting `alg` directly would leave
            # every space point sharing the single model fitted last; use a copy per point.
            self.regression_fit[X[i]] = deepcopy(alg).fit(x_, y_) # fit regression at the space point X_i

    # Given model for u^pr_{t_k} make a prediction of u^pr_{t_{k+1}} using the linear fit created above
    # This is an iterative substep for a given k of Step 3 & 4 of Algorithm 2.
    def predict_with_fitted(self, k, M, Prediction = None, T = None, X = None, trees = None, test = None, train = None, prepro = False):
        """Write the prediction for time index ``k+1`` into ``Prediction`` in place.

        ``M`` is the list of models of u^pr_{t_k}, one per test solution. With
        ``prepro=True`` the scaler stored by ``fit_training_model`` for each
        space point is applied; if none was stored, a scaler is fitted on the
        features being predicted.
        """
        if Prediction is None: Prediction = self.Pred
        if T is None: T = self.T
        if X is None: X = self.X
        if trees is None: trees = self.trees
        if test is None: test = self.test
        if train is None: train = self.train

        # Given model Mu of u^pr_{t_k} for each u^pr from the test set, extract all the space points
        # of the models at time \delta.
        x = np.array([np.array([Mu[m][-1] for m in trees]) for Mu in M])

        # Predict solution for each space point
        for i in range(len(X)):
            x_ = x[:,:,i] # model point at the time-space point (\delta, x_i)
            if prepro: # normalise data if needed
                scaler = self.scalers.get(X[i])
                if scaler is None:
                    scaler = preprocessing.StandardScaler().fit(x_)
                x_ = scaler.transform(x_)

            # prediction
            Prediction[k+1][i] = self.cut_off(self.regression_fit[X[i]].predict(x_)).flatten()

        # Force the prediction to be periodic
        temp = (Prediction[k+1][0] + Prediction[k+1][len(X) - 1])/2
        Prediction[k+1][0] = temp
        Prediction[k+1][len(X) - 1] = temp

    # Step 3 and 4 of the Algorithm 2.
    def learn_with_fitted(self, start = None, end = None, past = True, Prediction = None, T = None, X = None, trees = None, test = None, train = None, alg = None, prepro = False, space_interpolation = 1, save_models=False):
        """Roll the predictions forward from time index ``start`` to ``end``.

        For every k in ``range(start, end)`` the models of u^pr_{t_k} are built
        with ``new_model`` and u^pr_{t_{k+1}} is predicted with
        ``predict_with_fitted``. ``past`` and ``alg`` are accepted but not used
        by this method.
        """
        if Prediction is None:
            if self.Pred is None:
                self.initialize_Predictions()
            Prediction = self.Pred

        if T is None: T = self.T
        if X is None: X = self.X
        if start is None: start = 0
        if end is None: end = len(T)-1
        if trees is None: trees = self.trees
        if test is None: test = self.test
        if train is None: train = self.train

        for k in range(start, end):
            # Unless models are being saved, drop the latest stored model once more than one is held.
            if not save_models and len(self.M_test) > 1: self.M_test.pop()
            # Create models for u^pr_{t_k}
            self.M_test.append(self.new_model(k, Prediction, T, X, test, train = [], space_interpolation = space_interpolation))

            # Predict u^pr_{t_{k+1}}
            self.predict_with_fitted(k, self.M_test[-1], Prediction, T, X, trees, test, train, prepro)

    def to_df_list(self):
        """Return the predictions as a list of DataFrames (index = T, columns = X),
        one per test solution, in the order of ``self.test``."""
        return [pd.DataFrame(self.Pred[:, :, j].copy(), index = self.T, columns = self.X)
                for j in range(len(self.test))]

    def to_df_df(self):
        """Return the predictions as one DataFrame with two-level columns
        ('S<k+1>', x) for every test index k, indexed by T."""
        predicted = self.to_df_list()
        labels = ["S"+str(a+1) for a in self.test]
        return pd.concat(predicted, axis = 1, keys = labels)
        