In [None]:
import gstlearn as gl
import gstlearn.plot as gp
import gstlearn.document as gdoc
import numpy as np
import matplotlib.pyplot as plt

gdoc.setNoScroll()

# Constraints on drifts

<!-- SUMMARY: Estimation and Simulations performed in the framework of SPDE -->

<!-- CATEGORY: SPDE -->

This tutorial has two essential tasks:

- cross-check the calculation of the Log-Likelihood (by hand or with gstlearn)
- design the definition pattern for the linkage of the drift functions in the multivariable framework

## Parameters

In [None]:
# Data
# Fix NumPy's global random seed
np.random.seed(123)
# Number of samples and space dimension (passed to Db.createFillRandom below)
ndat = 100
ndim = 2

# Model
# Range and sill of the Matern structure, and sill of the nugget effect,
# used to build the monovariate model
rangev = 0.2
sill = 1.
nugget = 0.1

In [None]:
def estimCoeff(Z,Covmat,drift,A=None,c=None):
    """Estimate the drift coefficients beta, optionally under linear constraints.

    The unconstrained estimate is the generalized least-squares solution
        beta = (X' C^-1 X)^-1 X' C^-1 Z
    where X is the drift matrix and C the covariance matrix. When both A and c
    are provided, the estimate is corrected so that A * beta = c.

    Parameters
    ----------
    Z : data vector
    Covmat : covariance matrix of the data
    drift : drift matrix (one column per drift coefficient)
    A, c : optional encoding of the constraints on the coefficient vector
        beta, in the form A * beta = c. If either one is None, no constraint
        is applied.

    Returns
    -------
    The vector of estimated coefficients, or np.nan when A and c are both
    given but their sizes are inconsistent (number of rows of A different
    from len(c), or number of columns of A different from the number of
    columns of drift).
    """
    constrained = A is not None and c is not None
    if constrained and (A.shape[0] != len(c) or A.shape[1] != drift.shape[1]):
        return np.nan

    precision = np.linalg.inv(Covmat)
    gram_inv = np.linalg.inv(drift.T @ precision @ drift)
    beta_gls = gram_inv @ drift.T @ precision @ Z

    if not constrained:
        return beta_gls

    # Correction bringing the unconstrained estimate onto A * beta = c
    gain = gram_inv @ A.T @ np.linalg.inv(A @ gram_inv @ A.T)
    return beta_gls - gain @ A @ beta_gls + gain @ c

def computeLogLikelihoodByHand(Z,Covmat,drift,coeffs=None,A=None,c=None):
    """Compute the Gaussian log-likelihood of Z for a given covariance and drift.

    Parameters
    ----------
    Z : data vector
    Covmat : covariance matrix of the data
    drift : drift matrix (one column per drift coefficient)
    coeffs : drift coefficients; when None they are estimated with
        estimCoeff(Z, Covmat, drift, A, c)
    A, c : optional constraints forwarded to estimCoeff (only used when
        coeffs is None)

    Returns
    -------
    -0.5 * (quadratic form of the centered data + log-determinant of Covmat
            + len(Z) * log(2 * pi))
    """
    beta = estimCoeff(Z,Covmat,drift,A,c) if coeffs is None else coeffs
    # Data centered by the drift
    residual = Z - beta@drift.T
    # Lower Cholesky factor L of Covmat (Covmat = L L')
    lower = np.linalg.cholesky(Covmat)
    # Solving L y = residual gives y'y = residual' Covmat^-1 residual
    whitened = np.linalg.solve(lower,residual)
    mahalanobis = whitened.T@whitened
    # log det(Covmat) = 2 * sum(log(diag(L)))
    log_det = 2. * np.sum(np.log(np.diag(lower)))
    n_obs = len(Z)
    return -0.5 * (mahalanobis + log_det + n_obs * np.log(2.* np.pi))

def printCoeffs(title, coeffs, ndec=6):
    """Print `title`, then " : ", then `coeffs` rounded to `ndec` decimals."""
    rounded = np.round(coeffs,ndec)
    print(" : ".join((title, str(rounded))))

## Monovariate case

### Model

In [None]:
# Monovariate model: a Matern structure (param=1) with the range and sill
# defined in the Parameters section...
model = gl.Model.createFromParam(gl.ECov.MATERN,param=1,range=rangev,sill=sill)
# ... plus a nugget effect
model.addCovFromParam(gl.ECov.NUGGET,sill=nugget)
# Display the model
model

### Data

In [None]:
# Data base of ndat random samples in ndim dimensions
# NOTE(review): the third argument (0) is presumably the number of variables
# to generate - confirm against Db.createFillRandom
dat = gl.Db.createFillRandom(ndat, ndim, 0)
# Copy the first coordinate into a column named "drift"
dat["drift"] = dat["x-1"]
# Simulate the model on the data; the result is read back below as "Simu"
gl.simtub(None,dat,model)
# Display the data base
dat

In [None]:
# True mean added to the simulated variable.
# Note: "Simu" is updated in place, so re-running this cell adds the mean again.
truecoeffs = [0.5]
dat["Simu"] = truecoeffs[0] + dat["Simu"]
# Disabled: the "drift" column is not given the ELoc.F locator in this section
#dat.setLocator("drift",gl.ELoc.F)
dat

In [None]:
# Define the drift part of the model
# NOTE(review): presumably order 0 (constant mean only) and no external drift -
# confirm against Model.setDriftIRF
model.setDriftIRF(0,0)
model

In [None]:
# Drift matrix and covariance matrix of the model evaluated on the data.
# toTL() converts the gstlearn matrices into objects usable by the NumPy
# functions defined above (presumably NumPy arrays - TODO confirm).
X = model.evalDriftMat(dat).toTL()
Covmat = model.evalCovMatSym(dat).toTL()

In [None]:
# Constraint A * beta = c on the single drift coefficient: it is imposed to 0.3
A = np.array([1]).reshape(1,1)
c = [0.3]

# Constrained estimation of the coefficient
coeffs = estimCoeff(dat["Simu"],Covmat,X,A,c)
printCoeffs("a=0.3", coeffs)

The case without any constraint can be emulated using the log-likelihood principle:

- calculated by hand:

In [None]:
# Log-likelihood computed with the NumPy implementation, without any
# constraint (the drift coefficient is estimated inside the function)
loglike_manual = computeLogLikelihoodByHand(dat["Simu"],Covmat,X)
print("Computed manually : " + str(np.round(loglike_manual,6)))

- calculated within gstlearn

In [None]:
# Log-likelihood computed by gstlearn from the model and the data
# NOTE(review): the second argument (True) is presumably a verbose flag - confirm
likelihoodG = model.computeLogLikelihood(dat, True)

- using the Vecchia approximation

In [None]:
# Log-likelihood computed by gstlearn with the Vecchia approximation
# NOTE(review): 4 is presumably the number of neighbors and True a verbose
# flag - confirm against gl.logLikelihoodVecchia
likelihoodV = gl.logLikelihoodVecchia(dat, model, 4, True)

## Bivariate

In [None]:
# Standard deviations of the two variables and their correlation coefficient
s1 = 0.4
s2 = 2.0
r  = 0.8
# 2x2 matrix of sills (variances on the diagonal, covariance off-diagonal)
sills = np.array([[s1**2,r*s1*s2],[r*s1*s2,s2**2]])
# Bivariate Matern model (no nugget effect is added here)
model = gl.Model.createFromParam(gl.ECov.MATERN,param=1,range=rangev,sills=sills)

In [None]:
# New data base with 200 random samples
ndat=200
# NOTE(review): the trailing arguments (0, 2) are presumably the number of
# variables and the number of external drift fields - confirm against
# Db.createFillRandom
dat = gl.Db.createFillRandom(ndat, ndim, 0,2)
# Copy the first coordinate into a column named "drift"
dat["drift"] = dat["x-1"]
# Simulate the bivariate model on the data; results are read back below as
# "Simu.1" and "Simu.2"
gl.simtub(None,dat,model)
dat

In [None]:
# Cross-plot of the two simulated variables (before any drift is added).
# Note: `ax` receives the return value of plt.scatter, not an Axes object;
# the name is reassigned by gp.init in later cells.
ax = plt.scatter(dat["Simu.1"],dat["Simu.2"])

In [None]:
# True drift coefficients [mean, slope] of each variable
truecoeffs1 = [0.5, 3]
truecoeffs2 = [1.5,-2]
# NOTE(review): presumably order 0 (constant mean) plus one external drift -
# confirm against Model.setDriftIRF
model.setDriftIRF(0,1)
# Add mean + slope * drift to each simulated variable.
# Note: the columns are updated in place, so re-running this cell adds the
# drift again.
dat["Simu.1"] =  truecoeffs1[0] + truecoeffs1[1] * dat["drift"] + dat["Simu.1"]
dat["Simu.2"] =  truecoeffs2[0] + truecoeffs2[1] * dat["drift"] + dat["Simu.2"]
# Assign the ELoc.F locator to the "drift" column
dat.setLocator("drift",gl.ELoc.F)

### No constraint

In [None]:
# Covariance and drift matrices of the bivariate model evaluated on the data
Covmat = model.evalCovMatSym(dat).toTL()
X = model.evalDriftMat(dat).toTL()
# Gather all the columns whose name matches "Simu*"
Z = dat["Simu*"]
# Flatten into a single vector after transposition
# (assumes dat["Simu*"] is an (nsample, nvar) array, so that the values are
# stored variable after variable - TODO confirm)
Z = Z.T.reshape(-1)

In [None]:
# Unconstrained estimation; the coefficients are used below as consecutive
# (mean, slope) pairs, one pair per variable
coeffs = estimCoeff(Z,Covmat,X)
fig,ax = gp.init(1,2)
# One panel per variable: data against the first coordinate, with the fitted
# line drawn between x=0 and x=1
for ivar, name in enumerate(["Simu.1", "Simu.2"]):
    ax[ivar].scatter(dat["x-1"],dat[name])
    ax[ivar].plot([0,1],[coeffs[2*ivar],coeffs[2*ivar]+coeffs[2*ivar+1]])
gp.close()

In [None]:
# Print the coefficients estimated without any constraint
printCoeffs("No Constraint",coeffs)

This option can be emulated in gstlearn

In [None]:
# Log-likelihood computed by gstlearn for the bivariate model
# NOTE(review): the second argument (True) is presumably a verbose flag - confirm
likelihood = model.computeLogLikelihood(dat, True)

### Means of both variables are imposed

In [None]:
# Constraints A * beta = c with beta = (a0, a1, b0, b1):
# a0 = 0.5 (mean of the first variable) and b0 = 1.5 (mean of the second one)
A = np.array([[1,0,0,0],[0,0,1,0]])
c = [0.5,1.5]
coeffs=estimCoeff(Z,Covmat,X,A,c)

In [None]:
fig,ax = gp.init(1,2)
# One panel per variable: data against the first coordinate, with the line
# defined by the current coefficients drawn between x=0 and x=1
for ivar, name in enumerate(["Simu.1", "Simu.2"]):
    ax[ivar].scatter(dat["x-1"],dat[name])
    ax[ivar].plot([0,1],[coeffs[2*ivar],coeffs[2*ivar]+coeffs[2*ivar+1]])
gp.close()

In [None]:
# Print the coefficients estimated with both means imposed
printCoeffs("a0=0.5 and b0=1.5", coeffs)

### Same mean and same drift coefficient for both variables

In [None]:
# Constraints A * beta = c with beta = (a0, a1, b0, b1):
# a0 - b0 = 0 and a1 - b1 = 0 (same coefficients for both variables)
A = np.array([[1,0,-1,0],[0,1,0,-1]])
c = [0,0]
coeffs = estimCoeff(Z,Covmat,X,A,c)

In [None]:
fig,ax = gp.init(1,2)
# One panel per variable: data against the first coordinate, with the line
# defined by the current coefficients drawn between x=0 and x=1
for ivar, name in enumerate(["Simu.1", "Simu.2"]):
    ax[ivar].scatter(dat["x-1"],dat[name])
    ax[ivar].plot([0,1],[coeffs[2*ivar],coeffs[2*ivar]+coeffs[2*ivar+1]])
gp.close()

In [None]:
# Print the coefficients estimated with identical coefficients for both variables
printCoeffs("a0=b0 and a1=b1", coeffs)

This can be emulated with the current *flagLinked* option

In [None]:
# Switch on the flagLinked option of the model before computing the
# log-likelihood with gstlearn
# NOTE(review): presumably this links the drift coefficients of the different
# variables - confirm against Model.setFlagLinked
model.setFlagLinked(True)
likelihood = model.computeLogLikelihood(dat, True)

### Means are equal

In [None]:
# Single constraint A * beta = c with beta = (a0, a1, b0, b1):
# a0 - b0 = 0 (both means are equal)
A = np.array([[1,0,-1,0]])
c = [0]
coeffs = estimCoeff(Z,Covmat,X,A,c)

In [None]:
fig,ax = gp.init(1,2)
# One panel per variable: data against the first coordinate, with the line
# defined by the current coefficients drawn between x=0 and x=1
for ivar, name in enumerate(["Simu.1", "Simu.2"]):
    ax[ivar].scatter(dat["x-1"],dat[name])
    ax[ivar].plot([0,1],[coeffs[2*ivar],coeffs[2*ivar]+coeffs[2*ivar+1]])
gp.close()

In [None]:
# Print the coefficients estimated with equal means
printCoeffs("a0=b0", coeffs)

### Means are linked, coefficient of drift on the first variable is imposed

In [None]:
# Constraints A * beta = c with beta = (a0, a1, b0, b1):
# a0 - b0 = 0 (equal means) and a1 = 1 (imposed drift coefficient of the
# first variable)
A = np.array([[1,0,-1,0],[0,1,0,0]])
c = [0,1]
coeffs = estimCoeff(Z,Covmat,X,A,c)

In [None]:
fig,ax = gp.init(1,2)
# One panel per variable: data against the first coordinate, with the line
# defined by the current coefficients drawn between x=0 and x=1
for ivar, name in enumerate(["Simu.1", "Simu.2"]):
    ax[ivar].scatter(dat["x-1"],dat[name])
    ax[ivar].plot([0,1],[coeffs[2*ivar],coeffs[2*ivar]+coeffs[2*ivar+1]])
gp.close()

In [None]:
# Print the coefficients estimated with equal means and a1 imposed to 1
printCoeffs("a0=b0 and a1=1", coeffs)

## Multivariate

We test the case of 3 variables for running a specific test.

In [None]:
# Random sill matrix for 3 variables, produced by
# MatrixSymmetric.createRandomDefinitePositive
sills = gl.MatrixSymmetric.createRandomDefinitePositive(3)
# Matern model (param=1) built on this sill matrix
model = gl.Model.createFromParam(gl.ECov.MATERN,param=1,range=rangev,sills=sills)

In [None]:
# Display the three-variable model
model

In [None]:
# New data base with 200 random samples
ndat=200
# NOTE(review): the trailing arguments (0, 2) are presumably the number of
# variables and the number of external drift fields - confirm against
# Db.createFillRandom
dat = gl.Db.createFillRandom(ndat, ndim, 0,2)
# Copy the first coordinate into a column named "drift"
dat["drift"] = dat["x-1"]
# Simulate the three-variable model on the data; results are read back below
# as "Simu.1", "Simu.2" and "Simu.3"
gl.simtub(None,dat,model)
dat

In [None]:
# True drift coefficients [mean, slope] of each of the three variables
truecoeffs1 = [ 0.5, 3]
truecoeffs2 = [ 1.5,-2]
truecoeffs3 = [-0.5,-2]
# NOTE(review): presumably order 0 (constant mean) plus one external drift -
# confirm against Model.setDriftIRF
model.setDriftIRF(0,1)
# Add mean + slope * drift to each simulated variable.
# Note: the columns are updated in place, so re-running this cell adds the
# drift again.
dat["Simu.1"] =  truecoeffs1[0] + truecoeffs1[1] * dat["drift"] + dat["Simu.1"]
dat["Simu.2"] =  truecoeffs2[0] + truecoeffs2[1] * dat["drift"] + dat["Simu.2"]
dat["Simu.3"] =  truecoeffs3[0] + truecoeffs3[1] * dat["drift"] + dat["Simu.3"]
# Assign the ELoc.F locator to the "drift" column
dat.setLocator("drift",gl.ELoc.F)

In [None]:
# Covariance and drift matrices of the three-variable model evaluated on the data
Covmat = model.evalCovMatSym(dat).toTL()
X = model.evalDriftMat(dat).toTL()
# Gather all the columns whose name matches "Simu*"
Z = dat["Simu*"]
# Flatten into a single vector after transposition
# (assumes dat["Simu*"] is an (nsample, nvar) array, so that the values are
# stored variable after variable - TODO confirm)
Z = Z.T.reshape(-1)

In [None]:
# Unconstrained estimation; the coefficients are used below as consecutive
# (mean, slope) pairs, one pair per variable
coeffs = estimCoeff(Z,Covmat,X)
fig,ax = gp.init(1,3, figsize=[15,4])
print(coeffs)
# One panel per variable: data against the first coordinate, with the fitted
# line drawn between x=0 and x=1.
# The third panel shows "Simu.3", i.e. the variable whose coefficients
# (coeffs[4], coeffs[5]) define the line drawn on that panel.
for ivar, name in enumerate(["Simu.1", "Simu.2", "Simu.3"]):
    ax[ivar].scatter(dat["x-1"],dat[name])
    ax[ivar].plot([0,1],[coeffs[2*ivar],coeffs[2*ivar]+coeffs[2*ivar+1]])
gp.close()

### Means of all variables are equal

This test checks how to constrain the mean values of all three variables to be equal: this is done by setting constraints on the drift coefficients of the variables taken two by two (first = second, second = third).

In [None]:
# Constraints A * beta = c with beta = (a0, a1, b0, b1, c0, c1):
# a0 - b0 = 0 and b0 - c0 = 0, i.e. the three means are equal
A = np.array([[1,0,-1,0,0,0],[0,0,1,0,-1,0]])
c = [0, 0]
coeffs = estimCoeff(Z,Covmat,X,A,c)

In [None]:
# Display the coefficients estimated with the three means constrained to be equal
coeffs