# Demonstration of gstlearn for a 2-D case study

In [None]:
%%javascript
// Replace the output area's _should_scroll hook with a function that always
// returns false (presumably so that long outputs are never put in a scroll box).
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

## Import packages

In [None]:
import numpy as np
import pandas as pd
import sys
import os
import matplotlib.pyplot as plt
import gstlearn as gl
import gstlearn.plot as gp

Global variables

In [None]:
# Notebook-wide switches: `verbose` gates the textual printouts and `graphics`
# gates the figures in the cells below.
verbose = True
graphics = True

# Define the gstlearn NTCOL option as 6 (presumably the number of columns used
# when printing tables; confirm against the OptCst documentation).
gl.OptCst.define(gl.ECst.NTCOL, 6)

## Reading data

The data are stored in a CSV format in the file called Pollution.dat 

In [None]:
# The data set lives two directory levels up, under doc/data/Pollution.
dataDir = os.path.join(*([os.path.pardir] * 2), "doc", "data", "Pollution")
filepath = os.path.join(dataDir, "Pollution.dat")

# Load the file with a default-constructed CSV format, then give the X locator
# to columns X and Y and the Z locator to column Zn.
mydb = gl.Db.createFromCSV(filepath, gl.CSVformat())
err = mydb.setLocators(["X", "Y"], gl.ELoc.X)
err = mydb.setLocator("Zn", gl.ELoc.Z)

if verbose:
    # Printout combining the RESUME, EXTEND and VARS flags.
    dbfmt = gl.DbStringFormat()
    dbfmt.setParams(gl.FLAG_RESUME | gl.FLAG_EXTEND | gl.FLAG_VARS)
    mydb.display(dbfmt)

Accessing the variable names

In [None]:
# List every column name currently stored in the data base.
print("List of all variable names =", mydb.getAllNames())

Extracting the vector containing the Zn variable in order to perform a selection

In [None]:
# Build a 0/1 float vector (1.0 where Zn < 20, 0.0 elsewhere) and register it
# in the data base as the selection named 'sel'.
tabZn = mydb.getColumn('Zn')
selZn = np.less(np.asarray(tabZn), 20).astype(float)
mydb.addSelection(tuple(selZn), 'sel')

# From here on the Z locator is carried by Pb.
mydb.setLocator('Pb', gl.ELoc.Z)
if verbose:
    mydb.display()

Display my Data (with samples represented by color and size)

In [None]:
if graphics:
    # Plot the samples, using Pb for the colour scale.
    ax = gp.point(
        mydb,
        color_name="Pb",
        title="Data Set",
    )

## Variograms

We first define the geometry of the variogram calculations

In [None]:
# A single direction, DirParam(2, 10, 1.), attached to a fresh VarioParam
# (presumably 2-D, 10 lags of size 1; confirm against the DirParam signature).
mydir = gl.DirParam(2, 10, 1.)
myVarioParamOmni = gl.VarioParam()
myVarioParamOmni.addDirs(mydir)

We use the variogram definition in order to calculate the variogram cloud.

In [None]:
# Variogram cloud computed from the data base with the omni-directional parameters.
dbcloud = gl.db_variogram_cloud(
    mydb,
    myVarioParamOmni,
)

We recall that the variogram cloud is calculated by filling an underlying grid in which each cell is painted according to the number of pairs at the given distance and given variability. We now display the variogram cloud.

In [None]:
if graphics:
    # Display the grid columns whose name matches "Cloud*".
    gp.grid(dbcloud, "Cloud*", title="Variogram Cloud")

Calculating the experimental omni-directional variogram

In [None]:
# Experimental variogram built from the omni-directional parameters.
myVarioOmni = gl.Vario(myVarioParamOmni, mydb)
err = myVarioOmni.compute(gl.ECalcVario.VARIOGRAM)

if verbose:
    myVarioOmni.display()

The variogram is represented graphically for a quick check

In [None]:
if graphics:
    # Quick visual check of the experimental variogram.
    axs = gp.varmod(
        myVarioOmni,
        title="Omni-directional Variogram for Pb",
    )

Calculate a variogram in several directions

In [None]:
# Directions come from generateMultipleDirs(2, 4, 10, 1.) and are all attached
# to one VarioParam (presumably 4 directions in 2-D; confirm against the API).
mydirs = gl.generateMultipleDirs(2, 4, 10, 1.)
myvarioParam = gl.VarioParam()
myvarioParam.addMultiDirs(mydirs)

# Experimental variogram along those directions.
myvario = gl.Vario(myvarioParam, mydb)
myvario.compute(gl.ECalcVario.VARIOGRAM)

if verbose:
    myvario.display()

In [None]:
if graphics:
    # Plot the multi-directional experimental variogram.
    axs = gp.varmod(
        myvario,
        title="Multi-Directional Variogram of Pb",
    )

Calculating the Variogram Map

In [None]:
# Variogram map computed by db_vmap_compute with the two size arguments set to 20.
myvmap = gl.db_vmap_compute(mydb, gl.ECalcVario.VARIOGRAM, 20, 20)

if verbose:
    myvmap.display()

In [None]:
if graphics:
    # Display the variogram-map columns whose name matches "*Var".
    gp.grid(myvmap, "*Var", title="Variogram Map")

## Model

Fitting a Model. We call the Automatic Fitting procedure providing the list of covariance functions to be tested.

In [None]:
# Automatic fit of `myvario`, letting the procedure test the exponential and
# spherical covariance functions.
mymodel = gl.Model.createFromDb(mydb)
err = mymodel.fit(
    myvario,
    [gl.ECov.EXPONENTIAL, gl.ECov.SPHERICAL],
)

Visualizing the resulting model, overlaid on the experimental variogram

In [None]:
if graphics:
    # Draw the fitted model and the experimental variogram together.
    axs = gp.varmod(
        myvario,
        mymodel,
        title="Model for Pb",
    )

An IRF-0 model is created from this Covariance by adding the Universality Drift term

### Model with equality constraints 

We can impose some constraints on the parameters during the fit. For instance here, we impose an equality constraint on the range (range = 1).

In [None]:
# Equality constraint on a RANGE parameter, fixed at 1.; the CovParamId indices
# are all 0 (presumably the first structure; confirm against the API).
paramid = gl.CovParamId(0, 0, gl.EConsElem.RANGE, 0, 0)
constr = gl.Constraints()
constr.addItem(gl.ConsItem(paramid, gl.EConsType.EQUAL, 1.))

# Fit a spherical structure on the omni-directional variogram under that
# constraint, with default-constructed fitting options.
opt = gl.Option_AutoFit()
myModelConstrained = gl.Model.createFromDb(mydb)
err = myModelConstrained.fit(
    myVarioOmni,
    [gl.ECov.SPHERICAL],
    False,
    opt,
    constr,
)

# Bare expression, kept as the last statement of the cell.
myModelConstrained

We can impose inequality constraints by using **EConsType.LOWER** or **EConsType.UPPER**.

## Adding a drift

In [None]:
# Append the drift term identified by "1" to the model fitted above.
mymodel.addDrift(["1"])

if verbose:
    mymodel.display()

## Defining the Neighborhood

We initiate a Neighborhood (Moving, with a small number of samples for demonstration)

In [None]:
# Moving neighborhood created with the arguments (2, False, 6, 10); the meaning
# of each argument should be checked against the NeighMoving.create signature.
myneigh = gl.NeighMoving.create(2, False, 6, 10)

if verbose:
    myneigh.display()

## Checking the Moving Neighborhood

We must first create a Grid which covers the area of interest

In [None]:
# Empty grid, then reset so that it covers the data base; the [0.5, 0.5] and
# [2, 2] arguments are presumably the mesh size and the margin (TODO confirm).
mygrid = gl.DbGrid()
mygrid.resetCoveringDb(mydb, [], [0.5, 0.5], [], [2, 2])

if verbose:
    mygrid.display()

We can now test the neighborhood characteristics for each node of the previously defined grid.

In [None]:
# Run the neighborhood test from the data towards the grid; the results are
# read back from `mygrid` in the cells below.
err = gl.test_neigh(mydb, mygrid, mymodel, myneigh)

if verbose:
    mygrid.display()

We can visualize some of the newly created variables, such as:


- the number of points per neighborhood

In [None]:
if graphics:
    # Grid columns matching "Neigh*Number".
    gp.grid(
        mygrid,
        "Neigh*Number",
        title="Number of Samples per Neighborhood",
    )

- the one giving the maximum distance per neighborhood

In [None]:
if graphics:
    # Grid columns matching "Neigh*MaxDist".
    gp.grid(
        mygrid,
        "Neigh*MaxDist",
        title="Maximum Distance per Neighborhood",
    )

## Cross-validation

We can now process the cross-validation step

In [None]:
# Cross-validation of the data with the current model and neighborhood; the
# data base is printed afterwards to show its new content.
err = gl.xvalid(mydb, mymodel, myneigh)

if verbose:
    mydb.display()

In [None]:
if graphics:
    # Histogram of the "Xvalid.Pb.stderr" column.
    ax = gp.hist(mydb, "Xvalid.Pb.stderr")

## Estimation by Kriging

We now perform the Estimation by Ordinary Kriging. The Neighborhood is changed into a Unique Neighborhood.

In [None]:
# Give the Z locator back to Pb before kriging.
mydb.setLocator("Pb", gl.ELoc.Z)

# Switch to a unique neighborhood and krige the data onto the grid.
myneigh = gl.NeighUnique.create(2)
err = gl.kriging(mydb, mygrid, mymodel, myneigh)

if verbose:
    mygrid.display()

Visualizing the results

In [None]:
if graphics:
    # Kriging estimate as background, data points drawn on the same axes.
    ax = gp.grid(mygrid, "Kriging.Pb.estim")
    ax = gp.point(
        mydb,
        "Pb",
        title="Estimate of Pb",
        ax=ax,
    )

In [None]:
if graphics:
    # Kriging standard deviation as background, data points on the same axes.
    ax = gp.grid(mygrid, "Kriging.Pb.stdev")
    ax = gp.point(
        mydb,
        "Pb",
        title="St. Deviation of Pb",
        ax=ax,
    )

## Simulations

We must first transform the Data into Gaussian

In [None]:
# Hermite anamorphosis built with the argument 30 (presumably the number of
# polynomials; confirm against the API) and fitted on the data base.
myanamPb = gl.AnamHermite(30)
myanamPb.fit(mydb)

if verbose:
    myanamPb.display()

We can produce the Gaussian Anamorphosis graphically within its definition domain.

In [None]:
if graphics:
    # Sample the anamorphosis function and plot z against y, with the axis
    # limits taken from the sampling result.
    res = myanamPb.sample()
    ax = gp.XY(
        res.y,
        res.z,
        xlim=res.aylim,
        ylim=res.azlim,
        title="Gaussian Anamorphosis for Pb",
    )

The next step consists in translating the target variable ('Pb') into its Gaussian transform

In [None]:
# Pb carries the Z locator when the raw-to-Gaussian transform is applied.
mydb.setLocator("Pb", gl.ELoc.Z)
err = myanamPb.RawToGaussian(mydb)

if verbose:
    mydb.display()

We quickly calculate experimental (omni-directional) variograms using the already defined directions

In [None]:
# Omni-directional variogram parameters: a single direction, DirParam(2, 10, 1.).
myvarioParam = gl.VarioParam()
mydir = gl.DirParam(2, 10, 1.)
myvarioParam.addDirs(mydir)

# Bind the new variogram to `myvario` (lower-case v). The following cells fit
# and plot `myvario`; the previous spelling `myVario` left this omni-directional
# variogram unused and recomputed the earlier multi-directional one instead.
myvario = gl.Vario(myvarioParam, mydb)
err = myvario.compute(gl.ECalcVario.VARIOGRAM)

We fit the model by automatic fit (with the constraint that the total sill is equal to 1).

In [None]:
# Fit an exponential structure on `myvario`.
# NOTE(review): no Constraints object is passed to fit() here, although the
# accompanying text mentions a total-sill constraint; confirm what is intended.
mymodelG = gl.Model.createFromDb(mydb)
err = mymodelG.fit(myvario, [gl.ECov.EXPONENTIAL])

if graphics:
    ax = gp.varmod(
        myvario,
        mymodelG,
        title="Model for Gaussian Pb",
    )

We perform a set of 10 conditional simulations using the Turning Bands Method.

In [None]:
# 10 conditional simulations by turning bands. The data were transformed to
# Gaussian scale above, so the simulation uses the model fitted for the
# Gaussian variable (mymodelG) rather than the raw-scale Pb model (mymodel).
err = gl.simtub(mydb, mygrid, mymodelG, myneigh, 10)

if verbose:
    mygrid.display()

Some statistics on the Conditional simulations in Gaussian scale

In [None]:
if verbose:
    # Minimum, maximum, mean and standard deviation of the "Simu.Y.*" columns.
    err = mygrid.statistics(
        ["Simu.Y.*"],
        ["mini", "maxi", "mean", "stdv"],
        True,
        True,
        True,
    )

We visualize a conditional simulation in Gaussian scale

In [None]:
if graphics:
    # First Gaussian simulation as background, data points on the same axes.
    ax = gp.grid(mygrid, "Simu.Y.Pb.1")
    ax = gp.point(
        mydb,
        "Pb",
        title="One Simulation of Pb in Gaussian Scale",
        ax=ax,
    )

We turn the Gaussian conditional simulations into Raw scale (using the Anamorphosis back transform) and get rid of the Gaussian conditional simulations.

In [None]:
# Back-transform the columns matching "Simu.Y.*" to raw scale, then delete the
# Gaussian-scale columns from the grid.
myanamPb.GaussianToRaw(mygrid, "Simu.Y.*")
mygrid.deleteColumn("Simu.Y.*")

if verbose:
    mygrid.display()

We calculate some statistics on the Conditional Simulations in Raw scale.

In [None]:
if verbose:
    # Minimum, maximum, mean and standard deviation of the "Z.Simu.*" columns.
    err = mygrid.statistics(
        ["Z.Simu.*"],
        ["mini", "maxi", "mean", "stdv"],
        True,
        True,
        True,
    )

We visualize a Conditional Simulation in Raw Scale

In [None]:
if graphics:
    # First raw-scale simulation as background, data points on the same axes.
    ax = gp.grid(mygrid, "Z.Simu.Y.Pb.1")
    ax = gp.point(
        mydb,
        "Pb",
        title="One simulation of Pb in Raw Scale",
        ax=ax,
    )

Let us now average the conditional simulations in order to have a comparison with the estimation by kriging.

In [None]:
# "Mean" statistic over the "Z.Simu.*" columns; the next plotting cell reads
# the result back from the grid under the "Stats*Mean" pattern.
err = mygrid.statistics(
    ["Z.Simu.*"],
    ["Mean"],
    True,
    False,
    False,
)

if verbose:
    mygrid.display()

Displaying the average of the Conditional Simulations

In [None]:
if graphics:
    # Columns matching "Stats*Mean" as background, data points on the same axes.
    ax = gp.grid(mygrid, "Stats*Mean")
    ax = gp.point(
        mydb,
        "Pb",
        title="Mean of Pb simulations",
        ax=ax,
    )

## Multivariate case

The Gaussian transform of the Pb variable has already been calculated. It suffices to perform the Gaussian transform of the Zn variable

In [None]:
# Move the Z locator to Zn, then fit a second Hermite anamorphosis on it
# (built with the same argument, 30, as the one for Pb).
mydb.setLocator("Zn", gl.ELoc.Z)
myanamZn = gl.AnamHermite(30)
myanamZn.fit(mydb)

if verbose:
    myanamZn.display()

In [None]:
if graphics:
    # Sample the Zn anamorphosis function and plot z against y, with the axis
    # limits taken from the sampling result.
    res = myanamZn.sample()
    ax = gp.XY(
        res.y,
        res.z,
        xlim=res.aylim,
        ylim=res.azlim,
        title="Gaussian Anamorphosis for Zn",
    )

We convert the raw data into its Gaussian equivalent

In [None]:
# Zn carries the Z locator when the raw-to-Gaussian transform is applied.
mydb.setLocator("Zn", gl.ELoc.Z)
err = myanamZn.RawToGaussian(mydb)

if verbose:
    mydb.display()

We now perform the multivariate variogram calculation

In [None]:
# Both Gaussian variables carry the Z locator for the bivariate analysis.
mydb.setLocators(["Y.Pb", "Y.Zn"], gl.ELoc.Z)

# Experimental variogram, reusing the parameters held in `myvarioParam`.
myvario = gl.Vario(myvarioParam, mydb)
err = myvario.compute(gl.ECalcVario.VARIOGRAM)

# Automatic fit with an exponential structure.
mymodelM = gl.Model.createFromDb(mydb)
err = mymodelM.fit(myvario, [gl.ECov.EXPONENTIAL])

if graphics:
    ax = gp.varmod(
        myvario,
        mymodelM,
        title="Multivariate Model",
        figsize=[5,5],
    )

We perform 10 bivariate conditional simulations (deleting the previous monovariate simulation outcomes first for better legibility)

In [None]:
# Remove the columns matching "Z.Simu*" from the grid, then run 10 conditional
# simulations with the multivariate model.
mygrid.deleteColumn("Z.Simu*")
err = gl.simtub(mydb, mygrid, mymodelM, myneigh, 10)

if verbose:
    mygrid.display()

We back-transform each set of simulation outcomes using its own Gaussian Anamorphosis function. Finally we delete the Gaussian variables and ask for the statistics on the simulated variables in the Raw Scale.

In [None]:
# Each variable is back-transformed with its own anamorphosis.
err = myanamZn.GaussianToRaw(mygrid, "Simu.Y.Zn*")
err = myanamPb.GaussianToRaw(mygrid, "Simu.Y.Pb*")

# Delete the Gaussian-scale columns once they have been back-transformed.
mygrid.deleteColumn("Simu.Y*")

if verbose:
    err = mygrid.statistics(
        ["Z.Simu.*"],
        ["mini", "maxi", "mean", "stdv"],
        True,
        True,
        True,
    )

## Categorical Variable

We compare the initial variable 'Pb' with a set of disjoint intervals. Since the 'Pb' values vary from 3 to 12.7, we consider three classes:
- values below 4
- values between 4 and 6
- values above 6


We first build the indicators for each class

In [None]:
# Bounds 4 and 6, with gl.TEST at both ends (presumably marking open-ended
# classes; confirm against the Limits documentation).
limits = gl.Limits(
    [gl.TEST, 4., 6., gl.TEST]
)

if verbose:
    limits.display()

We apply the set of limits previously defined in order to transform the input variable into Indicators of the different classes.

In [None]:
# Convert Pb to indicators using the limits defined above; the data base is
# printed afterwards to show its new content.
err = limits.toIndicator(mydb, "Pb")

if verbose:
    mydb.display()

We calculate the variogram of the Indicators for future use

In [None]:
# Variogram of the indicators, reusing the direction `mydir` defined earlier.
myvarioindParam = gl.VarioParam()
myvarioindParam.addDirs(mydir)

myvarioInd = gl.Vario(myvarioindParam, mydb)
err = myvarioInd.compute(gl.ECalcVario.VARIOGRAM)

if verbose:
    myvarioInd.display()

In [None]:
if graphics:
    # Honour the notebook-wide `graphics` switch, like every other plotting cell.
    ax = gp.varmod(myvarioInd)

Then we build a categorical variable which gives the index of the class to which each sample belongs

In [None]:
# Convert Pb to a categorical variable using the same limits.
err = limits.toCategory(mydb, "Pb")

if verbose:
    # Printout restricted to the STATS flag and to the columns matching
    # "Category*", with the display mode set to 2.
    dbfmt = gl.DbStringFormat()
    dbfmt.setParams(gl.FLAG_STATS)
    dbfmt.setNames(["Category*"])
    dbfmt.setMode(2)
    mydb.display(dbfmt)