# Time series analysis in Python 2

Multi-scale sample entropy analysis

In [None]:
%matplotlib inline 
# set plotting option for notebook

Note that before the next step we have to install the package 'netCDF4' by opening the Anaconda terminal with administrator rights and typing in:

    conda install netCDF4
    
We also have to install the GDAL (Geospatial Data Abstraction Library), geopandas (geographic pandas data objects), geoplot (map making) and proj4 (cartographic projection library) packages for Python.
    
    conda install geopandas

    conda install geoplot -c conda-forge
    
    conda install proj4
    
Installing geopandas also installs the GDAL package as a dependency.
Now we can import the netCDF file with the data and plot geographic maps.

In [None]:
# import libraries
from copy import copy, deepcopy
import geopandas
import geoplot as gplt
import imageio #to make a movie loop
import netCDF4 # import the libary to read netCDF format files
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
import matplotlib
import os
from os import listdir
from os.path import isfile, join
import pandas as pd
from pymse_with_ci95 import PyMSE # the file pymse_with_ci95.py needs to be available locally
from scipy import stats # import the linear regression model and plot functions
from scipy import misc # import image visualisation
from scipy.io import netcdf
from scipy import optimize
from scipy.optimize import curve_fit # import the nonlinear regression model and plot functions
from skimage import data, io, filters
import sys
io.use_plugin('matplotlib')

# Space-time data analysis

Now we will learn to run the multi-scale sample entropy on a stack of multitemporal spatial data. We have downloaded the Climate Research Unit's data on variance-adjusted global gridded temperature anomalies from the University of East Anglia.

Let's set some options first.

In [None]:
# Switches controlling which map series the map-making cell produces.
yearlymaps = True     # render one global map per calendar year
monthlymaps = False   # render one global map per month

In [None]:
# Load the CRUTEM4 temperature anomaly data from a local netCDF file
# and print an overview of its structure.

# set working directory (absolute, machine-specific path: adjust it to your own setup)
wd = '/home/heiko/sf_GY7709_Satellite_Data_Analysis_in_Python/practicals/'
# name of the netCDF file inside wd; the coordinates, time stamps and anomalies are read from it below
datafile = 'CRUTEM.4.6.0.0.variance_adjusted.nc'
# The data file was downloaded from https://crudata.uea.ac.uk/cru/data/temperature/CRUTEM.4.6.0.0.variance_adjusted.nc

# create a dataset object (opens the file; the handle 'foo' is kept open for the reads below)
foo = netCDF4.Dataset(wd + datafile)

print('What file format is the file in?')
print(foo.file_format)

print('Which dimensions does the netCDF file have?')
print(foo.dimensions.keys())

print('Which variables does the netCDF file have?')
print(foo.variables.keys())

print('How can we get the time into a new variable?')
# NOTE: the name 't' is re-bound further down by 'from scipy.stats import t';
# only 'nt' (the number of time steps) is used by the later cells.
t = foo.variables['time']
print(t)
print("Number of time steps:")
nt = len(t)
print(nt)
print('First 10 values:')
print(t[0:10,])

# extract netcdf variables as netCDF4 variable objects, so that metadata
# such as .units and .shape can be printed
lat = foo.variables['latitude']
lon = foo.variables['longitude']
crutem4 = foo.variables['temperature_anomaly']
print("Type of CRUTEM4 object: " + str(type(crutem4)))

print("Latitudes:")
print(lat.units)
print(lat.shape)
print("Longitudes:")
print(lon.units)
print(lon.shape)
print("Temperature anomaly:")
print(crutem4.units)
print(crutem4.shape)

# read the full contents of each variable into memory with [:]
# (these are arrays, not geopandas objects; the next cell accesses crutem4.data,
#  so it presumably expects a numpy masked array here - TODO confirm)
lat = foo.variables['latitude'][:]
lon = foo.variables['longitude'][:]
crutem4 = foo.variables['temperature_anomaly'][:]
print("Type of CRUTEM4 object: " + str(type(crutem4)))


Now that we have imported a whole lot of good libraries and loaded the data, we are much better prepared to do some serious plotting.
First, we want to plot a histogram of our temperature data.
Before we can do that, we need to remove the missing values from the data, which are coded as -1e+30 in the CRUTEM4v dataset.
We also want to remove all zero values, which indicate water areas.


In [None]:
# Recode invalid cells of the CRUTEM4v data as NaN, writing directly into the
# data buffer of the array:
#   - values below -999 are the missing-value code (-1e+30 in the file)
#   - exact zeros are recoded as well
_buffer = crutem4.data
_buffer[_buffer < -999.] = np.nan
_buffer[_buffer == 0.] = np.nan

# Range of the remaining (non-NaN) anomalies
_valid = ~np.isnan(crutem4)
tmin = crutem4[_valid].min()
tmax = crutem4[_valid].max()
print("tmin="+str(tmin)+"  tmax="+str(tmax))

Let's define a generic plotting function that makes a global map of the temperature anomaly data and plots the coastlines of continents over it. We will use geopandas to make it simple.

In [None]:
# define world map: read the 'naturalearth_lowres' dataset that is located via geopandas.datasets
# NOTE(review): geopandas.datasets appears to have been removed in geopandas 1.0 - confirm
# against the installed version and, if needed, read a downloaded Natural Earth file instead.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
# keep only rows with a positive population estimate and drop Antarctica
world = world[(world.pop_est>0) & (world.name!="Antarctica")]

# define a plotting function to make a global map
def do_worldmap(image, lat, lon, ymin, ymax, file, title, label):
    """Draw a raster image under the country outlines and save it to a file.

    Parameters
    ----------
    image : 2D array
        Values to display; row 0 is drawn at the bottom (origin='lower').
    lat, lon : 1D sequences
        Latitude / longitude coordinates of the image rows / columns. Only
        their minimum and maximum are used, as the extent of the image.
        NOTE(review): imshow treats the extent as the outer edges of the image;
        if lat/lon hold cell centres the map is drawn half a cell too small
        on each side - confirm and widen the extent if so.
    ymin, ymax : float
        Lower and upper limit of the colour scale.
    file : str
        Output file name including path.
    title : str
        Main title of the map.
    label : str
        Text of the colour bar label.

    The outlines come from the module-level GeoDataFrame `world`.
    """
    fig = plt.figure(figsize=(18, 8)) # create the empty figure (size in inches)
    try:
        ax1 = plt.subplot() # create an axis object for one plot
        gplt.polyplot(world, ax=ax1, edgecolor='black') # add the country outlines
        plt.suptitle(title, fontsize=16) # add main plot title
        # With origin='lower' the first image row is placed at the bottom of the axes
        im = ax1.imshow(image, vmax=ymax, vmin=ymin, cmap='jet', origin='lower',
                        extent=(min(lon), max(lon), min(lat), max(lat)))
        fig.colorbar(im, ax=ax1, label=label) # add colour bar legend
        ax1.set_aspect('auto')
        # 'figsize' is a property of the figure, not an argument of savefig
        # (recent matplotlib versions raise a TypeError for it), so it is not passed here.
        fig.savefig(file, bbox_inches='tight', pad_inches=0.1, dpi=150)
    finally:
        plt.close(fig) # always release the figure, also when plotting fails
    

Now that we have a function called do_worldmap to standardise our map making, we can process all the months or years and make one map per time period automatically. Each map is saved as a tif file at the end of the function, so we can see the results in our file explorer.
Once we have made the monthly or annual images, how about making a movie loop over all of them?

In [None]:
# set limits for the colorbar display range
tmin = -3
tmax = 3
# set label for legend of worldmaps
label='temperature anomaly (oC)'

nadisp = 0 # display value for NaN

firstyear = 1850  # calendar year of time index 0 (the loops assume the series starts in January)
nyears = nt // 12 # number of complete years in the series


def _display_copy(image):
    """Return a float copy of a 2D slice in which missing cells are set to nadisp.

    A copy is essential: slices of crutem4 share memory with the full array, and
    the NaN coding of crutem4 is still needed by the entropy analysis later on.
    """
    out = np.array(np.ma.filled(image, np.nan), dtype=float)
    out[np.isnan(out)] = nadisp # NaN never compares equal to anything, so np.isnan is required
    return out


def _annual_mean(months):
    """Average a (12, nlat, nlon) stack over the months that are not missing.

    Cells without a single valid month are set to nadisp.
    """
    stack = np.array(np.ma.filled(months, np.nan), dtype=float)
    valid = ~np.isnan(stack)
    count = valid.sum(axis=0)                       # number of valid months per cell
    total = np.where(valid, stack, 0.).sum(axis=0)  # sum over the valid months only
    return np.where(count > 0, total / np.maximum(count, 1), nadisp)


def _make_movie(frames, giffile):
    """Write the image files listed in frames, in the given order, to an animated gif."""
    frames = [f for f in frames if os.path.isfile(f)]
    if not frames:
        print('No maps found, movie not created: ' + giffile)
        return
    imagestack = [imageio.imread(f) for f in frames]
    kargs = { 'duration': 0.5 }
    imageio.mimsave(giffile, imagestack, loop=0, **kargs)
    print('Created file: ' + giffile)


#now make maps for all months (optional)
if monthlymaps:
    mapdir = wd+'crutem4_maps_monthly/' # trailing slash: file names are appended directly
    if not os.path.isdir(mapdir):
        os.mkdir(mapdir)
        print('New directory has been created: ' + mapdir)
    frames = [] # map files in chronological order, for the movie
    for year in range(firstyear, firstyear + nyears):
        for month in range(1, 13):
            file = mapdir+'crutem4_map_'+str(year)+'_'+str(month)+'.tif'
            frames.append(file)
            if os.path.isfile(file): # check if the tif file already exists, if so then skip it
                print(file+' already exists. Skipping to the next.')
                continue
            print(year, month)
            title = 'CRUTEM4 '+str(month)+' '+str(year)
            ti = (year-firstyear)*12 + month-1 #time index for array
            image = _display_copy(crutem4[ti,:,:])
            do_worldmap(image, lat, lon, tmin, tmax, file, title, label)
            print('Created file: '+file)
    # make a movie of all monthly maps
    _make_movie(frames, mapdir + 'crutem4movie_monthly.gif')

# now make maps for all years (optional)
if yearlymaps:
    mapdir = wd+'crutem4_maps_annual/'
    if not os.path.isdir(mapdir):
        os.mkdir(mapdir)
        print('New directory has been created: ' + mapdir)
    frames = [] # map files in chronological order, for the movie
    for year in range(firstyear, firstyear + nyears):
        file = mapdir+'crutem4_map_'+str(year)+'.tif'
        frames.append(file)
        if os.path.isfile(file): # check if the tif file already exists, if so then skip it
            print(file+' already exists. Skipping to the next.')
            continue
        print(year)
        ti = (year-firstyear)*12 # time index of January of this year
        image = _annual_mean(crutem4[ti:ti+12,:,:])
        title = 'CRUTEM4 '+str(year)
        do_worldmap(image, lat, lon, tmin, tmax, file, title, label)
        print('Created file: '+file)

    # make a movie of all annual maps (only the tif maps, never a gif from an earlier run)
    _make_movie(frames, mapdir + '_crutem4movie.gif')
        

Before moving on, open PowerPoint and insert the GIF file with the movie into a slide. Now play the slide show. Voilà!


# Your portfolio task

Inspect the output tiff files and write a one-page summary of the data for your portfolio, including 2-3 small figures.

# Multi-scale entropy analysis

We now treat the grid boxes of the temperature anomaly data as separate time series. For each time series, we want to calculate the multi-scale sample entropy (MSE) in that grid box for two separate chunks of the data.

In [None]:
# OPTIONS

# Name of the output folder (created below the working directory by build_dir_structure)
outfolder = 'crutem4_mse'

##############################################################################
# Parameters of the multi-scale sample entropy (MSE) calculation
##############################################################################

maxscale = 48       # largest scale factor to evaluate
r = 0.5             # tolerance r
m = 3               # pattern length m
q = 0.95            # probability used for the confidence interval
bp = 12 * 111       # breakpoint (time index) splitting the series into two chunks: 111 years of monthly data
minlen = 500        # chunks shorter than this are not analysed
plottrends = True   # write figures showing whether the chunks contain trends?
# colour palette used for the plots
cols = ['black', 'red', 'blue', 'green', 'magenta', 'purple', 'brown']

In [None]:
# HELPER FUNCTIONS
# Sets the directory tree up for the rest of the processing
def build_dir_structure():
    """Create the output folders below the working directory `wd`.

    The two map folders, `outfolder` and its sub-folders 'data', 'mse' and
    'plots' are created one level at a time with os.mkdir (so `wd` itself must
    already exist). For every folder a message states whether it was created
    or was already present.
    """
    subdirs = ['crutem4_maps_monthly', 'crutem4_maps_annual', outfolder]
    subdirs += [outfolder + leaf for leaf in ('/data', '/mse', '/plots')]
    for sub in subdirs:
        target = wd + sub
        if os.path.exists(target):
            print("Directory " , target ,  " already exists")
            continue
        os.mkdir(target)
        print("Directory " , target ,  " created")


# define the nonlinear model
def func(t, A, K, C):
    """Exponential model A * exp(-K * t) + C.

    t may be a scalar or a numpy array; A is the amplitude, K the rate
    constant and C the constant offset that remains once the exponential
    term has vanished.
    """
    decay = np.exp(-K*t)
    return A * decay + C


def fit_exp_nonlinear(t, y):
    """Fit the exponential model `func` to the points (t, y).

    Uses scipy's curve_fit with its default starting values and at most
    15000 function evaluations. Returns the fitted parameters as the tuple
    (A, K, C); the covariance estimate is discarded. Errors raised by
    curve_fit are passed on to the caller.
    """
    fitted, _covariance = optimize.curve_fit(func, t, y, maxfev=15000)
    amplitude, rate, offset = fitted
    return amplitude, rate, offset


def conf_int(sd, n, confidence=0.95):
    """Half-width of a confidence interval: standard error times t coefficient.

    sd         : standard deviation of the sample
    n          : number of values in the sample
    confidence : probability handed to the t quantile function

    NOTE(review): the t quantile is evaluated at `confidence` itself with n
    degrees of freedom; a two-sided interval for a mean would normally use
    (1 + confidence) / 2 and n - 1 - confirm which one is intended.
    """
    standard_error = sd/(np.sqrt(n))
    return standard_error*stats.t.ppf(confidence, n)


# define functions to read/write floating point numbers from/to a text file
def read_floats(filename):
    """Read a text file holding one floating point number per line.

    Blank lines (such as a trailing empty line at the end of the file) are
    skipped instead of raising a ValueError. The file is closed by the
    with-statement, so no explicit close() is needed.

    Returns a list of floats in file order.
    """
    with open(filename) as f:
        return [float(line) for line in f if line.strip()]


def write_floats(data, filename):
    """Write the numbers in data to a text file, one per line.

    Every value is formatted with "%f" (six decimal places). An existing file
    is overwritten. The with-statement closes the file even if formatting or
    writing one of the items fails.
    """
    with open(filename, 'w') as file:
        file.writelines("%f\n" % item for item in data)


# define a plotting function for the maps
def do_plot(image, file, title, tmin, tmax):
    """Show a 2D array as an image with the 'jet' colour map and save the figure.

    image      : 2D array to display
    file       : output file name including path
    title      : title of the plot
    tmin, tmax : lower and upper limit of the colour scale
    """
    fig = plt.figure(figsize=(12.0, 9.0),dpi=150)
    try:
        ax = plt.subplot(1, 1, 1)
        ax.set_title(title)
        io.imshow(image, cmap=plt.cm.jet, vmin=tmin, vmax=tmax)
        # the figure size is fixed when the figure is created; 'figsize' is not
        # an argument of savefig (recent matplotlib versions raise a TypeError for it)
        fig.savefig(file, dpi=150)
    finally:
        plt.close(fig) # always release the figure, also when plotting fails

In [None]:
##############################################################################
# make the directory structure if it does not exist
# (build_dir_structure reads the globals wd and outfolder, so the cells that
#  define them have to be run first)
##############################################################################

build_dir_structure()

########################################################################
from scipy.stats import t
import collections
########################################################################

#----------------------------------------------------------------------
def read_data(dataset):
    """Return the data behind `dataset`.

    If `dataset` is a str or bytes object it is taken as the name of a text
    file with one number per line; the file must exist (checked with an
    assertion) and its contents are returned as a numpy array of floats.
    Anything else is returned unchanged.
    """
    if not isinstance(dataset, (str, bytes)):
        return dataset

    assert os.path.isfile(dataset), "Missing \"{}\" file.".format(dataset)

    with open(dataset, "r") as handle:
        values = [float(line) for line in handle]
    return np.array(values)

#----------------------------------------------------------------------
class PyMSE:
    """Multi-scale sample entropy (MSE) of a one-dimensional time series.

    NOTE: this definition replaces the PyMSE imported from pymse_with_ci95
    at the top of the notebook.

    Attributes set by the constructor and by get():
      data        the original series (numpy array)
      data_cg     the series coarse-grained at the most recently used scale
      SCALE, M, R the scale factors, pattern lengths and tolerances in use
      DATA        list of result dictionaries of the last call of get()
    """

    #----------------------------------------------------------------------
    def __init__(self, dataset, scale=20, m=2, r=0.15, q=0.975):
        """Store the series and run get(scale, m, r, q) once.

        dataset : an iterable of numbers, or the name of a text file with one
                  number per line (read with read_data)

        Raises FileNotFoundError for a file name that does not exist and
        TypeError for any other unsupported input.
        """
        # collections.Iterable was removed in Python 3.10; the ABC lives in collections.abc
        from collections.abc import Iterable

        # A string is iterable too, so file names have to be recognised first
        if isinstance(dataset, (str, bytes)):
            if not os.path.exists(dataset):
                raise FileNotFoundError("Missing \"{}\" file.".format(dataset))
            self.data = read_data(dataset)
        elif isinstance(dataset, Iterable):
            self.data = np.array(dataset)
        else:
            raise TypeError("dataset must be an iterable of numbers or a file name.")
        self.data_cg = self.data.copy()
        self.get(scale, m, r, q)  # Everything in get() should be in init()


    #----------------------------------------------------------------------
    def get(self, scale=20, m=2, r=0.15, q=0.975):
        """Calculate the sample entropy for all combinations of scale, m and r.

        scale : scale factor, or a sequence of scale factors
        m     : pattern length, or a sequence of pattern lengths
        r     : tolerance as a fraction of the standard deviation of the
                series, or a sequence of tolerances
        q     : probability passed to the t quantile of the confidence interval

        The results are stored in self.DATA as one dictionary per (r, m)
        combination with the keys "m", "r", "q", "mse" and "ci", where "mse"
        and "ci" map each scale factor to the sample entropy and to its
        confidence interval. Returns that dictionary itself if there is only
        one, otherwise the list. After the call self.data_cg holds the
        series coarse-grained at the last scale factor.
        """
        assert isinstance(scale, (int, np.integer, list, tuple, map, range, np.ndarray)), "scale must be int, list or tuple."
        assert isinstance(m, (int, np.integer, list, tuple, map, range, np.ndarray)), "m must be int, list or tuple."
        assert isinstance(r, (int, float, list, tuple, map, range, np.ndarray)), "r must be int, float, list or tuple."
        assert isinstance(q, (float)), "q must be float."

        #Scales
        if isinstance(scale, (int, np.integer)):
            self.SCALE = [int(scale)] # convert integer to a list of length 1
        elif isinstance(scale, (list, tuple, range, np.ndarray)):
            self.SCALE = scale # leave it unchanged
        else:
            self.SCALE = range(1, 21, 1) # set a default
        self.scale_max = self.SCALE[-1]
        if len(self.SCALE) > 1:
            self.scale_step = self.SCALE[1] - self.SCALE[0]
        else:
            self.scale_step = 1

        # q
        self.q = [q]

        # Pattern lengths
        if isinstance(m, (int, np.integer)):
            self.M = [int(m)]
        elif isinstance(m, (list, tuple, range, np.ndarray)):
            self.M = m
        else:
            self.M = range(2, 3, 1) # default: the single pattern length 2
        self.m_min = self.M[0]
        self.m_max = self.M[-1]
        if len(self.M) > 1:
            self.m_step = self.M[1] - self.M[0]
        else:
            self.m_step = 1

        # Tolerances
        if isinstance(r, (int, float)):
            self.R = np.array([r], dtype=float) # a single tolerance
        elif isinstance(r, (list, tuple, range, np.ndarray)):
            self.R = r
        else:
            self.R = np.array(list(r), dtype=float) # e.g. a map object
        self.r_min = self.R[0]
        self.r_max = self.R[-1]
        if len(self.R) > 1:
            self.r_step = self.R[1] - self.R[0]
        else:
            self.r_step = 0.05

        standard_deviation = self.data.std()

        # tolerance -> list over the scales of (entropies over M, intervals over M)
        results = {}
        for sc in self.SCALE:
            self.__coarse_graining__(sc)
            for r_ in self.R:
                results.setdefault(r_, []).append(
                    self.sample_entropy(r_, standard_deviation, sc, q))

        self.DATA = []
        for r_, per_scale in results.items():
            for idx, m_ in enumerate(self.M):
                # pick the value of pattern length number idx at every scale
                mse = {sc: se[idx] for sc, (se, ci) in zip(self.SCALE, per_scale)}
                cis = {sc: ci[idx] for sc, (se, ci) in zip(self.SCALE, per_scale)}
                self.DATA.append({"m": m_, "mse": mse, "ci": cis, "r": r_, "q": q})

        if len(self.DATA) == 1:
            return self.DATA[0]
        else:
            return self.DATA

    #----------------------------------------------------------------------
    def __coarse_graining__(self, resolution):
        """Coarse-grain self.data and store the result in self.data_cg.

        The series is cut into int(len(data) / resolution) consecutive pieces
        with numpy.array_split, and every piece is replaced by its mean.
        array_split makes the pieces as equal in length as possible, so if the
        length of the series is not a multiple of resolution some pieces hold
        one value more than the others. The result is a column vector of
        shape (number of pieces, 1); it is empty if resolution exceeds the
        length of the series.
        """
        out_len = int(len(self.data) / resolution)
        if out_len < 1:
            self.data_cg = np.empty([0, 1]) # array_split cannot cut into 0 pieces
            return
        out = np.empty([out_len, 1])
        for i, piece in enumerate(np.array_split(self.data, out_len)):
            out[i] = np.mean(piece)
        self.data_cg = out

    #----------------------------------------------------------------------
    def sample_entropy(self, r, standard_deviation, scale=1, q=0.975):
        """Sample entropy of the coarse-grained series for every m in self.M.

        r                  : tolerance as a fraction of standard_deviation
        standard_deviation : standard deviation of the original series
        scale              : scale factor of the coarse-graining
        q                  : probability for the t quantile of the interval

        Returns (se, ci), two lists with one entry per pattern length in
        self.M: the sample entropy and its confidence interval
        standard_deviation * t(q, B - 1) / sqrt(B), where B is the number of
        template matches of length m. The interval is NaN if B < 2. If no
        matches are found the entropy is set to log(N * (N - 1)), with N the
        number of templates, or to 0.0 if there are fewer than two templates.
        """
        se = []
        ci = []

        nlin = float(len(self.data))
        nlin_j = int((nlin/scale) - self.m_max)
        r_new = r*standard_deviation

        # make sure that data_cg really is the series coarse-grained at this scale
        if len(self.data_cg) != int(nlin/scale):
            self.__coarse_graining__(scale)
        cg = np.asarray(self.data_cg, dtype=float).ravel()

        # cont[k] = number of template pairs that match over k consecutive points
        cont = [0] * (self.m_max+2)

        for i in range(0, nlin_j):
            for l in range(i+1, nlin_j):
                k = 0
                while k < self.m_max and (abs(cg[i+k] - cg[l+k]) <= r_new):
                    k += 1
                    cont[k] += 1
                if k == self.m_max and (abs(cg[i+self.m_max] - cg[l+self.m_max]) <= r_new):
                    cont[self.m_max+1] += 1

        for i in self.M:
            if cont[i] > 1:
                # stats.t is used rather than a module-level name 't', which the
                # notebook also uses for the time variable of the netCDF file
                interval = standard_deviation * stats.t.ppf(q, cont[i] - 1) / np.sqrt(cont[i])
            else:
                interval = np.nan # not defined for fewer than two matches

            if cont[i+1] == 0 or cont[i] == 0:
                if nlin_j > 1:
                    se.append(-1 * np.log(1.0/(nlin_j*(nlin_j-1))))
                    ci.append(interval)
                else:
                    se.append(0.0)
                    ci.append(0.0)
            else:
                se.append(-1 * np.log(float(cont[i+1])/cont[i]))
                ci.append(interval)
        return se, ci


    def conf_int(self, r, standard_deviation, scale=1):
        """95% confidence interval of the sample entropy for every m in self.M.

        Parameters
        ----------
        r : tolerance as a fraction of standard_deviation
        standard_deviation : standard deviation of the original series
        scale : scale factor of the coarse-graining

        Returns
        -------
        List with one value per pattern length in self.M, calculated as
        SD * t(0.975, B - 1) / sqrt(B), where B is the number of template
        matches and SD is standard_deviation.

        This is the interval that sample_entropy() calculates for q = 0.975.
        The previous separate implementation failed with a TypeError as soon
        as it tried to evaluate the interval (it indexed the PyMSE object
        itself) and returned 0.0 whenever matches had been found.
        """
        return self.sample_entropy(r, standard_deviation, scale, q=0.975)[1]


    #----------------------------------------------------------------------
    def __standard_deviation__(self):
        """Sample standard deviation (n - 1 in the denominator) of self.data."""
        nlin = float(len(self.data))
        sum_ = sum(self.data)
        sum2_ = sum(self.data*self.data)

        return np.sqrt((sum2_ - sum_*(sum_/nlin))/(nlin - 1))

In [None]:
##############################################################################
# Multi-scale entropy analysis of temperature data for each grid box
##############################################################################

def _contiguous_chunks(ts, bp):
    """Return (ti0, ts0, ti1, ts1): the gap-free stretches of ts next to the breakpoint.

    ts0 = ts[ti0:bp] is the longest stretch without NaN that ends directly
    before the breakpoint, ts1 = ts[bp:ti1] the longest stretch without NaN
    that starts at the breakpoint. The first and the last value of the series
    can be part of a chunk. Values beyond the first gap are dropped; nothing
    is interpolated.
    """
    ts = np.asarray(ts, dtype=float)
    isgap = np.isnan(ts)
    gaps_before = np.flatnonzero(isgap[:bp])
    gaps_after = np.flatnonzero(isgap[bp:])
    ti0 = int(gaps_before[-1]) + 1 if gaps_before.size else 0
    ti1 = bp + int(gaps_after[0]) if gaps_after.size else max(bp, len(ts))
    return ti0, ts[ti0:bp], ti1, ts[bp:ti1]


def _plot_trends(file, title, x0, ts0, x1, ts1):
    """Plot both chunks with their linear trend (if significant) and the detrended data."""
    fig, axarr = plt.subplots(2, 2, figsize=(12.0, 9.0), dpi=150)
    fig.suptitle(title)
    for j, (xs, ys, name) in enumerate(((x0, ts0, 'TS0'), (x1, ts1, 'TS1'))):
        slope, intercept, r_value, p_value, std_err = stats.linregress(xs, ys)
        axarr[0, j].set_title('Time series ' + name)
        axarr[0, j].plot(xs, ys, '.k', ms=2)
        if p_value < 0.05:
            pred = intercept + slope * xs # xs has to be an array here, not a range
            axarr[0, j].plot(xs, pred, '-r')
            detrended = ys - pred
        else:
            detrended = ys
        axarr[1, j].set_title('Detrended ' + name)
        axarr[1, j].plot(xs, detrended, '.k', ms=2)
    # Fine-tune figure; hide x ticks for top plots and y ticks for right plots
    plt.setp([a.get_xticklabels() for a in axarr[0, :]], visible=False)
    plt.setp([a.get_yticklabels() for a in axarr[:, 1]], visible=False)
    fig.savefig(file, dpi=150)
    plt.close(fig)


def _mse_profile(series):
    """Return (mse, ci, sd): sample entropy and confidence interval at the
    scales 1..maxscale, and the standard deviation of the series."""
    obj = PyMSE(series)
    sd = np.std(obj.data)
    mse = np.zeros([maxscale,])
    ci = np.zeros([maxscale,])
    for i in range(0, maxscale):
        # get() sets the pattern length and coarse-grains the series at this scale
        obj.get(scale=i+1, m=m, r=r, q=q)
        # sample entropy and confidence interval for the given r, sd and scale factor
        out = obj.sample_entropy(r, sd, scale=i+1, q=q)
        mse[i] = out[0][0]
        ci[i] = out[1][0]
    return mse, ci, sd


def _fit_profile(x, mse):
    """Fit the exponential model to the estimates with SE < 2 and predict all scales.

    Values of SE >= 2 could not be estimated from the data and have been
    assigned a theoretical upper bound. Returns NaN predictions if the model
    cannot be fitted, so that one grid box does not stop the whole analysis.
    """
    keep = mse < 2
    try:
        A, K, C = fit_exp_nonlinear(x[keep], mse[keep])
    except (RuntimeError, TypeError, ValueError) as err:
        print('WARNING, exponential model could not be fitted: ' + str(err))
        return np.full(x.shape, np.nan)
    return func(x, A, K, C)


def _plot_mse_chunk(file, x, mse, ci, predicted, title):
    """Plot the MSE curve of one chunk (leaving out SE >= 2) with its interval and model."""
    keep = mse < 2
    fig = plt.figure(figsize=(12.0, 9.0),dpi=150) # The size of the figure is specified as (width, height) in inches
    plt.plot(x[keep], mse[keep], 'ok')
    plt.plot(x[keep], mse[keep], '--g')
    plt.plot(x[keep], mse[keep] + ci[keep], ':g')
    plt.plot(x[keep], mse[keep] - ci[keep], ':g')
    plt.xlabel('scale factor')
    plt.ylabel('SE')
    plt.title(title)
    plt.plot(x, predicted, 'k-')
    fig.savefig(file, dpi=150)
    plt.close(fig)
    print('Created file: '+file)


# output folders (created by build_dir_structure)
datadir = wd + outfolder + '/data/'
msedir = wd + outfolder + '/mse/'
plotdir = wd + outfolder + '/plots/'

# create arrays for the results
nrows, ncols = crutem4.shape[1], crutem4.shape[2] # size of the grid
msestack = np.zeros([nrows,ncols,2,maxscale]) # store the results of MSE for each grid box and chunk
predict_msestack = np.zeros([nrows,ncols,2,maxscale]) # store the model predictions
cistack = np.zeros([nrows,ncols,2,maxscale]) # store the confidence intervals

x = np.arange(1,maxscale+1,dtype=float) # all scale factors on x axis

# loop over all grid boxes
for row in range(0,nrows):
    for col in range(0,ncols):
        # Pull out the time series data for this grid box
        ts = np.array(crutem4[:,row,col]) # temperature time series data for that grid box

        # PyMSE needs series without missing values: use the gap-free stretch
        # before the breakpoint (chunk 0) and the one after it (chunk 1)
        ti0, ts0, ti1, ts1 = _contiguous_chunks(ts, bp)

        # count the number of values in chunks 1 and 2
        n1 = len(ts0)
        n2 = len(ts1)

        # only proceed if both chunks of the time series are long enough
        if (n1 < minlen) or (n2 < minlen):
            print('WARNING, Row '+str(row+1)+' Col '+str(col+1)+' excluded from analysis: Not enough data points. nmin='+str(minlen)+' n='+str(nt)+ ' n1='+str(n1)+' n2='+str(n2))
            continue
        print('Row '+str(row+1)+' Col '+str(col+1)+' suitable for analysis.  n='+str(nt)+' n1='+str(n1)+' n2='+str(n2))
        tag = str(row+1)+'_'+str(col+1)

        # plot the original time series data and the trend line if significant
        # note that we will continue to use the original data in the MSE analysis
        # and not the detrended data, because we do not want to change the variances
        if plottrends:
            file = plotdir + 'crutem4_trends_R'+str(row+1)+'_C'+str(col+1)
            title = 'CRUTEM4, R='+str(row+1)+' C='+str(col+1)
            _plot_trends(file, title, np.arange(ti0, bp), ts0, np.arange(bp, ti1), ts1)

        # write the chunks to a separate text file for further analysis
        for part, chunk in ((1, ts0), (2, ts1)):
            file = datadir+'crutem4_ts_'+tag+'_'+str(part)+'.txt'
            write_floats(chunk, file)
            print('created text file: '+file)

        # calculate the multi-scale sample entropy of both chunks
        mse0, ci0, sd0 = _mse_profile(ts0)
        mse1, ci1, sd1 = _mse_profile(ts1)

        # Fit the exponential model, masking out SE >= 2
        predict_mse0 = _fit_profile(x, mse0)
        predict_mse1 = _fit_profile(x, mse1)

        #Calculate prediction error
        residuals0 = mse0 - predict_mse0
        residuals1 = mse1 - predict_mse1

        #save results to text file
        file = datadir+'crutem4_mse_'+tag+'_1.txt'
        np.savetxt(file, np.column_stack((x, mse0, predict_mse0, ci0, residuals0)), delimiter='\t')
        print('Created text file: '+file)
        file = datadir+'crutem4_mse_'+tag+'_2.txt'
        np.savetxt(file, np.column_stack((x, mse1, predict_mse1, ci1, residuals1)), delimiter='\t')
        print('Created text file: '+file)

        # save MSE plots to graphics files, each with the sd of its own chunk in the title
        _plot_mse_chunk(msedir+'crutem4_mse_'+tag+'_1.tif', x, mse0, ci0, predict_mse0,
                        ('CRUTEM4, R%d C%d, part=%d, r=%4.2f, m=%d, sd=%4.2f') % (row+1,col+1,1,r,m,sd0))
        _plot_mse_chunk(msedir+'crutem4_mse_'+tag+'_2.tif', x, mse1, ci1, predict_mse1,
                        ('CRUTEM4, R%d C%d, part=%d, r=%4.2f, m=%d, sd=%4.2f') % (row+1,col+1,2,r,m,sd1))

        #keep MSE estimates for later
        msestack[row,col,0,:] = mse0
        cistack[row,col,0,:] = ci0
        predict_msestack[row,col,0,:] = predict_mse0
        msestack[row,col,1,:] = mse1
        cistack[row,col,1,:] = ci1
        predict_msestack[row,col,1,:] = predict_mse1

        # title of the plots that show both chunks
        title = ('CRUTEM4, R%d C%d, r=%4.2f, m=%d, sd0=%4.2f, sd1=%4.2f') % (row+1,col+1,r,m,sd0,sd1)

        # save MSE plots with predicted models for all chunks in one figure
        file = plotdir+'crutem4_mse_'+tag+'_all.tif'
        fig = plt.figure(figsize=(12.0, 9.0),dpi=150) # The size of the figure is specified as (width, height) in inches
        for ch in range(0,2):
            plt.plot(x, msestack[row,col,ch,:], 'o', color=cols[ch], label = 'TS'+str(ch))
            plt.plot(x, msestack[row,col,ch,:], '--', color=cols[ch])
            plt.plot(x, msestack[row,col,ch,:] + cistack[row,col,ch,:], ':', color=cols[ch])
            plt.plot(x, msestack[row,col,ch,:] - cistack[row,col,ch,:], ':', color=cols[ch])
            plt.plot(x, predict_msestack[row,col,ch,:], '-', color=cols[ch])
        plt.legend()
        plt.xlabel('scale factor')
        plt.ylabel('SE')
        plt.title(title)
        fig.savefig(file, dpi=150)
        print('Created file: '+file)

        # save model results to graphics file
        file = plotdir+'crutem4_mse_'+tag+'_all_models.tif'
        fig = plt.figure(figsize=(12.0, 9.0),dpi=150) # The size of the figure is specified as (width, height) in inches
        for ch in range(0,2):
            plt.plot(x, predict_msestack[row,col,ch,:], '-', color=cols[ch], label = 'TS'+str(ch))
        plt.legend()
        plt.xlabel('scale factor')
        plt.ylabel('SE')
        plt.title(title)
        fig.savefig(file, dpi=150)
        print('Created file: '+file)

        # save MSE anomaly plot to graphics file, with chunk 0 being the baseline
        file = plotdir+'crutem4_mse_'+tag+'_anomalies.tif'
        fig = plt.figure(figsize=(12.0, 9.0),dpi=150) # The size of the figure is specified as (width, height) in inches
        y = msestack[row,col,1,:] - msestack[row,col,0,:]
        plt.plot(x, y, 'o', color=cols[1], label = 'TS1 anomaly') # colour of chunk TS1
        plt.plot(x, y, '--', color=cols[1])
        plt.legend()
        plt.xlabel('scale factor')
        plt.ylabel('SE anomaly')
        plt.title(title)
        plt.plot([0,maxscale], [0,0], 'k', linestyle='dotted')
        fig.savefig(file, dpi=150)
        print('Created figure file: '+file)

        plt.close('all')

        # now do the next column, then the next row
                

That took a while to process. Now let us save the results to some numpy data files.

# Your portfolio task

Inspect the output files and write a one-page summary of the MSE data analysis results for 2-3 different types of grid boxes. Describe what you see in the data and how you interpret the outputs.

In [None]:
# Persist the analysis arrays, one file per array: the MSE results, the ci for
# the MSE results, and the crutem4v data.
# Note: ndarray.dump() writes a pickle of the array, so despite the .npy
# extension these files are read back with np.load(file, allow_pickle=True)
# or pickle.load().
for _arr, _relpath in ((msestack, 'crutem4_mse/data/msestack.npy'),
                       (cistack, 'crutem4_mse/data/cistack.npy'),
                       (crutem4, 'crutem4_mse/data/crutem4.npy')):
    file = wd+_relpath
    _arr.dump(file)
    print('Created Numpy data file: '+file)

As the final step, we want to visualise two maps that show globally where the sample entropy in the data has changed between the first and second chunks of the time series. We are interested in two quantities: (i) the largest change (in absolute magnitude) in sample entropy from the first chunk to the second across all scale factors, and (ii) the scale factor at which that largest change in entropy has occurred.

In [None]:
##############################################################################
# Make global maps of MSE results
##############################################################################

# 1. Make a map of the greatest entropy change from TS1 to TS2
# For each grid cell the map shows the change in sample entropy (chunk 1 minus
# chunk 0) that is largest in absolute magnitude across all scale factors.
# Negative values show a decrease of SE from T0 to T1.
# NOTE: no significance test is applied in this cell (cistack is not used), so
# the map shows every unmasked change, not only statistically significant ones.
msechange = ma.asarray(msestack[:,:,1,:] - msestack[:,:,0,:])
# mask out all entries where at least one of the chunks has zero value
# (presumably zero marks "no MSE estimate stored" -- TODO confirm against the
# initialisation of msestack)
msechange.mask = ma.mask_or(msestack[:,:,1,:] == 0, msestack[:,:,0,:] == 0)
# mask out spurious values (greater difference in SE than +- 2)
msechange.mask = ma.mask_or(msechange.mask, msechange < -2)
msechange.mask = ma.mask_or(msechange.mask, msechange > 2)
# largest positive and largest negative change over all scale factors for each grid cell
msechangemax = msechange.max(axis=2)
msechangemin = msechange.min(axis=2)
# start from the maximum and switch to the minimum wherever the minimum is at
# least as large in absolute magnitude (the selector is computed only once)
min_dominates = msechangemax <= abs(msechangemin)
mapdata = ma.copy(msechangemax)
mapdata[min_dominates] = msechangemin[min_dominates]
file = wd+'crutem4_mse/plots/map_magn_of_maxSEchange.tif'
# symmetric range around zero; not passed to do_worldmap below -- kept in case
# other code reads zlim (TODO confirm, then remove if unused)
z = max(mapdata.min(), mapdata.max(), key=abs)
zlim = -z, z
# define colour range
ymin = np.amin(mapdata)
ymax = np.amax(mapdata)
title = 'magnitude of greatest entropy change'
label='greatest SE change'
do_worldmap(mapdata, lat, lon, ymin, ymax, file, title, label)
print('Created file: '+file)

# 2. Make a map of the scale factor where the greatest change in SE occurs
# np.argmax / np.argmin return 0-based positions along the scale axis, whereas
# the scale factors themselves start at 1, so add 1 to turn positions into
# scale factors before mapping them.
scalefacmax = np.argmax(msechange, axis=2) + 1 # scale factor of the maximum change for each grid cell
scalefacmin = np.argmin(msechange, axis=2) + 1 # scale factor of the minimum change for each grid cell
# use the scale factor of the minimum wherever the minimum is at least as large
# in absolute magnitude as the maximum, otherwise that of the maximum
min_dominates = msechangemax <= abs(msechangemin)
mapdata = ma.copy(scalefacmax)
mapdata[min_dominates] = scalefacmin[min_dominates]
mapdata = ma.asarray(mapdata)
# hide grid cells that have no valid SE change at any scale factor
mapdata.mask = msechangemax.mask
file = wd+'crutem4_mse/plots/map_scalefactor_of_maxSEchange.tif'
# not passed to do_worldmap below -- kept in case other code reads zlim
# (TODO confirm, then remove if unused)
zlim = 0, mapdata.max()
# define colour range
ymin = np.amin(mapdata)
ymax = np.amax(mapdata)
title = 'scale factor of greatest entropy change'
label = 'scale factor'
do_worldmap(mapdata, lat, lon, ymin, ymax, file, title, label)
print('Created file: '+file)

Neat, isn't it? Take a look at the global maps.

# Your portfolio task

Inspect the output map files and write a half-page summary of the data and your interpretation of what they show for your portfolio, describing the 2 maps. Include the 2 maps on the second half of the page.