In [4]:
from mpetools import IslandTime, TimeSeriesPreProcess
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import pytz
import os
import zipfile
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import pymannkendall as mk
import statsmodels.api as sm
import tsfresh

%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [83]:
# Closed ring of corner points (first vertex repeated at the end).
# NOTE(review): pairs look like (lat, lon) before the flip — TODO confirm.
corner_points = np.array([[
    [0.344522, 73.499597],
    [0.321674, 73.499597],
    [0.321674, 73.525570],
    [0.344522, 73.525570],
    [0.344522, 73.499597],
]])

# Reverse every axis (what np.flip does without an axis argument): the vertex
# order is reversed and the two values of each pair are swapped.
polygon = corner_points[::-1, ::-1, ::-1].tolist()

In [84]:
# Build the PreTimeSeries object for this island (with the polygon defined above),
# then run its main() entry point and keep what it returns.
pre_timeseries = IslandTime.PreTimeSeries('Maavaarulu', 'Maldives', polygon=polygon)
island_info = pre_timeseries.main()


-------------------------------------------------------------------
Retrieving general and spatial information about the island
Island: Maavaarulu, Maldives
-------------------------------------------------------------------

~ All information will be extracted/calculated. ~
Island not available in OpenStreetMap. Will use other methods.
Coordinates from Wikipedia (lat/lon): 0.4166666666666667 73.16666666666667
Coordinates from GeoKeo (lat/lon): nan nan
S2 73
L5 0
L7 2
L8 54
L9 9
No other information available.
~ Retrieving information from Duvat & Magnan (2019). ~
Island not found in the database.


In [47]:
# NOTE(review): `df` is not assigned by any active cell visible here (the only
# assignment in view is commented out) — TODO confirm where it comes from.
optimal_period = df['timeseries_preprocessing']['optimal time period']
df2 = optimal_period['dict_timeseries']['coastline_position_transect_0']['monthly']
df2.plot()

<Axes: >

In [53]:
# Collect the month-end timestamps lying between 1 April and 1 September of
# every year from 2013 through 2022 (five per year: April to August).
yearly_frames = []
for yr in range(2013, 2023):
    # An explicit MonthEnd offset gives month-end frequency without depending
    # on the spelling of the frequency alias, which varies across pandas versions.
    dr = pd.date_range(datetime.datetime(yr, 4, 1), datetime.datetime(yr, 9, 1), freq=pd.offsets.MonthEnd())
    df_dr = pd.DataFrame(index=dr)
    yearly_frames.append(df_dr)

# Concatenate once instead of growing the frame inside the loop
# (no special case for the first year, no repeated copying).
df_dr_t = pd.concat(yearly_frames)

# Make the index timezone-aware (UTC) in a single vectorised call.
df_dr_t.index = df_dr_t.index.tz_localize('UTC')

In [58]:
# One boolean per row of df2: True where the row's index value also occurs
# in the index of df_dr_t.
condition = df2.index.isin(df_dr_t.index)

# Wrap the flags in a Series that shares df2's index.
mask = pd.Series(data=condition, index=df2.index)

In [76]:
# Keep df2's values where the mask is True; everything else becomes NaN.
masked_df = df2.where(cond=mask, other=np.nan)

In [77]:
# Plot the masked data; the returned Axes is the value displayed by the cell.
masked_axes = masked_df.plot()
masked_axes

<Axes: >

In [89]:
# Pull the 'timeseries' entry stored under 'timeseries_coastsat' and plot it.
coastsat_timeseries = island_info['timeseries_coastsat']['timeseries']
coastsat_timeseries.plot()

<Axes: xlabel='datetime'>

In [87]:
# Retrieve the information available for this island.
island_name, country_name = "Kanduhulhudhoo", 'Maldives'
island_info = IslandTime.retrieve_island_info(island_name, country_name)
#df = TimeSeriesPreProcess.TimeSeriesPreProcess(island_info).main()


-------------------------------------------------------------------
Retrieving all information available for the island.
Island: Kanduhulhudhoo, Maldives
-------------------------------------------------------------------

~ The following information is available: ~

general_info
               island
               country
               part of
               located in the administrative territorial entity
               located in or next to body of water
spatial_reference
               latitude
               longitude
               polygon
               polygon_OSM
               reference_shoreline
               transects
               transects_direction
               area_country
image_collection_dict
               description
               S2
               L7
               L8
               L9
timeseries_CRW
               description
               description_timeseries
               source
               timeseries
timeseries_nighttime_light
               desc

In [11]:
import json

# Location of data/iod.json, resolved against the current working directory.
path = os.path.join(os.getcwd(), 'data', 'iod.json')

# Read through a context manager so the file handle is closed as soon as the
# JSON has been parsed (a bare open() inside json.load() leaves it open).
with open(path) as iod_file:
    dd = json.load(iod_file)

In [42]:
import calendar


def _decimal_year_to_datetime(decimal_year):
    """Convert a decimal year (e.g. 2015.5) into a ``datetime.datetime``.

    The month is the twelfth of the year the fraction falls in. The day is the
    position inside that month, scaled by the month's real number of days, so
    it always lies in ``1..days_in_month``. (Scaling the *year* fraction by 31
    does not yield a day-of-month: every sample of a given month would land in
    the same narrow band of days.)
    """
    year = int(decimal_year)
    months_elapsed = (decimal_year - year) * 12
    month = int(months_elapsed) + 1
    days_in_month = calendar.monthrange(year, month)[1]
    day = int((months_elapsed - int(months_elapsed)) * days_in_month) + 1
    return datetime.datetime(year, month=month, day=day)


# Each item carries 'x' (decimal year) and 'y' (value) as parseable numbers.
date = [_decimal_year_to_datetime(float(item['x'])) for item in dd['items']]
iod = [float(item['y']) for item in dd['items']]

plt.plot(date, iod)

# Average the samples per month-end bin.
df_iod = pd.DataFrame(iod, index=date, columns=['iod']).groupby([pd.Grouper(freq='M')]).mean()

[<matplotlib.lines.Line2D at 0x1887356db90>]

In [197]:
# Run the pre-processing for the current island_info over the requested date range.
preprocessing_window = ['2015-01-01', '2020-01-01']
preprocessor = TimeSeriesPreProcess.TimeSeriesPreProcess(island_info, date_range=preprocessing_window)
df1 = preprocessor.main()


-------------------------------------------------------------------
Time series pre-processing
Retrieving all available time series between 2015-01-01 and 2020-01-01 at a monthly frequency
Island: Keredhdhoo, Maldives
-------------------------------------------------------------------



In [35]:
spatial_reference = island_info['spatial_reference']
reference_shoreline = spatial_reference['reference_shoreline']
transects = spatial_reference['transects']

# Reference shoreline as a black line (column 0 against column 1).
shoreline_x, shoreline_y = reference_shoreline[:, 0], reference_shoreline[:, 1]
plt.plot(shoreline_x, shoreline_y, 'k-')

# Each transect as a red line, labelled with its key at the transect's last point.
for transect_id in transects:
    transect_xy = transects[transect_id]
    plt.plot(transect_xy[:, 0], transect_xy[:, 1], 'r-')
    end_x, end_y = transect_xy[-1, 0], transect_xy[-1, 1]
    plt.text(end_x, end_y, transect_id, fontsize=12)

plt.axis('equal')

(8146162.545596638, 8146429.111857782, 95136.74954338223, 95809.2182611987)

In [198]:
# Monthly frame stored for transect 8 in the pre-processing result.
optimal_period = df1['preprocessed_timeseries']['optimal time period']
df2 = optimal_period['dict_timeseries']['coastline_position_transect_8']['monthly']

# Columns kept for the analyses below: the coastline position plus six other variables.
selected_columns = [
    'coastline_position_transect_8',
    'sea_level_anomaly',
    'total_precipitation',
    'sea_surface_temperature',
    'mean_sea_level_pressure',
    '2_metre_temperature',
    'wave_energy_of_combined_wind_waves_and_swell',
]
ts = df2[selected_columns]

In [169]:
# Monthly IOD frame, restricted to 2015-2023, with a dashed zero line.
df_iod.plot()
window_start = datetime.datetime(2015, 1, 1)
window_end = datetime.datetime(2023, 1, 1)
plt.xlim(window_start, window_end)
plt.axhline(y=0, linestyle='--', alpha=0.5, color='k')

<matplotlib.lines.Line2D at 0x188761ea390>

In [168]:
iod_index = df_iod.index
iod_values = df_iod.iod

# Black curve, with the area between the curve and zero shaded purple where
# the value is positive and green where it is negative.
plt.plot(iod_index, iod_values, color='k')
for sign_mask, shade in ((iod_values > 0, 'purple'), (iod_values < 0, 'green')):
    plt.fill_between(iod_index, iod_values, 0, where=sign_mask, color=shade, alpha=0.5)

plt.xlim(datetime.datetime(2013, 1, 1), datetime.datetime(2023, 1, 1))


(15706.0, 19358.0)

In [199]:
from statsmodels.tsa.seasonal import STL

# STL decomposition of the transect-8 coastline position (seasonal=13);
# the trend component is the value displayed by the cell.
coastline_series = ts['coastline_position_transect_8']
stl = STL(coastline_series, seasonal=13)
result = stl.fit()
result.trend

2014-11-30 00:00:00+00:00    84.635702
2014-12-31 00:00:00+00:00    84.669637
2015-01-31 00:00:00+00:00    84.710946
2015-02-28 00:00:00+00:00    84.763778
2015-03-31 00:00:00+00:00    84.826106
                               ...    
2022-04-30 00:00:00+00:00    77.051949
2022-05-31 00:00:00+00:00    76.550159
2022-06-30 00:00:00+00:00    76.040818
2022-07-31 00:00:00+00:00    75.522157
2022-08-31 00:00:00+00:00    74.995910
Freq: M, Name: trend, Length: 94, dtype: float64

In [206]:
# Transect-8 coastline position (black) together with the trend held in `result` (red).
coastline = ts['coastline_position_transect_8']
# Vertical extent shared by every shaded band below.
y_low, y_high = min(coastline), max(coastline)

plt.plot(coastline.index, coastline.values, color='k')
plt.plot(coastline.index, result.trend, color='r', label='trend analysis')

'''
# Indian Monsoon
for yr in range(2013, 2023):
    if yr == 2013:
        plt.fill_between(pd.date_range(datetime.datetime(yr, 4, 1), datetime.datetime(yr, 9, 1), freq='M'), min(ts['coastline_position_transect_8']), max(ts['coastline_position_transect_8']), color='orange', alpha=0.2, label='Indian Monsoon')
    else:
        plt.fill_between(pd.date_range(datetime.datetime(yr, 4, 1), datetime.datetime(yr, 9, 1), freq='M'), min(ts['coastline_position_transect_8']), max(ts['coastline_position_transect_8']), color='orange', alpha=0.2)
 '''
# El Nino
#plt.fill_between(pd.date_range(datetime.datetime(2015, 3, 1), datetime.datetime(2016, 5, 1), freq='M'), min(ts['coastline_position_transect_8']), max(ts['coastline_position_transect_8']), color='r', alpha=0.2, label='2015/2016 El Nino')

# La Nina
#plt.fill_between(pd.date_range(datetime.datetime(2020, 8, 1), datetime.datetime(2021, 5, 1), freq='M'), min(ts['coastline_position_transect_8']), max(ts['coastline_position_transect_8']), color='b', alpha=0.2, label='2020/2021 La Nina')

# 2021 storm
plt.axvline(datetime.datetime(2021, 5, 1), color='grey', linestyle='--', label='2021 storm')

# Shaded IOD periods, drawn in this order: both positive spans, then both
# negative spans. Only the first span of each sign carries a legend label.
iod_periods = [
    ((2015, 1, 1), (2016, 4, 1), 'purple', {'label': 'positive IOD'}),
    ((2018, 12, 1), (2020, 7, 1), 'purple', {}),
    ((2016, 4, 1), (2018, 12, 1), 'green', {'label': 'negative IOD'}),
    ((2020, 7, 1), (2022, 8, 1), 'green', {}),
]
for period_start, period_end, shade, legend_kwargs in iod_periods:
    period_dates = pd.date_range(datetime.datetime(*period_start), datetime.datetime(*period_end), freq='M')
    plt.fill_between(period_dates, y_low, y_high, color=shade, alpha=0.2, **legend_kwargs)

plt.xlim(datetime.datetime(2015, 1, 1), datetime.datetime(2022, 8, 1))
plt.legend()

<matplotlib.legend.Legend at 0x188594316d0>

In [61]:
# Scatter of total precipitation against the transect-6 coastline position.
# NOTE(review): 'coastline_position_transect_6' is not among the columns that
# the visible cell selects into `ts` (it selects transect 8) — TODO confirm
# which `ts` this cell is meant to run against.
coastline_values = ts['coastline_position_transect_6']
precipitation_values = ts['total_precipitation']
plt.plot(coastline_values, precipitation_values, 'o')

[<matplotlib.lines.Line2D at 0x22f2859dd50>]

In [71]:
# Line plot of the transect-8 coastline position column.
coastline_series = ts['coastline_position_transect_8']
coastline_series.plot()

<Axes: >

In [74]:
import seaborn as sns

In [76]:
# Scatter with fitted regression line: sea level anomaly against the
# transect-8 coastline position.
sns.regplot(x='coastline_position_transect_8', y='sea_level_anomaly', data=ts)

<Axes: xlabel='coastline_position_transect_8', ylabel='sea_level_anomaly'>

In [80]:
from statsmodels.tsa.stattools import adfuller
def adf_test(timeseries):
    """Run ``adfuller`` (autolag='AIC') on ``timeseries`` and tabulate the result.

    Returns a ``pd.Series`` holding the first four items of the ``adfuller``
    output under the labels 'Test Statistic', 'P-value', 'Lags Used' and
    'No of Observations', followed by one 'Critical Value (<level>)' entry for
    each key of the mapping found at position 4 of that output.
    """
    dftest = adfuller(timeseries, autolag='AIC')

    labels = ['Test Statistic', 'P-value', 'Lags Used', 'No of Observations']
    values = list(dftest[:4])

    # Append the critical values in the order the mapping yields them.
    for level, threshold in dftest[4].items():
        labels.append(f'Critical Value ({level})')
        values.append(threshold)

    return pd.Series(values, index=labels)
    
# Apply the ADF summary to each column of ts (axis 'index' is the same as axis 0).
ts.apply(adf_test, axis='index')

Unnamed: 0,coastline_position_transect_8,sea_level_anomaly,total_precipitation,sea_surface_temperature,mean_sea_level_pressure,2_metre_temperature,wave_energy_of_combined_wind_waves_and_swell
Test Statistic,-4.925895,-3.729386,-8.6372,-1.838609,-5.095649,-6.034048,-2.237991
P-value,3.1e-05,0.003718,5.549446e-14,0.361469,1.4e-05,1.395755e-07,0.192747
Lags Used,4.0,0.0,0.0,12.0,0.0,5.0,11.0
No of Observations,92.0,96.0,96.0,84.0,96.0,91.0,85.0
Critical Value (1%),-3.503515,-3.500379,-3.500379,-3.510712,-3.500379,-3.504343,-3.509736
Critical Value (5%),-2.893508,-2.892152,-2.892152,-2.896616,-2.892152,-2.893866,-2.896195
Critical Value (10%),-2.583824,-2.5831,-2.5831,-2.585482,-2.5831,-2.584015,-2.585258


In [90]:
from scipy import signal
def ccf_values(series1, series2):
    """Return the full cross-correlation of two standardised series.

    ``series1`` is centred on its mean and divided by its standard deviation
    times its length; ``series2`` is centred on its mean and divided by its
    standard deviation. The two are then correlated with
    ``np.correlate(..., 'full')``, so the result has
    ``len(series1) + len(series2) - 1`` entries.
    """
    scaled_first = (series1 - np.mean(series1)) / (np.std(series1) * len(series1))
    scaled_second = (series2 - np.mean(series2)) / (np.std(series2))
    return np.correlate(scaled_first, scaled_second, 'full')
    
# Cross-correlation between wave energy (first argument) and the transect-8
# coastline position (second argument); the array is the cell's displayed value.
wave_energy = ts['wave_energy_of_combined_wind_waves_and_swell']
coastline_position = ts['coastline_position_transect_8']
ccf_ielts = ccf_values(wave_energy, coastline_position)
ccf_ielts

array([-2.85931110e-03, -2.52770811e-03,  1.92313478e-02,  2.58492237e-02,
        1.93432045e-02,  4.51015870e-03, -8.49969099e-03, -3.90869857e-02,
       -6.76287709e-02, -8.70060408e-02, -8.15150227e-02, -5.03735474e-02,
        3.23268987e-03,  5.53253326e-02,  1.00792563e-01,  1.08089129e-01,
        7.06030816e-02,  4.18643847e-03, -5.88816946e-02, -9.35966987e-02,
       -1.30778424e-01, -1.52381164e-01, -1.26039967e-01, -3.76278186e-02,
        4.09568103e-02,  1.33357308e-01,  1.75084034e-01,  1.96346048e-01,
        1.38635136e-01,  7.68942595e-02, -2.00439169e-02, -8.49267010e-02,
       -1.55643471e-01, -1.81308728e-01, -1.58825212e-01, -6.87933079e-02,
        4.02991293e-02,  1.25885475e-01,  1.96321292e-01,  2.14926627e-01,
        2.24794338e-01,  9.19014884e-02,  2.07014397e-02, -1.12221331e-01,
       -1.77940502e-01, -2.54023676e-01, -2.12290591e-01, -1.34709038e-01,
        7.47286403e-03,  1.68394465e-01,  2.26990349e-01,  2.69089661e-01,
        2.35161602e-01,  

In [91]:
import statsmodels
# Import the test function from its submodule explicitly: a bare
# `import statsmodels` does not by itself load `statsmodels.tsa.stattools`,
# so the attribute chain only resolves when another cell imported it first.
from statsmodels.tsa.stattools import grangercausalitytests

# NOTE(review): per the statsmodels documentation the test asks whether the
# SECOND column Granger-causes the FIRST. With this ordering that means
# "coastline position -> wave energy" — TODO confirm this is the intended direction.
granger_columns = ['wave_energy_of_combined_wind_waves_and_swell', 'coastline_position_transect_8']
grangercausalitytests(ts[granger_columns], maxlag=24)


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=8.8220  , p=0.0038  , df_denom=93, df_num=1
ssr based chi2 test:   chi2=9.1066  , p=0.0025  , df=1
likelihood ratio test: chi2=8.7002  , p=0.0032  , df=1
parameter F test:         F=8.8220  , p=0.0038  , df_denom=93, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=4.9585  , p=0.0091  , df_denom=90, df_num=2
ssr based chi2 test:   chi2=10.4679 , p=0.0053  , df=2
likelihood ratio test: chi2=9.9303  , p=0.0070  , df=2
parameter F test:         F=4.9585  , p=0.0091  , df_denom=90, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=2.4513  , p=0.0688  , df_denom=87, df_num=3
ssr based chi2 test:   chi2=7.9457  , p=0.0471  , df=3
likelihood ratio test: chi2=7.6276  , p=0.0544  , df=3
parameter F test:         F=2.4513  , p=0.0688  , df_denom=87, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.3455  , p=0.2599  , df_d

{1: ({'ssr_ftest': (8.821993726490858, 0.003786574749331714, 93.0, 1),
   'ssr_chi2test': (9.106574169280885, 0.002546924334512347, 1),
   'lrtest': (8.700157058101752, 0.003181826339078901, 1),
   'params_ftest': (8.821993726490867, 0.003786574749331714, 93.0, 1.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x22f0ec3a510>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x22f2ed636d0>,
   array([[0., 1., 0.]])]),
 2: ({'ssr_ftest': (4.958461641794639, 0.009060570926936169, 90.0, 2),
   'ssr_chi2test': (10.467863466010906, 0.005332517995155868, 2),
   'lrtest': (9.93029330546625, 0.0069769274485908936, 2),
   'params_ftest': (4.958461641794631, 0.00906057092693627, 90.0, 2.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x22f2ed60a50>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x22f2ed60710>,
   array([[0., 0., 1., 0., 0.],
          [0., 0., 0., 1., 0.]])]),
 3: ({'ssr_ftest': (2.45132266

In [92]:
# Lag values matching a cross-correlation of the two series, given their lengths.
n_wave_energy = len(ts['wave_energy_of_combined_wind_waves_and_swell'])
n_coastline = len(ts['coastline_position_transect_8'])
lags = signal.correlation_lags(n_wave_energy, n_coastline)

def ccf_plot(lags, ccf, n_obs=None, title='Cross-correlation'):
    """Plot a cross-correlation function against its lags.

    Parameters
    ----------
    lags : sequence
        Lag associated with each entry of ``ccf`` (x axis).
    ccf : sequence
        Cross-correlation values (y axis).
    n_obs : int, optional
        Number of observations per series, used for the approximate
        significance bounds ``+/- 2 / sqrt(n_obs)``. When omitted it is
        inferred as ``(len(lags) + 1) // 2``, which is exact for a full-mode
        correlation of two equal-length series. Pass it explicitly otherwise.
    title : str, optional
        Figure title.

    Returns
    -------
    None. The figure is drawn on a new 9x6 matplotlib figure.
    """
    if n_obs is None:
        # A full-mode correlation of two length-n series has 2n - 1 lags.
        n_obs = (len(lags) + 1) // 2
    # The bound follows the data instead of a fixed sample size.
    bound = 2 / np.sqrt(n_obs)

    fig, ax = plt.subplots(figsize=(9, 6))
    ax.plot(lags, ccf)

    # Approximate significance band around zero.
    ax.axhline(-bound, color='red', label='95% confidence interval')
    ax.axhline(bound, color='red')

    # Reference axes through the origin.
    ax.axvline(x=0, color='black', lw=1)
    ax.axhline(y=0, color='black', lw=1)

    # Largest positive and negative correlation values.
    ax.axhline(y=np.max(ccf), color='blue', lw=1,
               linestyle='--', label='highest +/- correlation')
    ax.axhline(y=np.min(ccf), color='blue', lw=1,
               linestyle='--')

    ax.set(ylim=[-1, 1])
    ax.set_title(title, weight='bold', fontsize=15)
    ax.set_ylabel('Correlation Coefficients', weight='bold',
                  fontsize=12)
    ax.set_xlabel('Time Lags', weight='bold', fontsize=12)
    ax.legend()
    
# Draw the cross-correlation computed earlier against its lags.
ccf_plot(lags=lags, ccf=ccf_ielts)

In [52]:
# NOTE(review): `df2` is reassigned in several cells; this one needs the frame
# that holds a 'coastline_position_transect_0' column — TODO confirm.
col = 'coastline_position_transect_0'
series = df2[col]

series.plot()

# Autocorrelation, then partial autocorrelation, each on its own 12x8 figure (25 lags).
for draw_correlogram in (sm.graphics.tsa.plot_acf, sm.graphics.tsa.plot_pacf):
    fig, ax = plt.subplots(figsize=(12, 8))
    draw_correlogram(series, lags=25, ax=ax)

# Mann-Kendall test on the raw values.
print(mk.original_test(series.values))

Mann_Kendall_Test(trend='no trend', h=False, p=0.42479090505531136, z=-0.7981371498910443, Tau=-0.05742821473158552, s=-230.0, var_s=82322.0, slope=-0.010519514596271762, intercept=225.1649923156301)


In [100]:
from statsmodels.tsa.seasonal import STL
# STL decomposition of the 2 m temperature column (seasonal=13).
stl = STL(df2['2_metre_temperature'], seasonal=13)
result = stl.fit()
fig = result.plot()

# Keep the three fitted components under their own names.
trend, seasonal, residual = result.trend, result.seasonal, result.resid