In [2]:
from IslandTime import TimeSeriesConnections, retrieve_island_info, plot_shoreline_transects, Workflow, TimeSeriesERA5
import numpy as np
import pandas as pd

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
island_info = Workflow('Dhakandhoo', 'Maldives', execute_preprocess=True, overwrite_preprocess=True).main()


-------------------------------------------------------------------
Retrieving all information available for the island
Island: Dhakandhoo, Maldives
-------------------------------------------------------------------

~ The following information is available: ~

general_info
               island
               country
               part of
               located in the administrative territorial entity
               located in or next to body of water
               elevation above sea level
               atoll
spatial_reference
               latitude
               longitude
               polygon
               polygon_OSM
               reference_shoreline
               transects
               transects_direction
               area_country
               reference_shoreline_buffer_L8
               reference_shoreline_buffer_L9
               reference_shoreline_buffer_S2
image_collection_dict
               description
               S2
               L5
               L7


In [141]:
island_info['timeseries_sea_level_anomaly']['timeseries'].index[0]
island_info['timeseries_ERA5']['timeseries'].index[0]

Timestamp('2010-01-01 00:00:00+0000', tz='UTC')

In [156]:
df_cc = pd.concat([island_info['timeseries_ERA5']['timeseries'], island_info['timeseries_sea_level_anomaly']['timeseries']], axis=0)

# monthly mean
df_ccf = df_cc.resample('M').mean().dropna()

In [163]:
subset_ccf = df_ccf[['wave_energy_of_combined_wind_waves_and_swell', 'sea_level_anomaly', 'sea_surface_temperature', 'total_precipitation', '2_metre_dewpoint_temperature', 'mean_wave_direction', 'wind_direction_10m', 'wind_speed_10m', 'mean_sea_level_pressure', '2_metre_temperature', 'evaporation']]

In [49]:
island_info['timeseries_ERA5']['timeseries'].columns

Index(['2_metre_dewpoint_temperature', '2_metre_temperature',
       'soil_temperature_level_1', 'total_precipitation', 'evaporation',
       'sea_surface_temperature', 'mean_sea_level_pressure',
       'mean_direction_of_total_swell', 'mean_direction_of_wind_waves',
       'mean_wave_direction', 'wind_speed_10m', 'wind_direction_10m',
       'wind_direction_true_10m', 'wave_energy_of_total_swell',
       'wave_energy_of_wind_waves',
       'wave_energy_of_combined_wind_waves_and_swell'],
      dtype='object')

In [5]:
island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly'].keys()

Index(['coastline_position_transect_2_waterline',
       'sea_surface_temperature_NOAACRW', 'sea_level_anomaly',
       '2_metre_dewpoint_temperature', '2_metre_temperature',
       'soil_temperature_level_1', 'total_precipitation', 'evaporation',
       'sea_surface_temperature', 'mean_sea_level_pressure',
       'mean_direction_of_total_swell', 'mean_direction_of_wind_waves',
       'mean_wave_direction', 'wind_speed_10m', 'wind_direction_10m',
       'wind_direction_true_10m', 'wind_u10', 'wind_v10',
       'wave_energy_of_total_swell', 'wave_energy_of_wind_waves',
       'wave_energy_of_combined_wind_waves_and_swell'],
      dtype='object')

In [42]:
df['items']

0       {'id': 149882, 'x': '1992.7595628', 'y': '-0.8...
1       {'id': 149885, 'x': '1992.7786885', 'y': '-1.0...
2       {'id': 149888, 'x': '1992.7978142', 'y': '-1.2...
3       {'id': 149891, 'x': '1992.8169399', 'y': '-1.4...
4       {'id': 149894, 'x': '1992.8360656', 'y': '-1.1...
                              ...                        
1642    {'id': 154808, 'x': '2024.420765', 'y': '0.541...
1643    {'id': 154811, 'x': '2024.4398907', 'y': '0.48...
1644    {'id': 154814, 'x': '2024.4590164', 'y': '0.19...
1645    {'id': 154817, 'x': '2024.4781421', 'y': '0.13...
1646    {'id': 154820, 'x': '2024.4972678', 'y': '0.11...
Name: items, Length: 1647, dtype: object

In [175]:
# read json file
import json
from datetime import datetime, timedelta
import pytz
with open('geojson//indian_ocean_dipole_index.json') as json_file:
    data = json.load(json_file)

# Function to convert fractional years to datetime
def convert_fractional_year_to_datetime(year):
    if type(year) is str:
        year = float(year)
    year_int = int(year)
    remainder = year - year_int
    start_of_year = datetime(year_int, 1, 1)
    n_days = (datetime(year_int + 1, 1, 1) - start_of_year).days
    return start_of_year + timedelta(days=remainder * n_days)

dict_iod = {}

for i in range(len(data['items'])):
    date = convert_fractional_year_to_datetime(data['items'][i]['x'])
    # make tz-aware
    date = date.replace(tzinfo=pytz.utc)
    dict_iod[date] = float(data['items'][i]['y'])

In [184]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
df_iod = pd.DataFrame(dict_iod.values(), index=dict_iod.keys(), columns=['iod'])
# monthly mean
df_iod = df_iod.resample('M').mean().dropna()
df_iod.plot(ax=ax)

<Axes: >

In [185]:
ts_df_total = pd.concat([ts_df, df_iod], axis=1) 
ts_df_total = ts_df_total.resample('M').mean().dropna()
ts_df_total['iod'].plot()
plt.show()

In [188]:
plot_shoreline_transects(island_info)

In [183]:
island_info = retrieve_island_info('Dhakandhoo', 'Maldives', verbose=False)

ts = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['coastline_position_transect_2_waterline']
ts2 = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_55_waterline']['monthly']['coastline_position_transect_55_waterline']
ts3 = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_33_waterline']['monthly']['coastline_position_transect_33_waterline']
ts4 = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_80_waterline']['monthly']['coastline_position_transect_80_waterline']
ts_we = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['wave_energy_of_combined_wind_waves_and_swell']
ts_sl = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['sea_level_anomaly']
ts_sst = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['sea_surface_temperature']
ts_tp = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['total_precipitation']
ts_dtemp = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['2_metre_dewpoint_temperature']
ts_wd = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['mean_wave_direction']
ts_windd = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['wind_direction_10m']
ts_winds = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['wind_speed_10m']
ts_ap = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['mean_sea_level_pressure']
ts_temp = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['2_metre_temperature']
ts_eva = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['evaporation']
ts_wind_u = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['wind_u10']
ts_wind_v = island_info['timeseries_preprocessing']['optimal time period']['dict_timeseries']['coastline_position_transect_2_waterline']['monthly']['wind_v10']

import Rbeast as rb

o = rb.beast(ts, start=[ts.index[0].year, ts.index[0].month, ts.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o2 = rb.beast(ts2, start=[ts2.index[0].year, ts2.index[0].month, ts2.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o3 = rb.beast(ts3, start=[ts3.index[0].year, ts3.index[0].month, ts3.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o4 = rb.beast(ts4, start=[ts4.index[0].year, ts4.index[0].month, ts4.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_we = rb.beast(ts_we, start=[ts_we.index[0].year, ts_we.index[0].month, ts_we.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_sl = rb.beast(ts_sl, start=[ts_sl.index[0].year, ts_sl.index[0].month, ts_sl.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_sst = rb.beast(ts_sst, start=[ts_sst.index[0].year, ts_sst.index[0].month, ts_sst.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_tp = rb.beast(ts_tp, start=[ts_tp.index[0].year, ts_tp.index[0].month, ts_tp.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_dtemp = rb.beast(ts_temp, start=[ts_temp.index[0].year, ts_temp.index[0].month, ts_temp.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_wd = rb.beast(ts_wd, start=[ts_wd.index[0].year, ts_wd.index[0].month, ts_wd.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_windd = rb.beast(ts_windd, start=[ts_windd.index[0].year, ts_windd.index[0].month, ts_windd.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_winds = rb.beast(ts_winds, start=[ts_winds.index[0].year, ts_winds.index[0].month, ts_winds.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_ap = rb.beast(ts_ap, start=[ts_ap.index[0].year, ts_ap.index[0].month, ts_ap.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_temp = rb.beast(ts_temp, start=[ts_temp.index[0].year, ts_temp.index[0].month, ts_temp.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_eva = rb.beast(ts_eva, start=[ts_eva.index[0].year, ts_eva.index[0].month, ts_eva.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_wind_u = rb.beast(ts_wind_u, start=[ts_wind_u.index[0].year, ts_wind_u.index[0].month, ts_wind_u.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
o_wind_v = rb.beast(ts_wind_v, start=[ts_wind_v.index[0].year, ts_wind_v.index[0].month, ts_wind_v.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)
# o_wd = rb.beast(ts_wd, start=[ts_wd.index[0].year, ts_wd.index[0].month, ts_wd.index[0].day], season='harmonic', deltat='1/12 year', period='1 year', quiet=True, print_progress=False)

# rb.plot(o_winds)

names = ['coastline_position', 'wave_energy_of_combined_wind_waves_and_swell', 'sea_level_anomaly', 
         'sea_surface_temperature', 'total_precipitation', '2_metre_dewpoint_temperature', 'mean_wave_direction', 
         'wind_direction_10m', 'wind_speed_10m', 'mean_sea_level_pressure', '2_metre_temperature', 'evaporation',
         'wind_u10', 'wind_v10', 'coastline_position_transect_55_waterline', 'coastline_position_transect_33_waterline', 'coastline_position_transect_80_waterline']
o_list = [o, o_we, o_sl, o_sst, o_tp, o_temp, o_wd, o_windd, o_winds, o_ap, o_dtemp, o_eva, o_wind_u, o_wind_v, o2, o3, o4]
ts_list = [ts, ts_we, ts_sl, ts_sst, ts_tp, ts_temp, ts_wd, ts_windd, ts_winds, ts_ap, ts_dtemp, ts_eva, ts_wind_u, ts_wind_v, ts2, ts3, ts4]
ts_dict, ts_dict_trend, ts_dict_seasonal, ts_dict_res, ts_dict_detrended = {}, {}, {}, {}, {}
for tss, ots, name in zip(ts_list, o_list, names):
    ts_dict[name] = tss
    ts_dict_trend[name] = ots.trend.Y
    ts_dict_seasonal[name] = ots.season.Y
    ts_dict_detrended[name] = tss - ots.trend.Y
    ts_dict_res[name] = ts - ots.trend.Y - ots.season.Y


# ts_dict = {'wave_energy': ts_we, 'coastline_position': ts, 'sea_level_anomaly': ts_sl, 'sst': ts_sst} #, 'mean_wave_direction': ts_wd}
# ts_dict_trend = {'wave_energy': o_we.trend.Y, 'coastline_position': o.trend.Y, 'sea_level_anomaly': o_sl.trend.Y, 'sst': o_sst.trend.Y} #, 'mean_wave_direction': o_wd.trend.Y}
# ts_dict_seasonal = {'wave_energy': o_we.season.Y, 'coastline_position': o.season.Y, 'sea_level_anomaly': o_sl.season.Y, 'sst': o_sst.season.Y} #, 'mean_wave_direction': o_wd.season.Y}
# ts_dict_res = {'wave_energy': ts_we - o_we.trend.Y - o_we.season.Y, 'coastline_position': ts - o.trend.Y - o.season.Y, 'sea_level_anomaly': ts_sl - o_sl.trend.Y - o_sl.season.Y, 'sst': ts_sst - o_sst.trend.Y - o_sst.season.Y}#, 'mean_wave_direction': ts_wd - o_wd.trend.Y - o_wd.season.Y}

ts_df = pd.DataFrame(ts_dict)
ts_df_trend = pd.DataFrame(ts_dict_trend)
ts_df_seasonal = pd.DataFrame(ts_dict_seasonal)
ts_df_res = pd.DataFrame(ts_dict_res)
ts_df_detrended = pd.DataFrame(ts_dict_detrended)





















In [89]:
fig, ax = plt.subplots()
island_info['timeseries_climate_indices']['timeseries']['Dipole Mode Index (Indian Ocean Dipole Index)'].plot(ax=ax)

# normalise iod between -1 and 1
ts_df_total['iod_n'] = (ts_df_total['iod'] - ts_df_total['iod'].mean()) / (ts_df_total['iod'].max() - ts_df_total['iod'].min())
ts_df_total['iod_n'].plot(ax=ax)
plt.show()

In [186]:
dd = ts_df_total[['coastline_position', 'wave_energy_of_combined_wind_waves_and_swell', 
            'sea_level_anomaly', 'sea_surface_temperature', 
            'total_precipitation', '2_metre_temperature', 
            'evaporation', 'mean_sea_level_pressure',
            '2_metre_dewpoint_temperature', 'mean_wave_direction',
            'wind_u10', 'wind_v10', 'iod']].to_dict()

ts_df_total2 = ts_df_total.copy()
ts_df_total2['wave_ucomp'] = ts_df_total['wave_energy_of_combined_wind_waves_and_swell'] * np.cos(np.deg2rad(90-ts_df_total['mean_wave_direction']%360))
ts_df_total2['wave_vcomp'] = ts_df_total['wave_energy_of_combined_wind_waves_and_swell'] * np.sin(np.deg2rad(90-ts_df_total['mean_wave_direction']%360))

dd = ts_df_total[['wind_u10', 'wind_v10', 'iod', '2_metre_temperature', 'sea_surface_temperature', 'mean_sea_level_pressure', 'total_precipitation', 'evaporation']].to_dict()

dd = ts_df_total2[['coastline_position', 'wave_energy_of_combined_wind_waves_and_swell', 'sea_level_anomaly', 'mean_wave_direction', 'sea_surface_temperature']].to_dict()

dd = ts_df_total2[['2_metre_temperature', 'coastline_position', 'wave_ucomp', 'wave_vcomp', 'sea_level_anomaly', 'sea_surface_temperature', 'mean_sea_level_pressure', 'wind_u10', 'wind_v10', 'iod', 'total_precipitation', 'evaporation']].to_dict()

dd = ts_df_total2[['coastline_position', 'wave_ucomp', 'wave_vcomp', 'sea_level_anomaly', 'mean_sea_level_pressure', 'sea_surface_temperature', '2_metre_temperature', 'total_precipitation', 'evaporation', 'wind_u10', 'wind_v10', '2_metre_dewpoint_temperature', 'iod']].to_dict()


dd = ts_df_total2[['coastline_position', 'wave_ucomp', 'wave_vcomp', 'sea_level_anomaly', 'sea_surface_temperature', 'coastline_position_transect_55_waterline']].to_dict()

dd = ts_df_total2[['coastline_position', 'coastline_position_transect_55_waterline', 'coastline_position_transect_80_waterline', 'coastline_position_transect_33_waterline']].to_dict()

# dd = ts_df_trend[['coastline_position', 'sea_level_anomaly']].to_dict()
# dd = ts_df[['wave_energy_of_combined_wind_waves_and_swell', 'sea_level_anomaly']].to_dict()

In [189]:
res = TimeSeriesConnections(dd, run_combinations=False, run_causal_inference_discovery=True, model_causal_discovery='PCMCIplus', data_causal_discovery='raw').main()


-------------------------------------------------------------------
Evaluating time series connections
-------------------------------------------------------------------





Lag: 1
--- Causal Graph Discovery ---

##
## Estimating lagged dependencies 
##

Parameters:

independence test = robust_par_corr
tau_min = 0
tau_max = 1

##
## Step 1: PC1 algorithm for selecting lagged conditions
##

Parameters:
independence test = robust_par_corr
tau_min = 1
tau_max = 1
pc_alpha = [0.05]
max_conds_dim = None
max_combinations = 1



## Resulting lagged parent (super)sets:

    Variable coastline_position has 2 link(s):
        (coastline_position_transect_55_waterline -1): max_pval = 0.00001, |min_val| =  0.452
        (coastline_position -1): max_pval = 0.00096, |min_val| =  0.344

    Variable coastline_position_transect_55_waterline has 2 link(s):
        (coastline_position_transect_55_waterline -1): max_pval = 0.00001, |min_val| =  0.451
        (coastline_position -1): max_pval = 0.01681

In [147]:
import tigramite.plotting as tp

df_results_total_agg = res.copy()
var = list(dd.keys())
for n_sigma in range(1, 4):
    idxx = np.where(df_results_total_agg.frequency.values >= n_sigma)
    df = df_results_total_agg.iloc[idxx[0]]
    df = df.reset_index()

    tau_max = max((df.tau.values).astype(int))
    n_var = len(var)
    var_of_interest = 'coastline_position'
    idx_var_of_interest = np.where(np.array(var) == var_of_interest)[0][0]

    graph_mean = np.full((n_var, n_var, tau_max+1), '', dtype=object)
    val_matrix_mean = np.zeros((n_var, n_var, tau_max+1))

    set_of_interest = []

    for idx, row in df.iterrows():
        fvar, edge, svar = row['causal link'].split(' ')
        i_fvar, i_svar = np.where(np.array(var) == fvar)[0][0], np.where(np.array(var) == svar)[0][0]
        
        if (fvar == var_of_interest and edge == '-->') or (svar == var_of_interest and edge == '<--'):
            continue

        if int(row['tau']) > 12:
            continue
 
        if edge == '-->' or edge == '<--':
            graph_mean[i_fvar, i_svar, int(row['tau'])] = edge
            val_matrix_mean[i_fvar, i_svar, int(row['tau'])] = row['val_matrix']
            val_matrix_mean[i_svar, i_fvar, int(row['tau'])] = row['val_matrix']

            if (fvar == var_of_interest) or (svar == var_of_interest):
                set_of_interest.append([i_fvar, int(row['tau']), i_svar])

    tp.plot_graph(graph=graph_mean, val_matrix=val_matrix_mean, var_names=var, show_autodependency_lags=False)
    # plt.savefig('stability_graph_transect_{}_sigma_{}.png'.format(transect, n_sigma), dpi=300, bbox_inches='tight')
    plt.show()

    # # Initialize dataframe object, specify variable names
    # med.fit_model(all_parents=toys.dag_to_links(graph_mean), tau_max=tau_max)
    # val_matrix_ce = med.get_val_matrix(symmetrize=True)
    # med.fit_model_bootstrap(boot_blocklength=1, seed=42, boot_samples=100)

    # size_plot = []
    # labels_plot = []

    # for set_ce in set_of_interest:
    #     i_ce, tau_ce, j_ce = set_ce
        
    #     # Get Causal effect and 90% confidence interval
    #     size_plot.append(abs(med.get_ce(i=i_ce, tau=-tau_ce,  j=j_ce)))
    #     print(min(abs(med.get_bootstrap_of(function='get_ce', 
    #     function_args={'i':i_ce, 'tau':-tau_ce,  'j':j_ce}, conf_lev=0.90))))
    #     labels_plot.append(var[i_ce]+str(tau_ce))
    #     #idx = val_matrix[:, idx_var_of_interest, :].nonzero()

    # '''
    # plt.bar(['wave energy', 'sea level anomaly'], [val_matrix[:, 8, :][idx][0], val_matrix[:, 8, :][idx][1]], yerr=[0.10168095, 0.0815955], color=[colors[0], colors[1]], capsize=5)
    # plt.ylabel('Causal effect estimation')
    # plt.show()
    # '''
    # # Data for the chart
    # values = size_plot  # Sizes or proportions for each sector
    # categories = labels_plot  # Labels for each sector
    # colors = colors = list(mcolors.TABLEAU_COLORS.values())[:len(size_plot)]  # Colors for each sector

    # # Create a figure and axis
    # fig, ax = plt.subplots(subplot_kw={'projection': 'polar'})

    # # Convert values to radians
    # theta = np.linspace(0, 2 * np.pi, len(categories), endpoint=False)

    # # Plot the bars
    # bars = ax.bar(theta, values, width=0.4, color=colors, zorder=2)

    # # Set the axis limits
    # ax.set_ylim(0, max(values)+0.05)

    # # Set the angle of the labels
    # ax.set_xticks(theta)
    # ax.set_xticklabels(categories, fontdict={'fontsize': 15,
    # 'fontweight': 'bold',
    # 'verticalalignment': 'center',
    # 'horizontalalignment': 'center',
    # 'backgroundcolor': 'black',
    # 'color': 'white',
    # 'zorder': 3})

    # ax.spines['polar'].set_zorder(0)
    # # Set a title for the chart
    # ax.set_title('Causal effect estimation at transect {}'.format(transect), fontweight='bold', fontsize=15, pad=20)
    # # Display the chart
    # plt.savefig('causal_effect_estimation_transect_{}_sigma_{}.png'.format(transect, n_sigma), dpi=300, bbox_inches='tight')
    # plt.show()

In [73]:
from statsmodels.tsa.api import VAR

model = VAR(diff)
aic_res = []
for i in range(1, 13):
    results = model.fit(i)
    aic_res.append(results.aic)

aic_min = np.argmin(aic_res)
results = model.fit(maxlags=12, ic='aic')
print(results.summary())

from statsmodels.stats.stattools import durbin_watson

out = durbin_watson(results.resid)

for col, val in zip(diff.columns, out):
    print(col, ':', round(val, 2))

from statsmodels.tsa.stattools import grangercausalitytests

maxlag=12
test = 'ssr_chi2test'

def grangers_causation_matrix(data, variables, test='ssr_chi2test', verbose=False):    
   
    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
            p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
            if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            df.loc[r, c] = min_p_value
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df

grangers_causation_matrix(diff, variables = diff.columns)

  Summary of Regression Results   
Model:                         VAR
Method:                        OLS
Date:           Thu, 15, Aug, 2024
Time:                     15:14:23
--------------------------------------------------------------------
No. of Equations:         3.00000    BIC:                    1.11308
Nobs:                     80.0000    HQIC:                 -0.706352
Log likelihood:          -161.585    FPE:                   0.173542
AIC:                     -1.92401    Det(Omega_mle):       0.0599736
--------------------------------------------------------------------
Results for equation coastline_position
                                                      coefficient       std. error           t-stat            prob
-------------------------------------------------------------------------------------------------------------------
const                                                   -0.262745         0.675877           -0.389           0.697
L1.coastline_position  

Unnamed: 0,coastline_position_x,wave_energy_of_combined_wind_waves_and_swell_x,sea_level_anomaly_x
coastline_position_y,1.0,0.0,0.0084
wave_energy_of_combined_wind_waves_and_swell_y,0.0,1.0,0.0
sea_level_anomaly_y,0.0,0.0,1.0


In [32]:
import matplotlib.pyplot as plt
plt.plot(diff['coastline_position'])
plt.plot(res['coastline_position'])
plt.plot(diff['wave_energy_of_combined_wind_waves_and_swell'])

[<matplotlib.lines.Line2D at 0x2488c4ffd10>]

In [48]:
# import adfuller
from statsmodels.tsa.stattools import adfuller

print(adfuller(diff['coastline_position']))
print(adfuller(res['coastline_position']))
print(adfuller(tsdf['coastline_position']))

(-6.4236363426264695, 1.7663796953858367e-08, 7, 83, {'1%': -3.5117123057187376, '5%': -2.8970475206326833, '10%': -2.5857126912469153}, 521.810957631601)
(-7.29454621467783, 1.388961387831121e-10, 2, 89, {'1%': -3.506057133647011, '5%': -2.8946066061911946, '10%': -2.5844100201994697}, 447.18631227258913)
(-4.260621374647641, 0.0005193769800400418, 2, 89, {'1%': -3.506057133647011, '5%': -2.8946066061911946, '10%': -2.5844100201994697}, 533.8410507208511)


In [45]:
# import plot_acf
from statsmodels.graphics.tsaplots import plot_acf
# plot_acf(diff['coastline_position'])
# plot_acf(res['coastline_position'])
# plot_acf(tsdf['coastline_position'])

plot_acf(diff['wave_energy_of_combined_wind_waves_and_swell'])
plot_acf(res['wave_energy_of_combined_wind_waves_and_swell'])
plot_acf(tsdf['wave_energy_of_combined_wind_waves_and_swell'])

<Figure size 640x480 with 1 Axes>

In [343]:
plt.plot(ts_df.index, data_0_1(ts_df['mean_wave_direction']))
plt.plot(ts_df.index, data_0_1(ts_df['sea_level_anomaly']))
plt.show()

In [298]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
from statsmodels.tsa.stattools import adfuller

import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint

def data_0_1(data):
    return (data - data.min()) / (data.max() - data.min())

def test_cointegration(ts1, ts2, max_lag=0, criterion='bic'):
    """
    Test if two time series are co-integrated and determine the optimal lag.

    Parameters:
    ts1, ts2: pd.Series or np.array
        The two time series to be tested for co-integration.
    max_lag: int
        The maximum lag to be considered in the co-integration test.
    criterion: str
        The information criterion to be used for lag selection ('aic', 'bic', 'hqic').

    Returns:
    p_value: float
        The p-value of the co-integration test.
    lag: int
        The optimal lag determined by the information criterion.
    is_cointegrated: bool
        True if the series are co-integrated, False otherwise.
    """
    
    # Ensure the input time series are pandas Series
    ts1 = pd.Series(ts1)
    ts2 = pd.Series(ts2)
    
    # Initialize variables to store results
    best_aic = np.inf
    best_lag = None
    best_p_value = None
    beta = None
    intercept = None
    
    for lag in range(max_lag + 1):
        # Create lagged series for ts2
        ts2_lagged = ts2.shift(lag).dropna()
        ts1_lagged = ts1[lag:]  # align ts1 with lagged ts2
        
        # Ensure the series are aligned and drop NaNs
        aligned = pd.concat([ts1_lagged, ts2_lagged], axis=1).dropna()
        ts1_aligned = aligned.iloc[:, 0]
        ts2_aligned = aligned.iloc[:, 1]

        # Plot
        # plt.figure()
        # plt.plot(data_0_1(ts1_aligned))
        # plt.plot(data_0_1(ts2_aligned))
        # plt.title(lag)
        
        # Perform co-integration test
        if len(ts1_aligned) > 0:
            coint_result = coint(ts1_aligned, ts2_aligned, autolag=None, trend='ct')
            p_value = coint_result[1]
            
            # Compute AIC for model selection
            model = sm.OLS(ts1_aligned, sm.add_constant(ts2_aligned)).fit()
            aic = model.aic
            print(aic)
            
            # Select the best model based on the criterion
            if criterion == 'aic':
                if aic < best_aic:
                    best_aic = aic
                    best_lag = lag
                    best_p_value = p_value
                    beta = model.params[1]
                    intercept = model.params['const']
            elif criterion == 'bic':
                bic = model.bic
                if bic < best_aic:
                    best_aic = bic
                    best_lag = lag
                    best_p_value = p_value
                    beta = model.params[1]
                    intercept = model.params['const']
            elif criterion == 'hqic':
                hqic = model.hqic
                if hqic < best_aic:
                    best_aic = hqic
                    best_lag = lag
                    best_p_value = p_value
                    beta = model.params[1]
                    intercept = model.params['const']
    
    # Plot the best model
    ts2_lagged = ts2.shift(best_lag).dropna()
    ts1_lagged = ts1[best_lag:]  # align ts1 with lagged ts2
    aligned = pd.concat([ts1_lagged, ts2_lagged], axis=1).dropna()
    ts1_aligned = aligned.iloc[:, 0]
    ts2_aligned = aligned.iloc[:, 1]
    model = sm.OLS(ts1_aligned, sm.add_constant(ts2_aligned)).fit()
    fig, ax = plt.subplots(2, 1)
    ax[0].plot(ts1_aligned.index, ts1_aligned)
    # ax[0].plot(ts1_aligned.index, ts2_aligned)
    ax[0].plot(ts1_aligned.index, model.predict(sm.add_constant(ts2_aligned)))
    # plot regression
    ax[0].set_title(f"Best Model (Lag={best_lag})")

    ax[1].scatter(ts1_aligned.index, ts1_aligned - model.predict(sm.add_constant(ts2_aligned)))
    

    # are the residuals stationary?
    adf = adfuller(ts1_aligned - model.predict(sm.add_constant(ts2_aligned)))
    
    # Determine if the series are co-integrated (p-value < 0.05)
    is_cointegrated = best_p_value < 0.05 if best_p_value is not None else False
    ax[1].set_title("Co-integration: {}".format(is_cointegrated))
    
    return best_p_value, best_lag, is_cointegrated, beta


ts11 = dd['wave_energy_of_combined_wind_waves_and_swell']
ts22 = dd['sea_level_anomaly']

# Test for co-integration
p_value, lag, is_cointegrated, beta = test_cointegration(ts11, ts22)

print(f"P-value: {p_value}")
print(f"Optimal Lag: {lag}")
print(f"Are the series co-integrated? {'Yes' if is_cointegrated else 'No'}")
print(beta)


438.10216108288716
P-value: 0.8754021383794642
Optimal Lag: 0
Are the series co-integrated? No
-1.1975683363071934


In [324]:
coint_result = coint(ts_df['sea_surface_temperature'], ts_df['sea_level_anomaly'], trend='c')
# Null hypothesis: the two series are not cointegrated
coint_result[1]

# plt.plot(ts_df.index, data_0_1(ts_df['wave_energy_of_combined_wind_waves_and_swell']))
# plt.plot(ts_df.index, data_0_1(ts_df['sea_level_anomaly']))
# plt.plot(ts_df.index, data_0_1(ts_df['coastline_position']))
# plt.show()

0.6919569561008979

In [367]:
def find_cointegrated_pairs(data):
    n = data.shape[1]
    score_matrix = np.zeros((n, n))
    pvalue_matrix = np.ones((n, n))
    keys = data.keys()
    pairs = []
    for i in range(n):
        for j in range(i+1, n):
            S1 = data[keys[i]]
            S2 = data[keys[j]]
            result = coint(S1, S2)
            score = result[0]
            pvalue = result[1]
            score_matrix[i, j] = score
            pvalue_matrix[i, j] = pvalue
            if pvalue < 0.1:
                pairs.append((keys[i], keys[j]))
    return score_matrix, pvalue_matrix, pairs

scores, pvalues, pairs = find_cointegrated_pairs(ts_df_trend)

import seaborn
seaborn.heatmap(scores, xticklabels=list(ts_df.columns), yticklabels=list(ts_df.columns), cmap='RdYlGn_r')

<Axes: >

In [358]:
pairs

[('2_metre_dewpoint_temperature', '2_metre_temperature')]

In [366]:
for col in ts_df_trend.columns:
    plt.plot(ts_df_trend.index, data_0_1(ts_df_trend[col]), label=col)
plt.legend()

<matplotlib.legend.Legend at 0x24f18391820>

In [370]:
data_0_1(ts_df_trend['sea_surface_temperature']).plot()
data_0_1(ts_df_trend['sea_level_anomaly']).plot()

<Axes: >

In [352]:
for col in ts_df.columns:
    adf = adfuller(ts_df_trend[col])
    if adf[1] < 0.05:
        print(f"{col} is stationary")
    else:
        print(f"{col} is not stationary")

coastline_position is not stationary
wave_energy_of_combined_wind_waves_and_swell is not stationary
sea_level_anomaly is not stationary
sea_surface_temperature is not stationary
total_precipitation is not stationary
2_metre_dewpoint_temperature is not stationary
mean_wave_direction is stationary
wind_direction_10m is not stationary
wind_speed_10m is not stationary
mean_sea_level_pressure is not stationary
2_metre_temperature is not stationary
evaporation is not stationary


In [318]:
plt.plot(ts_df.index, data_0_1(ts_df['wave_energy_of_combined_wind_waves_and_swell']))
plt.plot(ts_df.index, data_0_1(ts_df['sea_level_anomaly']))
plt.plot(ts_df.index, data_0_1(ts_df['coastline_position']))
plt.show()

In [15]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax2 = ax.twinx()
ts1.plot(ax=ax, color='r')
ts2.plot(ax=ax2, color='b')

<Axes: >

In [5]:
from statsmodels.tsa.stattools import ccf, grangercausalitytests, coint, adfuller

t_statistic, p_val, critical_p_val = coint(ts1, ts2, trend='ct', autolag=None, maxlag=3)
print(f' t statistic: {np.round(t_statistic, 2)} \n p value: {np.round(p_val,2)} \n critical p values [1%, 5%, 10%] {critical_p_val}')




 t statistic: -3.83 
 p value: 0.04 
 critical p values [1%, 5%, 10%] [-4.50158458 -3.88654012 -3.57500991]


In [6]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# 4. Johansen Co-integration Test
data = pd.concat([ts1, ts2], axis=1)
result = coint_johansen(data, det_order=0, k_ar_diff=3)

print('Johansen Test Statistics:')
print(result.lr1)  # Trace statistic
print('Critical Values (90%, 95%, 99%):')
print(result.cvt)

# Interpret the Johansen test results
for i in range(len(result.lr1)):
    trace_stat = result.lr1[i]
    critical_values = result.cvt[i]
    print(f'Trace statistic for r<={i}: {trace_stat:.2f}, critical values {critical_values}')

    if trace_stat > critical_values[1]:  # Compare to 95% critical value
        print(f'Null hypothesis of r<={i} is rejected at the 95% confidence level.')
    else:
        print(f'Null hypothesis of r<={i} cannot be rejected at the 95% confidence level.')

Johansen Test Statistics:
[56.70130768 11.12405127]
Critical Values (90%, 95%, 99%):
[[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
Trace statistic for r<=0: 56.70, critical values [13.4294 15.4943 19.9349]
Null hypothesis of r<=0 is rejected at the 95% confidence level.
Trace statistic for r<=1: 11.12, critical values [2.7055 3.8415 6.6349]
Null hypothesis of r<=1 is rejected at the 95% confidence level.


In [7]:
# Extract co-integrating vectors and compute the co-integrating relationship
coint_vectors = result.evec
coint_vector = coint_vectors[:, 0]  # First co-integrating vector

# data = pd.concat([ts1, ts2], axis=1)

# Calculate the co-integrating relationship
coint_relationship = np.dot(data_s, coint_vector)

# Plot the time series and co-integrating relationship on the same plot
plt.figure(figsize=(14, 6))
plt.plot(ts1, label='Time Series 1', color='blue')
plt.plot(ts2, label='Time Series 2', color='red')
plt.plot(data.index, coint_relationship, label='Co-integrating Relationship', color='green', linestyle='--')
plt.legend()
plt.title('Time Series and Co-integrating Relationship')
plt.show()

# Test for stationarity of the residuals
residuals = coint_relationship - np.mean(coint_relationship)
adf_result = adfuller(residuals)
print(f'ADF Statistic: {adf_result[0]}')
print(f'p-value: {adf_result[1]}')
print(f'Critical Values: {adf_result[4]}')

# Plot the residuals of the co-integrating relationship on a separate plot
plt.figure(figsize=(14, 6))
plt.plot(data.index, residuals, label='Residuals of Co-integrating Relationship', color='purple')
plt.axhline(0, color='black', linestyle='--')
plt.title('Residuals of Co-integrating Relationship')
plt.legend()
plt.show()

NameError: name 'data_s' is not defined

In [114]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# import plot_acf
from statsmodels.graphics.tsaplots import plot_acf

import Rbeast as rb

# Plot original time series
plt.figure(figsize=(14, 6))
plt.plot(data['coastline_position_transect_2_waterline'], label='Time Series 1')
plt.plot(data['wave_energy_of_combined_wind_waves_and_swell'], label='Time Series 2')
plt.legend()
plt.title('Original Time Series with Seasonality')
plt.show()

# Seasonal Decomposition
# decomposition_ts1 = seasonal_decompose(data['coastline_position_transect_2_waterline'], model='additive', period=12)
# decomposition_ts2 = seasonal_decompose(data['wave_energy_of_combined_wind_waves_and_swell'], model='additive', period=12)
decomposition_ts1 = rb.beast(data['coastline_position_transect_2_waterline'], period='1 year', deltat='1/12 year')
decomposition_ts2 = rb.beast(data['wave_energy_of_combined_wind_waves_and_swell'], period='1 year', deltat='1/12 year')
rb.plot(decomposition_ts2)

plot_acf(data['coastline_position_transect_2_waterline'])

# # Plot seasonal decomposition
# plt.figure(figsize=(14, 12))

# plt.subplot(2, 3, 1)
# plt.plot(data['coastline_position_transect_2_waterline'], label='Time Series 1')
# plt.legend()
# plt.title('Time Series 1')

# plt.subplot(2, 3, 2)
# plt.plot(decomposition_ts1.trend.dropna(), label='Trend Component')
# plt.legend()
# plt.title('Trend Component of TS1')

# plt.subplot(2, 3, 3)
# plt.plot(decomposition_ts1.seasonal, label='Seasonal Component')
# plt.legend()
# plt.title('Seasonal Component of TS1')

# plt.subplot(2, 3, 4)
# plt.plot(data['wave_energy_of_combined_wind_waves_and_swell'], label='Time Series 2')
# plt.legend()
# plt.title('Time Series 2')

# plt.subplot(2, 3, 5)
# plt.plot(decomposition_ts2.trend.dropna(), label='Trend Component')
# plt.legend()
# plt.title('Trend Component of TS2')

# plt.subplot(2, 3, 6)
# plt.plot(decomposition_ts2.seasonal, label='Seasonal Component')
# plt.legend()
# plt.title('Seasonal Component of TS2')

# plt.tight_layout()
# plt.show()

# # Remove seasonality by subtracting seasonal component
# data_deseasonalized = pd.DataFrame({
#     'ts1_deseasonalized': data['coastline_position_transect_2_waterline'] - decomposition_ts1.seasonal,
#     'ts2_deseasonalized': data['wave_energy_of_combined_wind_waves_and_swell'] - decomposition_ts2.seasonal
# })

# # Perform Johansen test on deseasonalized data
# result_deseasonalized = coint_johansen(data_deseasonalized, det_order=0, k_ar_diff=3)

# # Extract co-integrating vectors and compute the co-integrating relationship
# coint_vectors_deseasonalized = result_deseasonalized.evec
# coint_vector_deseasonalized = coint_vectors_deseasonalized[:, 0]  # First co-integrating vector

# # Calculate the co-integrating relationship
# coint_relationship_deseasonalized = np.dot(data_deseasonalized, coint_vector_deseasonalized)

# # Plot deseasonalized time series and co-integrating relationship
# plt.figure(figsize=(14, 6))
# # plt.plot(data_deseasonalized['ts1_deseasonalized'], label='Deseasonalized Time Series 1', color='blue')
# # plt.plot(data_deseasonalized['ts2_deseasonalized'], label='Deseasonalized Time Series 2', color='red')
# plt.plot(data.index, coint_relationship_deseasonalized, label='Co-integrating Relationship', color='green', linestyle='--')
# plt.legend()
# plt.title('Deseasonalized Time Series and Co-integrating Relationship')
# plt.show()


INFO: To supress printing the parameers in beast(),      set print.options = 0 
INFO: To supress printing the parameers in beast_irreg(),set print.options = 0 
INFO: To supress printing the parameers in beast123(),   set extra.printOptions = 0  

#--------------------------------------------------#
#       Brief summary of Input Data                #
#--------------------------------------------------#
Data Dimension: One signal of length 92
IsOrdered     : Yes, ordered in time
IsRegular     : Yes, evenly spaced at interval of  0.0833333 year = 1 months = 30.4167 days
HasSeasonCmpnt: True  | period = 1 year = 12 months = 365 days. The model 'Y=Trend+Season+Error' is fitted.
              : Num_of_DataPoints_per_Period = period/deltaTime = 1/0.0833333 = 12
HasOutlierCmpt: False | If true, Y=Trend+Season+Outlier+Error fitted instead of Y=Trend+Season+Error
Deseasonalize : False | If true, remove a global seasonal  cmpnt before running BEAST & add it back after BEAST
Detrend       : Fals

<Figure size 640x480 with 1 Axes>

In [134]:
data_s = pd.DataFrame({'ts1_s': decomposition_ts1.season.Y, 'ts2_s': decomposition_ts2.season.Y}, index=ts1.index)
data_res = pd.DataFrame({'ts1_res': ts1 - decomposition_ts1.trend.Y - decomposition_ts1.season.Y, 'ts2_res': ts2 - decomposition_ts2.trend.Y - decomposition_ts2.season.Y}, index=ts1.index)
data_t = pd.DataFrame({'ts1_t': decomposition_ts1.trend.Y, 'ts2_t': decomposition_ts2.trend.Y}, index=ts1.index)

In [143]:
fig, ax = plt.subplots()
ax.plot(ts1.index, decomposition_ts2.trend.Y)
ax2 = ax.twinx()
ax2.plot(ts1.index, decomposition_ts1.trend.Y, color='r')

[<matplotlib.lines.Line2D at 0x21a6a08b950>]

In [145]:
rb.plot(decomposition_ts2)

(<Figure size 640x480 with 9 Axes>,
 array([<Axes: xlabel='[]', ylabel='Y'>,
        <Axes: xlabel='[]', ylabel='season'>,
        <Axes: xlabel='[]', ylabel='Pr(scp)'>,
        <Axes: xlabel='[]', ylabel='sOrder'>,
        <Axes: xlabel='[]', ylabel='trend'>,
        <Axes: xlabel='[]', ylabel='Pr(tcp)'>,
        <Axes: xlabel='[]', ylabel='tOrder'>,
        <Axes: xlabel='[]', ylabel='slpsgn'>,
        <Axes: xlabel='time', ylabel='error'>], dtype=object))

In [141]:
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# 4. Johansen Co-integration Test
result = coint_johansen(data_t, det_order=0, k_ar_diff=1)

print('Johansen Test Statistics:')
print(result.lr1)  # Trace statistic
print('Critical Values (90%, 95%, 99%):')
print(result.cvt)


# Interpret the Johansen test results
for i in range(len(result.lr1)):
    trace_stat = result.lr1[i]
    critical_values = result.cvt[i]
    print(f'Trace statistic for r<={i}: {trace_stat:.2f}, critical values {critical_values}')

    if trace_stat > critical_values[1]:  # Compare to 95% critical value
        print(f'Null hypothesis of r<={i} is rejected at the 95% confidence level.')
    else:
        print(f'Null hypothesis of r<={i} cannot be rejected at the 95% confidence level.')


Johansen Test Statistics:
[10.22817199  2.17772393]
Critical Values (90%, 95%, 99%):
[[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
Trace statistic for r<=0: 10.23, critical values [13.4294 15.4943 19.9349]
Null hypothesis of r<=0 cannot be rejected at the 95% confidence level.
Trace statistic for r<=1: 2.18, critical values [2.7055 3.8415 6.6349]
Null hypothesis of r<=1 cannot be rejected at the 95% confidence level.


# Causal inference and discovery

In [17]:
# Imports
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib qt    
## use `%matplotlib notebook` for interactive figures
# plt.style.use('ggplot')
import sklearn

import tigramite
from tigramite import data_processing as pp
from tigramite.toymodels import structural_causal_processes as toys

from tigramite import plotting as tp
from tigramite.pcmci import PCMCI
from tigramite.lpcmci import LPCMCI

from tigramite.independence_tests.parcorr import ParCorr
from tigramite.independence_tests.robust_parcorr import RobustParCorr
from tigramite.independence_tests.parcorr_wls import ParCorrWLS 
from tigramite.independence_tests.gpdc import GPDC
from tigramite.independence_tests.cmiknn import CMIknn
from tigramite.independence_tests.cmisymb import CMIsymb
from tigramite.independence_tests.gsquared import Gsquared
from tigramite.independence_tests.regressionCI import RegressionCI

In [110]:
# Initialize dataframe object, specify time axis and variable names
var_names = names
dataframe = pp.DataFrame(ts_df.values, 
                         datatime = ts_df.index, 
                         var_names=var_names)

tp.plot_timeseries(dataframe)

(<Figure size 640x480 with 11 Axes>,
 array([<Axes: ylabel='wave_energy'>, <Axes: ylabel='sea_level_anomaly'>,
        <Axes: ylabel='sst'>, <Axes: ylabel='total_precipitation'>,
        <Axes: ylabel='dewpoint_temperature'>,
        <Axes: ylabel='mean_wave_direction'>,
        <Axes: ylabel='wind_direction_10m'>,
        <Axes: ylabel='wind_speed_10m'>,
        <Axes: ylabel='mean_sea_level_pressure'>,
        <Axes: ylabel='2_metre_temperature'>, <Axes: ylabel='evaporation'>],
       dtype=object))

In [165]:
# Initialize dataframe object, specify time axis and variable names
var_names = list(subset_ccf.columns)
dataframe = pp.DataFrame(subset_ccf.values, 
                         datatime = subset_ccf.index, 
                         var_names=var_names)

tp.plot_timeseries(dataframe)

(<Figure size 640x480 with 11 Axes>,
 array([<Axes: ylabel='wave_energy_of_combined_wind_waves_and_swell'>,
        <Axes: ylabel='sea_level_anomaly'>,
        <Axes: ylabel='sea_surface_temperature'>,
        <Axes: ylabel='total_precipitation'>,
        <Axes: ylabel='2_metre_dewpoint_temperature'>,
        <Axes: ylabel='mean_wave_direction'>,
        <Axes: ylabel='wind_direction_10m'>,
        <Axes: ylabel='wind_speed_10m'>,
        <Axes: ylabel='mean_sea_level_pressure'>,
        <Axes: ylabel='2_metre_temperature'>, <Axes: ylabel='evaporation'>],
       dtype=object))

In [14]:
parcorr = ParCorr(significance='analytic')
pcmci = PCMCI(
    dataframe=dataframe, 
    cond_ind_test=parcorr,
    verbosity=1)
correlations = pcmci.get_lagged_dependencies(tau_max=13, val_only=True)['val_matrix']

NameError: name 'ParCorr' is not defined

In [167]:
matrix_lags = np.argmax(np.abs(correlations), axis=2)
tp.plot_scatterplots(dataframe=dataframe, add_scatterplot_args={'matrix_lags':matrix_lags}); plt.show()

In [168]:
tp.plot_densityplots(dataframe=dataframe, add_densityplot_args={'matrix_lags':matrix_lags}); plt.show()

In [169]:
parcorr = ParCorr(significance='analytic')
pcmci = PCMCI(
    dataframe=dataframe, 
    cond_ind_test=parcorr,
    verbosity=1)

correlations = pcmci.get_lagged_dependencies(tau_max=13, val_only=True)['val_matrix']
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations, setup_args={'var_names':var_names, 
                                    'x_base':5, 'y_base':.5}); plt.show()


##
## Estimating lagged dependencies 
##

Parameters:

independence test = par_corr
tau_min = 0
tau_max = 13


In [115]:
tau_max = 12
pc_alpha = 0.01
pcmci.verbosity = 1
results = pcmci.run_pcmci(tau_max=tau_max, pc_alpha=None, alpha_level=0.1)


##
## Step 1: PC1 algorithm for selecting lagged conditions
##

Parameters:
independence test = par_corr
tau_min = 1
tau_max = 12
pc_alpha = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
max_conds_dim = None
max_combinations = 1



## Resulting lagged parent (super)sets:

    Variable wave_energy has 10 link(s):
    [pc_alpha = 0.4]
        (wave_energy -12): max_pval = 0.00013, |min_val| =  0.472
        (2_metre_temperature -1): max_pval = 0.04060, |min_val| =  0.267
        (mean_sea_level_pressure -2): max_pval = 0.04982, |min_val| =  0.244
        (total_precipitation -9): max_pval = 0.05215, |min_val| =  0.240
        (dewpoint_temperature -2): max_pval = 0.15100, |min_val| =  0.189
        (evaporation -2): max_pval = 0.17416, |min_val| =  0.167
        (mean_sea_level_pressure -6): max_pval = 0.20441, |min_val| =  0.163
        (wind_direction_10m -9): max_pval = 0.30686, |min_val| =  0.127
        (sst -1): max_pval = 0.35170, |min_val| =  0.118
        (total_precipitation -12): max_pval 

In [170]:
tau_max = 12
pc_alpha = 0.1
pcmci.verbosity = 1

results = pcmci.run_pcmciplus(tau_min=0, tau_max=tau_max, pc_alpha=pc_alpha)


##
## Step 1: PC1 algorithm for selecting lagged conditions
##

Parameters:
independence test = par_corr
tau_min = 1
tau_max = 12
pc_alpha = [0.1]
max_conds_dim = None
max_combinations = 1



## Resulting lagged parent (super)sets:

    Variable wave_energy_of_combined_wind_waves_and_swell has 6 link(s):
        (wave_energy_of_combined_wind_waves_and_swell -12): max_pval = 0.00001, |min_val| =  0.385
        (mean_wave_direction -5): max_pval = 0.00030, |min_val| =  0.322
        (mean_sea_level_pressure -2): max_pval = 0.00598, |min_val| =  0.248
        (sea_surface_temperature -2): max_pval = 0.00963, |min_val| =  0.233
        (2_metre_temperature -2): max_pval = 0.05226, |min_val| =  0.175
        (mean_wave_direction -6): max_pval = 0.05894, |min_val| =  0.171

    Variable sea_level_anomaly has 4 link(s):
        (sea_level_anomaly -1): max_pval = 0.00000, |min_val| =  0.554
        (wave_energy_of_combined_wind_waves_and_swell -3): max_pval = 0.02000, |min_val| =  0.209
     

In [171]:
q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], tau_max=tau_max, fdr_method='fdr_bh')
# pcmci.print_significant_links(
#         p_matrix = q_matrix,
#         val_matrix = results['val_matrix'],
#         alpha_level = 0.1)
graph = pcmci.get_graph_from_pmatrix(p_matrix=q_matrix, alpha_level=0.1, 
            tau_min=0, tau_max=tau_max, link_assumptions=None)
results['graph'] = graph

In [172]:
tp.plot_graph(
    val_matrix=results['val_matrix'],
    graph=results['graph'],
    var_names=var_names,
    link_colorbar_label='cross-MCI (edges)',
    node_colorbar_label='auto-MCI (nodes)',
    ); plt.show()

In [148]:
# Plot time series graph    
tp.plot_time_series_graph(
    figsize=(6, 4),
    val_matrix=results['val_matrix'],
    graph=results['graph'],
    var_names=var_names,
    link_colorbar_label='MCI',
    ); plt.show()