In [37]:
# Notebook to explore different correlation metrics between ENSO indices and incidence time series
# Using methods from: https://towardsdatascience.com/four-ways-to-quantify-synchrony-between-time-series-data-b99136c4a9c9

# Load libraries and extract data 

# libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# load data
country_data = pd.read_csv('~/ENSO-malaria-analysis/data/country_info.csv')
oceanic_nino_index_data = pd.read_csv('~/ENSO-malaria-analysis/data/oni_ts.csv')
pf_incidence_data = pd.read_csv('~/ENSO-malaria-analysis/data/PFIncidence_global_admin1.csv')
pf_incidence_data.Year = pd.to_datetime(pf_incidence_data.Year,format="%Y")
# extract ISO codes for African countries
country_data_africa = country_data.loc[country_data['region'] == 'Africa']

# Northern Africa sub-regions not filled in for some reason
country_data_africa['intermediate-region'] = country_data_africa['intermediate-region'].fillna('Northern Africa')
iso_africa = country_data_africa['alpha-3'].unique()

# extract incidence data for African countries
africa_incidence_data = pf_incidence_data.loc[pf_incidence_data['ISO'].isin(iso_africa)]

# group by country-level means
years = africa_incidence_data.Year.unique()
africa_incidence_data = africa_incidence_data.groupby(['ISO', 'Name_0', 'Year'], as_index=False)['incidence_rate_rmean'].mean()

# Oceanic Nino Index

# plot Oceanic Nino Index - mean and max values
oni_yearly_means = oceanic_nino_index_data.groupby('YR', as_index=False)['ANOM'].mean()
oni_yearly_means.columns = ['Year', 'ONI Anomaly (Mean)']

oni_yearly_max = oceanic_nino_index_data.groupby('YR', as_index=False)['ANOM'].max()
oni_yearly_max.columns = ['Year', 'ONI Anomaly (Max)']
oni_yearly = pd.concat([oni_yearly_means.Year, oni_yearly_means['ONI Anomaly (Mean)'], oni_yearly_max['ONI Anomaly (Max)']], axis=1)
oni_yearly = oni_yearly.melt('Year', var_name='cols', value_name='vals')
oni_yearly = oni_yearly[oni_yearly.Year >= 2000]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [40]:
# let's use Mean values for now...

oni_yearly_means = oni_yearly_means[oni_yearly_means.Year.between(2000,2020)]
oni_mean_values = oni_yearly_means['ONI Anomaly (Mean)']

In [56]:
# 1. Time-lagged cross correlation / Granger's causality 
# We will try a bunch of different lagged correlations between the ENSO indices and the malaria incidence time series to see which lag yields the greatest
# correlaton value 

def crosscorr(datax, datay, lag=0, wrap=False):
    """ Lag-N cross correlation. 
    Shifted data filled with NaNs 
    
    Parameters
    ----------
    lag : int, default 0
    datax, datay : pandas.Series objects of equal length
    Returns
    ----------
    crosscorr : float
    """
    if wrap:
        shiftedy = datay.shift(lag)
        shiftedy.iloc[:lag] = datay.iloc[-lag:].values
        return datax.corr(shiftedy)
    else: 
        return datax.corr(datay.shift(lag))
    

# get countries
countries = africa_incidence_data['Name_0'].unique()
for country in countries:
    incidence_data = africa_incidence_data[africa_incidence_data['Name_0'] == country]['incidence_rate_rmean']
    rs = [crosscorr(incidence_data,oni_mean_values, lag) for lag in range(-3,3)]
    print(rs)


[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[0.1821561894580574, 0.38645966407833665, 0.07744420078885601, -0.4752059141075736, -0.6971973698831753, -0.6554526086715895]
[0.3984920654912021, 0.5065944775490329, 0.43472240983198535, 0.2205520775376448, 0.008902328559802577, -0.30578650697564197]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan]
[nan