Read time series of _monthly_ climate indices

In [1]:
import pandas as pd
import datetime
import numpy as np
from paraguayfloodspy.pars  import GetPars

Model Parameters

In [2]:
# Analysis period comes from the project's shared 'time' parameter set.
time_pars = GetPars('time')
syear = time_pars['syear']
eyear = time_pars['eyear']

# Destination of the merged index table written at the end of the notebook.
outfile = "../_data/indices/monthly_indices.csv"

## Read in Raw

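First define a function that reads one index table and converts its time axis (a month count starting at January 1960) into dates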

In [3]:
def read_indices(raw_url, col_name, base_year=1960):
    """Read a two-column, whitespace-delimited index table into a dated frame.

    The first two lines of the source are skipped and the remaining rows are
    parsed as ``(time, value)`` pairs. ``time`` is interpreted as a count of
    months, where month 0 is January of ``base_year``; fractional values are
    floored to the containing month and every date is placed on the first day
    of that month.
    NOTE(review): assumes the source has exactly two header lines and that its
    time axis is "months since <base_year>-01-01" -- confirm against the data
    provider's format.

    Parameters
    ----------
    raw_url : str or file-like
        Anything accepted by ``pandas.read_table`` (URL, path or buffer).
    col_name : str
        Name given to the value column of the returned frame.
    base_year : int, optional
        Calendar year whose January corresponds to month 0 (default 1960).

    Returns
    -------
    pandas.DataFrame
        A single column named ``col_name`` indexed by a datetime index called
        ``'time'``.
    """
    value_col = str(col_name)
    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
    df = pd.read_table(
        raw_url,
        sep=r'\s+',
        index_col=None,
        skiprows=2,
        names=['time', value_col],
    )
    # Floor so that fractional month values map to the month that contains them.
    month_count = np.floor(df['time']).astype(np.int_)
    # Build the date parts in a separate frame so that a value column named
    # 'year', 'month' or 'day' is never overwritten. Floor division and modulo
    # keep months before base_year (negative counts) on the correct year/month.
    date_parts = pd.DataFrame({
        'year': base_year + month_count // 12,
        'month': 1 + month_count % 12,
        'day': 1,
    })
    df.index = pd.DatetimeIndex(pd.to_datetime(date_parts), name='time')
    return df[[value_col]]

Now read in the data

In [4]:
# PDO index table.
pdo_url = 'http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.PDO/.pdo/gridtable.tsv'
pdo = read_indices(pdo_url, 'pdo')

# Extended NINO3.4 index table.
enso_url = 'http://iridl.ldeo.columbia.edu/SOURCES/.Indices/.nino/.EXTENDED/.NINO34/gridtable.tsv'
enso = read_indices(enso_url, 'nino_34')

## Merge

Subset only from the start year to end year

In [5]:
# Outer join keeps every month present in either index; a month missing from
# one of them shows up as NaN in that column.
merged = pdo.join(enso, how='outer')

# Label-based slice on the datetime index: both year bounds are inclusive.
first_year, last_year = str(syear), str(eyear)
df = merged.loc[first_year:last_year]

Preview the data here to make sure it's what we want

In [6]:
# A notebook cell only renders its final expression, so the head has to be
# displayed explicitly; otherwise its result is silently discarded.
# `display` is provided by the IPython kernel and needs no import there.
display(df.head())
df.tail(10)

Unnamed: 0_level_0,pdo,nino_34
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-03-01,2.4,1.711209
2016-04-01,2.62,1.384699
2016-05-01,2.35,0.798263
2016-06-01,2.03,0.322799
2016-07-01,1.25,-0.10584
2016-08-01,0.52,-0.242217
2016-09-01,0.45,-0.130281
2016-10-01,,-0.539678
2016-11-01,,-0.298363
2016-12-01,,-0.231083


Save to file

In [7]:
# Write the merged table, including its 'time' index, to the configured CSV path.
df.to_csv(path_or_buf=outfile)