In [1]:
import numpy as np
import pandas as pd
import xarray
import glob

In [2]:
#Inputs: one csv per site, all of which get merged into a single netcdf file.
#data_dir must keep its trailing '/', because file paths are built further down
#by plain string concatenation (data_dir + file name).
data_dir = '/Users/gbromley/Dropbox/TropicalSynthesis/AllData/FutureProjections/'
data_files = [
    'AU-Rob.csv',
    'BR-Ban.csv',
    'BR-Cax.csv',
    'BR-Ji2.csv',
    'BR-Ma2.csv',
    'BR-Sa1.csv',
    'BR-Sa3.csv',
    'GF-Guy.csv',
    'GH-Ank.csv',
    'ID-Pag.csv',
    'MY-PSO.csv',
    'PA-SPn.csv',
    'VU-Coc.csv',
]

In [None]:
#Monthly time axis matching the csv files: 1812 month-start stamps beginning
#January 1950 (151 full years, i.e. through December 2100).
date_range = pd.date_range(start='1/1/1950', periods=1812, freq='MS')

#Empty accumulator for the per-site datasets.
ds_list = list()

In [4]:
#extracts the variables from each site csv, gives dimensions time and gcm(climate model)
#adds the newly created dataset to a list for merging
#
#assumed csv layout (two header rows, read as MultiIndex columns) -- TODO confirm against the files:
#  column positions 0-1   : not used here
#  column positions 2-14  : vpd,  one column per gcm
#  column positions 15-27 : tmax, one column per gcm
#  column positions 28-40 : tmin, one column per gcm
n_gcm = 13
n_cols_needed = 2 + 3*n_gcm

#the time axis is the same for every site, so build it once instead of once per file
time_index = pd.date_range('1/1/1950', periods=1812, freq='MS')

#start from an empty list so that re-running this cell does not append every site twice
ds_list = []
for site_file in data_files:
    raw_site_data = pd.read_csv(data_dir + site_file, header=[0,1])

    #fail early with a readable message if a file is missing columns
    if raw_site_data.shape[1] < n_cols_needed:
        raise ValueError('%s has %d columns, expected at least %d'
                         % (site_file, raw_site_data.shape[1], n_cols_needed))

    #.iloc is the purely positional indexer (the old .ix indexer has been removed from pandas);
    #each slice is exactly n_gcm columns wide so it matches the 'gcm' coordinate
    ds = xarray.Dataset({'vpd': (['time','gcm'], raw_site_data.iloc[:, 2:15].values),
                         'tmax': (['time','gcm'], raw_site_data.iloc[:, 15:28].values),
                         'tmin': (['time','gcm'], raw_site_data.iloc[:, 28:41].values)},
                        coords={'gcm': np.arange(n_gcm),
                                'time': time_index})

    ds_list.append(ds)

In [5]:
#stack the per-site datasets along a brand-new 'site' dimension
all_sites_ds = xarray.concat(objs=ds_list, dim='site')

In [8]:
#label the 'site' dimension with site names (each file name up to its first '.')
#instead of leaving it as a bare integer index
names = np.array([csv_name.split('.')[0] for csv_name in data_files]).astype(str)
all_sites_ds['site'] = names

#the intended time unit could not be written on the time variable itself without
#errors, so it is recorded as a global attribute of the dataset instead
all_sites_ds.attrs['actual_time_unit'] = 'months since 1950-01-01'

#write everything to a single netcdf file ('w' overwrites an existing file)
all_sites_ds.to_netcdf('ClimProjAll.nc', mode='w')

In [7]:
#making sure the dataset looks reasonable: the repr should list the gcm, time and
#site coordinates and the vpd, tmax and tmin variables on (site, time, gcm)
all_sites_ds

<xarray.Dataset>
Dimensions:  (gcm: 13, site: 13, time: 1812)
Coordinates:
  * gcm      (gcm) int64 0 1 2 3 4 5 6 7 8 9 10 11 12
  * time     (time) datetime64[ns] 1950-01-01 1950-02-01 1950-03-01 ...
  * site     (site) <U6 'AU-Rob' 'BR-Ban' 'BR-Cax' 'BR-Ji2' 'BR-Ma2' ...
Data variables:
    vpd      (site, time, gcm) float64 0.9142 0.7234 1.087 0.887 0.8595 ...
    tmax     (site, time, gcm) float64 30.08 27.53 31.27 30.21 29.5 30.54 ...
    tmin     (site, time, gcm) float64 20.71 19.29 20.42 21.28 20.5 20.38 ...
Attributes:
    actual_time_unit: months since 1950-01-01

In [14]:
#spot-check by eye: vpd series for one site and one climate model, to be
#compared against the values in the original csv
all_sites_ds.sel({'site': 'BR-Sa1', 'gcm': 0})['vpd']

<xarray.DataArray 'vpd' (time: 1812)>
array([ 0.88412562,  0.80626845,  0.78622362, ...,  2.53492563,
        2.39351788,  2.07285907])
Coordinates:
    gcm      int64 0
  * time     (time) datetime64[ns] 1950-01-01 1950-02-01 1950-03-01 ...
    site     <U6 'BR-Sa1'