# Compiling env data for satellite models

## Two types of satellite data: historical, and contemporary

I ran the commands in go_satellite_data.sh to get sst_, chla_, poc_, pic_, npp_, and par_ compiled.nc files, which contain the 9km (or 4km for chla) resolution monthly average data for 2009-2013. 

I also downloaded historical averages for a range of environmental variables; these are stored in the historical_data folder, and historical_data_guide.xlsx summarizes what each field is. The files differ in structure: some have depth levels while others are surface-only; some are monthly averages while others are annual; some are 50-yr averages while others are decadal; etc.

### Need to go through each data source, and pull the data point from the correct latitude, longitude, and depth (nearest possible point, won't be exact) for each TARA site

## Create TARA dataframe 

- latitude
- longitude
- year sampled (from sampling date)
- month sampled (from sampling date)
- sampling depth
- filter_range (from the lower and upper filter size thresholds)


## Get general info from samples (lat, lon, depth, etc)

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import netCDF4

In [68]:
# Load the TARA sample description table and keep only the columns needed
# below: sample label, run accessions, sampling date/time, position, depth
# and the two size-fraction thresholds.
samples = pd.read_csv("TARASampleDescriptionTable.csv")
samples = samples[['Sample label [TARA_station#_environmental-feature_size-fraction]', 'INSDC run accession number(s)', 
         'Date/Time [yyyy-mm-ddThh:mm]', 'Latitude [degrees North]', 'Longitude [degrees East]', 'Sampling depth [m]', 
         'Size fraction lower threshold [micrometre]', 'Size fraction upper threshold [micrometre]']]
# Preview the first rows.
samples.head()

Unnamed: 0,Sample label [TARA_station#_environmental-feature_size-fraction],INSDC run accession number(s),Date/Time [yyyy-mm-ddThh:mm],Latitude [degrees North],Longitude [degrees East],Sampling depth [m],Size fraction lower threshold [micrometre],Size fraction upper threshold [micrometre]
0,TARA_004_DCM_0.22-1.6,ERR598950|ERR599095,2009-09-15T18:00,36.5533,-6.5669,40,0.22,1.6
1,TARA_004_SRF_0.22-1.6,ERR598955|ERR599003,2009-09-15T11:30,36.5533,-6.5669,5,0.22,1.6
2,TARA_007_DCM_0.22-1.6,ERR315856,2009-09-23T16:08,37.0541,1.9478,42,0.22,1.6
3,TARA_007_SRF_0.22-1.6,ERR315857,2009-09-23T12:50,37.051,1.9378,5,0.22,1.6
4,TARA_009_DCM_0.22-1.6,ERR594315|ERR594329,2009-09-28T16:59,39.0609,5.9422,55,0.22,1.6


In [67]:
# Build the per-sample table `env` from `samples`, using short column names.
env = pd.DataFrame({'TARA_sample_label':samples['Sample label [TARA_station#_environmental-feature_size-fraction]']})
env['run_ids'] = samples['INSDC run accession number(s)']
# Cast BOTH thresholds to str before joining them: adding '-' to a numeric
# lower-threshold column raises TypeError, so the cast must not be limited to
# the upper threshold.
env['filter_range'] = (samples['Size fraction lower threshold [micrometre]'].astype(str)
                       + '-' + samples['Size fraction upper threshold [micrometre]'].astype(str))
env['latitude'] = samples['Latitude [degrees North]']
env['longitude'] = samples['Longitude [degrees East]']
env['depth_sampled'] = samples['Sampling depth [m]']
# The date/time strings look like 2009-09-15T18:00; only year and month are kept.
env['year_sampled'] = samples['Date/Time [yyyy-mm-ddThh:mm]'].apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%dT%H:%M').year)
env['month_sampled'] = samples['Date/Time [yyyy-mm-ddThh:mm]'].apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%dT%H:%M').month)
env.head()

Unnamed: 0,TARA_sample_label,run_ids,filter_range,latitude,longitude,depth_sampled,year_sampled,month_sampled
0,TARA_004_DCM_0.22-1.6,ERR598950|ERR599095,0.22-1.6,36.5533,-6.5669,40,2009,9
1,TARA_004_SRF_0.22-1.6,ERR598955|ERR599003,0.22-1.6,36.5533,-6.5669,5,2009,9
2,TARA_007_DCM_0.22-1.6,ERR315856,0.22-1.6,37.0541,1.9478,42,2009,9
3,TARA_007_SRF_0.22-1.6,ERR315857,0.22-1.6,37.051,1.9378,5,2009,9
4,TARA_009_DCM_0.22-1.6,ERR594315|ERR594329,0.22-1.6,39.0609,5.9422,55,2009,9


## Add contemporary satellite data

* sst
* chla
* npp
* par
* pic
* poc

In [255]:
#function to get gregorian days since e.g. 1970-01-01
def get_yrmo_gregorian_timedelta(year, month, datetime_since):
    """Return the whole days from `datetime_since` to the last day of a month.

    Parameters
    ----------
    year, month : int
        Calendar year and 1-based month. Any Gregorian year is accepted
        (the previous lookup table only knew 2008-2013).
    datetime_since : datetime.datetime
        Reference instant, e.g. ``dt.datetime(1970, 1, 1)``.

    Returns
    -------
    int
        ``.days`` of (midnight on the last day of `month`) - `datetime_since`.

    Raises
    ------
    ValueError
        If `month` is not in 1..12 (raised by ``calendar.monthrange``).
    """
    # Local import: the notebook's import cell does not load `calendar`.
    import calendar

    # Values pulled out of a DataFrame row may be numpy or float scalars;
    # datetime() needs real integers.
    year = int(year)
    month = int(month)

    # monthrange() returns (weekday of the 1st, number of days in the month)
    # and applies the leap-year rule itself.
    last_day = calendar.monthrange(year, month)[1]
    return (dt.datetime(year, month, last_day) - datetime_since).days
    
#function to pull satellite value from lat-lon array for correct time

def pull_satellite_data(nc,year,month,z_variable,lat_target,lon_target,dep_target, lat="lat",lon="lon",time="time", vert="vert", index_order=['time', 'lat', 'lon']):
    """Return the value of one gridded variable nearest to a sample.

    Parameters
    ----------
    nc : object with a ``variables`` mapping
        The notebook passes ``netCDF4.Dataset`` instances.
    year, month : int
        Sampling year and 1-based month, used to choose the time index.
    z_variable : str
        Name of the data variable to read.
    lat_target, lon_target : float
        Sample position; the grid cell with the smallest absolute difference
        on each axis is used.
    dep_target : float
        Sample depth. Its sign is flipped when the depth axis contains
        negative values.
    lat, lon, time, vert : str
        Names of the coordinate variables inside `nc`.
    index_order : sequence of str
        Order in which the axis keys ``'time'``, ``'lat'``, ``'lon'`` and
        ``'vert'`` index `z_variable`. It is only read, never modified.

    Returns
    -------
    The selected element, or ``nan`` when the element is masked and its raw
    value is -9999.

    Raises
    ------
    AttributeError
        If a multi-step time axis has units other than
        'days since 1970-01-01', 'Month' or 'Months'.
    ValueError
        If a 'days since 1970-01-01' axis does not contain exactly one entry
        for the requested year/month.
    KeyError
        If `index_order` names an axis for which no index was resolved.
    """
    import numpy as np
    indices = {}

    # Nearest grid cell on each horizontal axis. int() keeps the printed
    # index list free of numpy scalar reprs.
    # NOTE(review): assumes `lon_target` uses the same longitude convention
    # as the file (e.g. -180..180 vs 0..360) - TODO confirm per data source.
    lats = nc.variables[lat][:]
    lons = nc.variables[lon][:]
    indices['lat'] = int(np.argmin(np.abs(lats-lat_target)))
    indices['lon'] = int(np.argmin(np.abs(lons-lon_target)))

    # Time index.
    if time in nc.variables:
        times = nc.variables[time][:]
        n_times = len(times)
        if n_times > 1:
            units = nc.variables[time].units
            if units == 'days since 1970-01-01':
                if n_times == 12:
                    # twelve steps: treated as one entry per calendar month
                    indices['time'] = int(month) - 1
                else:
                    # steps are stamped with the last day of each month
                    days_since = get_yrmo_gregorian_timedelta(year, month, dt.datetime(1970,1,1))
                    hits = np.flatnonzero(np.ma.filled(times == days_since, False))
                    if hits.size != 1:
                        raise ValueError(
                            "expected exactly one time step equal to %s days since 1970-01-01 "
                            "(year %s, month %s), found %d" % (days_since, year, month, hits.size))
                    indices['time'] = int(hits[0])
            elif units in ('Month', 'Months'):
                # this works if the time is 1:12 for the months of the year
                indices['time'] = int(month) - 1
            else:
                raise AttributeError("time units in netcdf are not 'days since 1970-01-01' or 'Month'")
        elif n_times == 1:
            indices['time'] = 0

    # Depth index. The `vert` argument names the depth variable; the previous
    # code read the literal 'vert' here and ignored the argument.
    if vert in nc.variables:
        deps = nc.variables[vert][:]
        if len(deps) == 1:
            indices['vert'] = 0
        elif len(deps) > 1:
            if np.min(deps) < 0:
                # depth axis stored as negative numbers: match its sign
                dep_target = dep_target*-1
            indices['vert'] = int(np.argmin(np.abs(deps-dep_target)))

    # Fail with a readable message when index_order asks for an axis that the
    # file did not provide.
    unresolved = [k for k in index_order if k not in indices]
    if unresolved:
        raise KeyError("no index could be resolved for axis/axes %s requested in index_order"
                       % unresolved)

    # Index one axis at a time, in the order the variable stores them.
    # NOTE(review): the first step may read a whole slab from disk per call;
    # tuple indexing would avoid that, but confirm the return type first.
    ix_order = [indices[k] for k in index_order]
    subset = nc.variables[z_variable]
    for ix in ix_order:
        subset = subset[ix]
    # if is masked value, and int of data is -9999, change to NaN
    if np.ma.is_masked(subset):
        if int(subset.data)==-9999:
            subset = np.nan
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("retrieving index %s, month is %s: %s" % (ix_order, month, subset))
    return subset

In [256]:
#function to create new satellite column
def create_satellite_column(env, nc, z_var, year_sampled='year_sampled', month_sampled='month_sampled', depth_sampled='depth_sampled',
                            latitude='latitude', longitude='longitude', lat="lat",lon="lon",time="time", vert="vert",
                            index_order=['time', 'lat', 'lon']):
    """Look up `z_var` in `nc` for every row of `env`.

    The ``*_sampled``, ``latitude`` and ``longitude`` arguments name the
    columns of `env` that hold each sample's year, month, depth and position.
    ``lat``, ``lon``, ``time``, ``vert`` and ``index_order`` are forwarded
    unchanged to ``pull_satellite_data``.

    Returns a list with one looked-up value per row, in row order.
    """
    def lookup(sample):
        # one call per sample row; column values are read in signature order
        return pull_satellite_data(
            nc,
            year=sample[year_sampled],
            month=sample[month_sampled],
            z_variable=z_var,
            lat_target=sample[latitude],
            lon_target=sample[longitude],
            dep_target=sample[depth_sampled],
            lat=lat, lon=lon, time=time, vert=vert,
            index_order=index_order,
        )

    return [lookup(sample) for _, sample in env.iterrows()]

In [258]:
# Get chl satellite data: open the compiled monthly chlorophyll file, look up
# "chlor_a" for every sample with the default coordinate names and
# (time, lat, lon) index order, and store the result as a new env column.
chl_nc = netCDF4.Dataset("satellite_data/chla_monthly_compiled.nc")
chl = create_satellite_column(env=env, nc=chl_nc, z_var="chlor_a")
env['chl_satellite'] = chl
env.head()

Unnamed: 0,TARA_sample_label,run_ids,filter_range,latitude,longitude,depth_sampled,year_sampled,month_sampled,chl_satellite
0,TARA_004_DCM_0.22-1.6,ERR598950|ERR599095,0.22-1.6,36.5533,-6.5669,40,2009,9,0.319966
1,TARA_004_SRF_0.22-1.6,ERR598955|ERR599003,0.22-1.6,36.5533,-6.5669,5,2009,9,0.319966
2,TARA_007_DCM_0.22-1.6,ERR315856,0.22-1.6,37.0541,1.9478,42,2009,9,0.14131
3,TARA_007_SRF_0.22-1.6,ERR315857,0.22-1.6,37.051,1.9378,5,2009,9,0.14131
4,TARA_009_DCM_0.22-1.6,ERR594315|ERR594329,0.22-1.6,39.0609,5.9422,55,2009,9,0.084704


In [259]:
# Repeat the chlorophyll lookup for the other compiled satellite products.
# Each stanza opens one file, pulls one variable per sample with the default
# settings of create_satellite_column, and adds a "<name>_satellite" column.
# NOTE(review): none of the datasets opened here is closed in this cell.
#sst
print "processing sst..."
sst_nc = netCDF4.Dataset("satellite_data/sst_compiled.nc")
sst = create_satellite_column(env=env, nc=sst_nc, z_var="sst")
env['sst_satellite'] = sst
#par
print "processing par..."
par_nc = netCDF4.Dataset("satellite_data/par_monthly_compiled.nc")
par = create_satellite_column(env=env, nc=par_nc, z_var="par")
env['par_satellite'] = par
#pic
print "processing pic..."
pic_nc = netCDF4.Dataset("satellite_data/pic_monthly_compiled.nc")
pic = create_satellite_column(env=env, nc=pic_nc, z_var="pic")
env['pic_satellite'] = pic
#poc
print "processing poc..."
poc_nc = netCDF4.Dataset("satellite_data/poc_monthly_compiled.nc")
poc = create_satellite_column(env=env, nc=poc_nc, z_var="poc")
env['poc_satellite'] = poc
#npp
print "processing npp..."
npp_nc = netCDF4.Dataset("satellite_data/npp_monthly_compiled.nc")
npp = create_satellite_column(env=env, nc=npp_nc, z_var="npp")
env['npp_satellite'] = npp

# Preview the table with the new columns.
env.head()

processing sst...
processing par...
processing pic...
processing poc...
processing npp...


Unnamed: 0,TARA_sample_label,run_ids,filter_range,latitude,longitude,depth_sampled,year_sampled,month_sampled,chl_satellite,sst_satellite,par_satellite,pic_satellite,poc_satellite,npp_satellite
0,TARA_004_DCM_0.22-1.6,ERR598950|ERR599095,0.22-1.6,36.5533,-6.5669,40,2009,9,0.319966,22.81,42.7459,2.3e-05,76.5341,747.362
1,TARA_004_SRF_0.22-1.6,ERR598955|ERR599003,0.22-1.6,36.5533,-6.5669,5,2009,9,0.319966,22.81,42.7459,2.3e-05,76.5341,747.362
2,TARA_007_DCM_0.22-1.6,ERR315856,0.22-1.6,37.0541,1.9478,42,2009,9,0.14131,25.325,39.8782,1.2e-05,41.9895,417.218
3,TARA_007_SRF_0.22-1.6,ERR315857,0.22-1.6,37.051,1.9378,5,2009,9,0.14131,25.325,39.8782,1.2e-05,41.9895,417.218
4,TARA_009_DCM_0.22-1.6,ERR594315|ERR594329,0.22-1.6,39.0609,5.9422,55,2009,9,0.084704,26.425,38.8821,1.2e-05,31.1721,298.7


## Add historical satellite data

In [43]:
# Check which time/vert branch of pull_satellite_data applies to calcite_nc:
# does each axis exist, and does it have exactly one entry or several?
# The two 'vert' checks now measure the 'vert' axis; they previously measured
# the length of 'time' (copy-paste slip).
# NOTE: calcite_nc is opened in a cell that appears further down the notebook.
print('time' in calcite_nc.variables and len(calcite_nc.variables['time'][:])==1)
print('time' in calcite_nc.variables and len(calcite_nc.variables['time'][:])!=1)
print('vert' in calcite_nc.variables and len(calcite_nc.variables['vert'][:])==1)
print('vert' in calcite_nc.variables and len(calcite_nc.variables['vert'][:])!=1)
print(calcite_nc['calcite'])

True
False
False
False
<type 'netCDF4._netCDF4.Variable'>
float64 calcite(time, lat, lon)
    units: mole_per_cubic_meter
    missing_value: -9999
    long_name: Calcite concentration
    cell_methods: area: mean time: mean
unlimited dimensions: time
current shape = (1, 2160, 4320)
filling off



In [21]:
# Open historical_data/calciteAnmeanBiooracle.nc and print its variable names.
calcite_nc = netCDF4.Dataset("historical_data/calciteAnmeanBiooracle.nc")
print calcite_nc.variables.keys()

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'calcite']


In [44]:
# Show the metadata (dimensions, units, missing value, shape) of 'calcite'.
print calcite_nc.variables['calcite']

<type 'netCDF4._netCDF4.Variable'>
float64 calcite(time, lat, lon)
    units: mole_per_cubic_meter
    missing_value: -9999
    long_name: Calcite concentration
    cell_methods: area: mean time: mean
unlimited dimensions: time
current shape = (1, 2160, 4320)
filling off



In [91]:
# Look up calcite for every sample. index_order follows the (time, lat, lon)
# dimension order printed for the variable; depth_sampled keeps its default.
calcite = create_satellite_column(env=env, nc=calcite_nc, z_var='calcite', year_sampled='year_sampled', month_sampled='month_sampled', 
                            latitude='latitude', longitude='longitude', lat="lat",lon="lon",time="time", vert="vert",
                            index_order=['time', 'lat', 'lon'])

[0, 1518, 2081]
[0, 1518, 2081]
[0, 1524, 2183]
[0, 1524, 2183]
[0, 1548, 2231]
[0, 1549, 2230]
[0, 1509, 2331]
[0, 1509, 2331]
[0, 1509, 2331]
[0, 1509, 2331]
[0, 1558, 2368]
[0, 1586, 2372]
[0, 1586, 2372]
[0, 1586, 2372]
[0, 1552, 2392]
[0, 1552, 2392]
[0, 1552, 2392]
[0, 1552, 2392]
[0, 1487, 2553]
[0, 1487, 2553]
[0, 1487, 2554]
[0, 1405, 2578]
[0, 1405, 2578]
[0, 1361, 2606]
[0, 1361, 2606]
[0, 1360, 2606]
[0, 1360, 2606]
[0, 1343, 2619]
[0, 1301, 2638]
[0, 1301, 2638]
[0, 1300, 2638]
[0, 1300, 2638]
[0, 1300, 2638]
[0, 1329, 2922]
[0, 1329, 2922]
[0, 1329, 2922]
[0, 1329, 2922]
[0, 1329, 2922]
[0, 1330, 2923]
[0, 1330, 2923]
[0, 1308, 2934]
[0, 1308, 2934]
[0, 1308, 2934]
[0, 1308, 2934]
[0, 1308, 2933]
[0, 1308, 2933]
[0, 1308, 2933]
[0, 1303, 2957]
[0, 1303, 2957]
[0, 1304, 2956]
[0, 1304, 2956]
[0, 1303, 2959]
[0, 1303, 2959]
[0, 1254, 3000]
[0, 1254, 3000]
[0, 1255, 2999]
[0, 1255, 2999]
[0, 1255, 2999]
[0, 1151, 3046]
[0, 1151, 3046]
[0, 1152, 3046]
[0, 1152, 3046]
[0, 1080

In [93]:
# Store the calcite lookups as a new env column and preview the table.
# NOTE(review): the rendered table below names this column
# 'calcite_historical' - presumably output left over from an earlier run.
env['calcite_annual_historical'] = calcite
env.head()

Unnamed: 0,TARA_sample_label,run_ids,filter_range,latitude,longitude,depth_sampled,year_sampled,month_sampled,chl_satellite,sst_satellite,par_satellite,pic_satellite,poc_satellite,npp_satellite,calcite_historical
0,TARA_004_DCM_0.22-1.6,ERR598950|ERR599095,0.22-1.6,36.5533,-6.5669,40,2009,9,0.319966,22.81,42.7459,2.3e-05,76.5341,747.362,0.001437
1,TARA_004_SRF_0.22-1.6,ERR598955|ERR599003,0.22-1.6,36.5533,-6.5669,5,2009,9,0.319966,22.81,42.7459,2.3e-05,76.5341,747.362,0.001437
2,TARA_007_DCM_0.22-1.6,ERR315856,0.22-1.6,37.0541,1.9478,42,2009,9,0.14131,25.325,39.8782,1.2e-05,41.9895,417.218,0.000125
3,TARA_007_SRF_0.22-1.6,ERR315857,0.22-1.6,37.051,1.9378,5,2009,9,0.14131,25.325,39.8782,1.2e-05,41.9895,417.218,0.000125
4,TARA_009_DCM_0.22-1.6,ERR594315|ERR594329,0.22-1.6,39.0609,5.9422,55,2009,9,0.084704,26.425,38.8821,1.2e-05,31.1721,298.7,8.9e-05


In [116]:
# Open historical_data/chloMomeanNASA.nc and print its variable names plus
# the metadata of the data variable and of the time axis.
chlormo_nc = netCDF4.Dataset("historical_data/chloMomeanNASA.nc")
print chlormo_nc.variables.keys()
print chlormo_nc.variables['Chlorophyll_Concentration']
print chlormo_nc.variables['time']

[u'lon', u'lat', u'vert', u'time', u'Chlorophyll_Concentration']
<type 'netCDF4._netCDF4.Variable'>
float32 Chlorophyll_Concentration(lat, lon, vert, time)
    units: Milligrams_per_Cubic_Meter
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720, 1, 12)
filling off

<type 'netCDF4._netCDF4.Variable'>
float32 time(time)
    units: Month
unlimited dimensions: 
current shape = (12,)
filling off



In [117]:
# Look up chlorophyll for every sample. index_order follows the
# (lat, lon, vert, time) dimension order printed for this variable; the
# 12-entry 'Month' time axis is indexed by sampling month.
chlormo = create_satellite_column(env=env, nc=chlormo_nc, z_var='Chlorophyll_Concentration', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['lat', 'lon', 'vert', 'time'])

retrieving index [253, 346, 0, 8], month is 9: 0.467955
retrieving index [253, 346, 0, 8], month is 9: 0.467955
retrieving index [254, 363, 0, 8], month is 9: 0.118083
retrieving index [254, 363, 0, 8], month is 9: 0.118083
retrieving index [258, 371, 0, 8], month is 9: 0.0983434
retrieving index [258, 371, 0, 8], month is 9: 0.0983434
retrieving index [251, 388, 0, 10], month is 11: 0.116823
retrieving index [251, 388, 0, 10], month is 11: 0.116823
retrieving index [251, 388, 0, 10], month is 11: 0.116823
retrieving index [251, 388, 0, 10], month is 11: 0.116823
retrieving index [259, 394, 0, 10], month is 11: 0.282819
retrieving index [264, 395, 0, 10], month is 11: 0.231029
retrieving index [264, 395, 0, 10], month is 11: 0.231029
retrieving index [264, 395, 0, 10], month is 11: 0.231029
retrieving index [258, 398, 0, 10], month is 11: 0.172852
retrieving index [258, 398, 0, 10], month is 11: 0.172852
retrieving index [258, 398, 0, 10], month is 11: 0.172852
retrieving index [258, 3

In [118]:
# Store the chloMomeanNASA lookups as a new env column.
env['chla_monthly_historical'] = chlormo

In [124]:
# Open historical_data/chlorAnmeanBiooracle.nc and print its variable names
# plus the metadata of 'chlor' and of the time axis.
chloran_nc = netCDF4.Dataset("historical_data/chlorAnmeanBiooracle.nc")
print chloran_nc.variables.keys()
print chloran_nc.variables['chlor']
print chloran_nc.variables['time']

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'chlor']
<type 'netCDF4._netCDF4.Variable'>
float64 chlor(time, lat, lon)
    units: milligrams_per_cubic_meter
    missing_value: -9999
    long_name: Chlorophyll A concentration
    cell_methods: area: mean time: mean
unlimited dimensions: time
current shape = (1, 2160, 4320)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (1,)
filling off



In [125]:
# Look up 'chlor' for every sample. The variable is (time, lat, lon) with a
# single time step, so every sample reads time index 0.
chloran = create_satellite_column(env=env, nc=chloran_nc, z_var='chlor', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['time','lat', 'lon'])

retrieving index [0, 1518, 2081], month is 9: 1.103
retrieving index [0, 1518, 2081], month is 9: 1.103
retrieving index [0, 1524, 2183], month is 9: 0.269
retrieving index [0, 1524, 2183], month is 9: 0.269
retrieving index [0, 1548, 2231], month is 9: 0.195
retrieving index [0, 1549, 2230], month is 9: 0.204
retrieving index [0, 1509, 2331], month is 11: 0.132
retrieving index [0, 1509, 2331], month is 11: 0.132
retrieving index [0, 1509, 2331], month is 11: 0.132
retrieving index [0, 1509, 2331], month is 11: 0.132
retrieving index [0, 1558, 2368], month is 11: 0.237
retrieving index [0, 1586, 2372], month is 11: 0.204
retrieving index [0, 1586, 2372], month is 11: 0.204
retrieving index [0, 1586, 2372], month is 11: 0.204
retrieving index [0, 1552, 2392], month is 11: 0.159
retrieving index [0, 1552, 2392], month is 11: 0.159
retrieving index [0, 1552, 2392], month is 11: 0.159
retrieving index [0, 1552, 2392], month is 11: 0.159
retrieving index [0, 1487, 2553], month is 12: 0.092

In [126]:
# Store the chlorAnmeanBiooracle lookups as a new env column.
env['chla_annual_historical'] = chloran

In [128]:
# Open historical_data/chlorAnrangeBiooracle.nc and print its variable names
# plus the metadata of 'chlorrange' and of the time axis.
chlrange_nc = netCDF4.Dataset("historical_data/chlorAnrangeBiooracle.nc")
print chlrange_nc.variables.keys()
print chlrange_nc.variables['chlorrange']
print chlrange_nc.variables['time']

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'chlorrange']
<type 'netCDF4._netCDF4.Variable'>
float64 chlorrange(time, lat, lon)
    units: milligrams_per_cubic_meter
    missing_value: -9999
    long_name: Chlorophyll A concentration range
    cell_methods: area: mean time: range within years time: mean over years
unlimited dimensions: time
current shape = (1, 2160, 4320)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (1,)
filling off



In [139]:
# Look up 'chlorrange' for every sample. The variable is (time, lat, lon)
# with a single time step.
chlrange = create_satellite_column(env=env, nc=chlrange_nc, z_var='chlorrange', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['time','lat', 'lon'])

retrieving index [0, 1518, 2081], month is 9: 3.43
retrieving index [0, 1518, 2081], month is 9: 3.43
retrieving index [0, 1524, 2183], month is 9: 0.372
retrieving index [0, 1524, 2183], month is 9: 0.372
retrieving index [0, 1548, 2231], month is 9: 0.295
retrieving index [0, 1549, 2230], month is 9: 0.309
retrieving index [0, 1509, 2331], month is 11: 0.135
retrieving index [0, 1509, 2331], month is 11: 0.135
retrieving index [0, 1509, 2331], month is 11: 0.135
retrieving index [0, 1509, 2331], month is 11: 0.135
retrieving index [0, 1558, 2368], month is 11: 0.251
retrieving index [0, 1586, 2372], month is 11: 0.209
retrieving index [0, 1586, 2372], month is 11: 0.209
retrieving index [0, 1586, 2372], month is 11: 0.209
retrieving index [0, 1552, 2392], month is 11: 0.114
retrieving index [0, 1552, 2392], month is 11: 0.114
retrieving index [0, 1552, 2392], month is 11: 0.114
retrieving index [0, 1552, 2392], month is 11: 0.114
retrieving index [0, 1487, 2553], month is 12: 0.09799

In [130]:
# Store the chlorAnrangeBiooracle lookups as a new env column.
env['chla_annualrange_historical'] = chlrange

In [131]:
# Open historical_data/cldAnmeanBiooracle.nc and print its variable names
# plus the metadata of 'cld' and of the time axis.
cloud_nc = netCDF4.Dataset("historical_data/cldAnmeanBiooracle.nc")
print cloud_nc.variables.keys()
print cloud_nc.variables['cld']
print cloud_nc.variables['time']

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'cld']
<type 'netCDF4._netCDF4.Variable'>
float64 cld(time, lat, lon)
    units: percent
    missing_value: -9999
    long_name: Cloud fraction
    cell_methods: area: mean time: mean
unlimited dimensions: time
current shape = (1, 2160, 4320)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (1,)
filling off



In [132]:
# Look up 'cld' for every sample. The variable is (time, lat, lon) with a
# single time step.
cloudan = create_satellite_column(env=env, nc=cloud_nc, z_var='cld', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['time','lat', 'lon'])

retrieving index [0, 1518, 2081], month is 9: 0.428
retrieving index [0, 1518, 2081], month is 9: 0.428
retrieving index [0, 1524, 2183], month is 9: 0.558
retrieving index [0, 1524, 2183], month is 9: 0.558
retrieving index [0, 1548, 2231], month is 9: 0.588
retrieving index [0, 1549, 2230], month is 9: 0.605
retrieving index [0, 1509, 2331], month is 11: 0.489
retrieving index [0, 1509, 2331], month is 11: 0.489
retrieving index [0, 1509, 2331], month is 11: 0.489
retrieving index [0, 1509, 2331], month is 11: 0.489
retrieving index [0, 1558, 2368], month is 11: 0.49
retrieving index [0, 1586, 2372], month is 11: 0.566
retrieving index [0, 1586, 2372], month is 11: 0.566
retrieving index [0, 1586, 2372], month is 11: 0.566
retrieving index [0, 1552, 2392], month is 11: 0.564
retrieving index [0, 1552, 2392], month is 11: 0.564
retrieving index [0, 1552, 2392], month is 11: 0.564
retrieving index [0, 1552, 2392], month is 11: 0.564
retrieving index [0, 1487, 2553], month is 12: 0.502


In [133]:
# Store the cldAnmeanBiooracle lookups as a new env column.
env['cloudfraction_annual_historical'] = cloudan

In [134]:
# Open historical_data/cloudfracMomeanNASA.nc and print its variable names
# plus the metadata of 'cloud_fraction' and of the time axis.
cloudmo_nc = netCDF4.Dataset("historical_data/cloudfracMomeanNASA.nc")
print cloudmo_nc.variables.keys()
print cloudmo_nc.variables['cloud_fraction']
print cloudmo_nc.variables['time']

[u'lon', u'lat', u'vert', u'time', u'cloud_fraction']
<type 'netCDF4._netCDF4.Variable'>
float32 cloud_fraction(lat, lon, vert, time)
    units: NA
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720, 1, 12)
filling off

<type 'netCDF4._netCDF4.Variable'>
float32 time(time)
    units: Month
unlimited dimensions: 
current shape = (12,)
filling off



In [135]:
# Look up 'cloud_fraction' for every sample. index_order follows the
# (lat, lon, vert, time) dimension order printed for this variable; the
# 12-entry 'Month' time axis is indexed by sampling month.
cloudmo = create_satellite_column(env=env, nc=cloudmo_nc, z_var='cloud_fraction', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['lat','lon','vert','time'])

retrieving index [253, 346, 0, 8], month is 9: 0.36565
retrieving index [253, 346, 0, 8], month is 9: 0.36565
retrieving index [254, 363, 0, 8], month is 9: 0.541831
retrieving index [254, 363, 0, 8], month is 9: 0.541831
retrieving index [258, 371, 0, 8], month is 9: 0.524114
retrieving index [258, 371, 0, 8], month is 9: 0.524114
retrieving index [251, 388, 0, 10], month is 11: 0.574241
retrieving index [251, 388, 0, 10], month is 11: 0.574241
retrieving index [251, 388, 0, 10], month is 11: 0.574241
retrieving index [251, 388, 0, 10], month is 11: 0.574241
retrieving index [259, 394, 0, 10], month is 11: 0.602925
retrieving index [264, 395, 0, 10], month is 11: 0.614173
retrieving index [264, 395, 0, 10], month is 11: 0.614173
retrieving index [264, 395, 0, 10], month is 11: 0.614173
retrieving index [258, 398, 0, 10], month is 11: 0.632171
retrieving index [258, 398, 0, 10], month is 11: 0.632171
retrieving index [258, 398, 0, 10], month is 11: 0.632171
retrieving index [258, 398, 

In [136]:
# Store the cloudfracMomeanNASA lookups as a new env column.
env['cloudfraction_monthly_historical'] = cloudmo

In [140]:
# Open historical_data/cloudfracStdevNASA.nc and print its variable names
# plus the metadata of 'AnnualStdev_Cloud_Fraction'.
cloudsd_nc = netCDF4.Dataset("historical_data/cloudfracStdevNASA.nc")
print cloudsd_nc.variables.keys()
print cloudsd_nc.variables['AnnualStdev_Cloud_Fraction']

[u'lon', u'lat', u'AnnualStdev_Cloud_Fraction']
<type 'netCDF4._netCDF4.Variable'>
float32 AnnualStdev_Cloud_Fraction(lat, lon)
    units: Proportion
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [142]:
# Look up 'AnnualStdev_Cloud_Fraction' for every sample. The file lists only
# lon and lat coordinates, so the variable is indexed by (lat, lon) alone and
# the time/vert names passed here match no variable.
cloudsd = create_satellite_column(env=env, nc=cloudsd_nc, z_var='AnnualStdev_Cloud_Fraction', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['lat', 'lon'])

retrieving index [253, 346], month is 9: 0.118942
retrieving index [253, 346], month is 9: 0.118942
retrieving index [254, 363], month is 9: 0.103758
retrieving index [254, 363], month is 9: 0.103758
retrieving index [258, 371], month is 9: 0.183366
retrieving index [258, 371], month is 9: 0.183366
retrieving index [251, 388], month is 11: 0.175382
retrieving index [251, 388], month is 11: 0.175382
retrieving index [251, 388], month is 11: 0.175382
retrieving index [251, 388], month is 11: 0.175382
retrieving index [259, 394], month is 11: 0.188279
retrieving index [264, 395], month is 11: 0.172292
retrieving index [264, 395], month is 11: 0.172292
retrieving index [264, 395], month is 11: 0.172292
retrieving index [258, 398], month is 11: 0.213355
retrieving index [258, 398], month is 11: 0.213355
retrieving index [258, 398], month is 11: 0.213355
retrieving index [258, 398], month is 11: 0.213355
retrieving index [247, 425], month is 12: 0.175622
retrieving index [247, 425], month is

In [143]:
# Store the cloudfracStdevNASA lookups as a new env column.
env['cloudfraction_annualstdev_historical'] = cloudsd

In [148]:
# Open historical_data/daAnmeanBiooracle.nc and print its variable names plus
# the metadata of 'Diffuse_attenuation_coefficient'.
da_nc = netCDF4.Dataset("historical_data/daAnmeanBiooracle.nc")
print da_nc.variables.keys()
print da_nc.variables['Diffuse_attenuation_coefficient']

[u'lon', u'lat', u'Diffuse_attenuation_coefficient']
<type 'netCDF4._netCDF4.Variable'>
float32 Diffuse_attenuation_coefficient(lat, lon)
    units: per_m
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [149]:
# Look up 'Diffuse_attenuation_coefficient' for every sample. The variable is
# (lat, lon) only, so no time or vert index is used.
da = create_satellite_column(env=env, nc=da_nc, z_var='Diffuse_attenuation_coefficient', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['lat', 'lon'])

retrieving index [253, 346], month is 9: 0.0936111
retrieving index [253, 346], month is 9: 0.0936111
retrieving index [254, 363], month is 9: 0.0509722
retrieving index [254, 363], month is 9: 0.0509722
retrieving index [258, 371], month is 9: 0.0438889
retrieving index [258, 371], month is 9: 0.0438889
retrieving index [251, 388], month is 11: 0.03475
retrieving index [251, 388], month is 11: 0.03475
retrieving index [251, 388], month is 11: 0.03475
retrieving index [251, 388], month is 11: 0.03475
retrieving index [259, 394], month is 11: 0.054
retrieving index [264, 395], month is 11: 0.0459444
retrieving index [264, 395], month is 11: 0.0459444
retrieving index [264, 395], month is 11: 0.0459444
retrieving index [258, 398], month is 11: 0.0382222
retrieving index [258, 398], month is 11: 0.0382222
retrieving index [258, 398], month is 11: 0.0382222
retrieving index [258, 398], month is 11: 0.0382222
retrieving index [247, 425], month is 12: 0.0291667
retrieving index [247, 425], m

In [150]:
# Store the daAnmeanBiooracle lookups as a new env column.
env['diffuseattenuation_annual_historical'] = da

In [152]:
# Open historical_data/daylengthMomeanEarthtools.nc and print its variable
# names plus the metadata of the day-length variable and of the time axis.
day_nc = netCDF4.Dataset("historical_data/daylengthMomeanEarthtools.nc")
print day_nc.variables.keys()
print day_nc.variables['Day_Length_on_15th_Day_of_Month']
print day_nc.variables['time']

[u'lon', u'lat', u'vert', u'time', u'Day_Length_on_15th_Day_of_Month']
<type 'netCDF4._netCDF4.Variable'>
float32 Day_Length_on_15th_Day_of_Month(lat, lon, vert, time)
    units: Hours
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720, 1, 12)
filling off

<type 'netCDF4._netCDF4.Variable'>
float32 time(time)
    units: Month
unlimited dimensions: 
current shape = (12,)
filling off



In [153]:
# Look up 'Day_Length_on_15th_Day_of_Month' for every sample. index_order
# follows the (lat, lon, vert, time) dimension order printed for this
# variable; the 12-entry 'Month' time axis is indexed by sampling month.
day = create_satellite_column(env=env, nc=day_nc, z_var='Day_Length_on_15th_Day_of_Month', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['lat', 'lon', 'vert', 'time'])

retrieving index [253, 346, 0, 8], month is 9: 12.3953
retrieving index [253, 346, 0, 8], month is 9: 12.3953
retrieving index [254, 363, 0, 8], month is 9: 12.4011
retrieving index [254, 363, 0, 8], month is 9: 12.4011
retrieving index [258, 371, 0, 8], month is 9: 12.4247
retrieving index [258, 371, 0, 8], month is 9: 12.4247
retrieving index [251, 388, 0, 10], month is 11: 10.2311
retrieving index [251, 388, 0, 10], month is 11: 10.2311
retrieving index [251, 388, 0, 10], month is 11: 10.2311
retrieving index [251, 388, 0, 10], month is 11: 10.2311
retrieving index [259, 394, 0, 10], month is 11: 9.935
retrieving index [264, 395, 0, 10], month is 11: 9.73028
retrieving index [264, 395, 0, 10], month is 11: 9.73028
retrieving index [264, 395, 0, 10], month is 11: 9.73028
retrieving index [258, 398, 0, 10], month is 11: 9.97389
retrieving index [258, 398, 0, 10], month is 11: 9.97389
retrieving index [258, 398, 0, 10], month is 11: 9.97389
retrieving index [258, 398, 0, 10], month is 

In [154]:
# Store the daylengthMomeanEarthtools lookups as a new env column.
env['daylength_monthly_historical'] = day

In [157]:
# Open historical_data/dustAnmeanJickells.nc and print its variable names
# plus the metadata of 'Dust_Deposition'.
dustan_nc = netCDF4.Dataset("historical_data/dustAnmeanJickells.nc")
print dustan_nc.variables.keys()
print dustan_nc.variables['Dust_Deposition']

[u'lon', u'lat', u'Dust_Deposition']
<type 'netCDF4._netCDF4.Variable'>
float32 Dust_Deposition(lat, lon)
    units: Grams_per_Square_Meter
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [158]:
# Look up 'Dust_Deposition' for every sample. The variable is (lat, lon)
# only, so no time or vert index is used.
dustan = create_satellite_column(env=env, nc=dustan_nc, z_var='Dust_Deposition', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['lat', 'lon'])

retrieving index [253, 346], month is 9: 9.13421e-11
retrieving index [253, 346], month is 9: 9.13421e-11
retrieving index [254, 363], month is 9: 1.52997e-10
retrieving index [254, 363], month is 9: 1.52997e-10
retrieving index [258, 371], month is 9: 1.04547e-10
retrieving index [258, 371], month is 9: 1.04547e-10
retrieving index [251, 388], month is 11: 2.12559e-10
retrieving index [251, 388], month is 11: 2.12559e-10
retrieving index [251, 388], month is 11: 2.12559e-10
retrieving index [251, 388], month is 11: 2.12559e-10
retrieving index [259, 394], month is 11: 2.16676e-10
retrieving index [264, 395], month is 11: 1.88248e-10
retrieving index [264, 395], month is 11: 1.88248e-10
retrieving index [264, 395], month is 11: 1.88248e-10
retrieving index [258, 398], month is 11: 2.17296e-10
retrieving index [258, 398], month is 11: 2.17296e-10
retrieving index [258, 398], month is 11: 2.17296e-10
retrieving index [258, 398], month is 11: 2.17296e-10
retrieving index [247, 425], month

In [159]:
# Store the dustAnmeanJickells lookups as a new env column.
env['dustflux_annual_historical'] = dustan

In [161]:
# Open historical_data/dustMomeanJickells.nc and print its variable names
# plus the metadata of 'Dust_Deposition' and of the time axis.
dustmo_nc = netCDF4.Dataset("historical_data/dustMomeanJickells.nc")
print dustmo_nc.variables.keys()
print dustmo_nc.variables['Dust_Deposition']
print dustmo_nc.variables['time']

[u'lon', u'lat', u'vert', u'time', u'Dust_Deposition']
<type 'netCDF4._netCDF4.Variable'>
float32 Dust_Deposition(lat, lon, vert, time)
    units: Grams_per_Square_Meter
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720, 1, 12)
filling off

<type 'netCDF4._netCDF4.Variable'>
float32 time(time)
    units: Month
unlimited dimensions: 
current shape = (12,)
filling off



In [162]:
# Extract the monthly-mean Dust_Deposition value for the samples in env.
# The variable is laid out as (lat, lon, vert, time), hence the index_order.
dustmo = create_satellite_column(
    env=env, nc=dustmo_nc, z_var='Dust_Deposition',
    index_order=['lat', 'lon', 'vert', 'time'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346, 0, 8], month is 9: 1.01221e-10
retrieving index [253, 346, 0, 8], month is 9: 1.01221e-10
retrieving index [254, 363, 0, 8], month is 9: 1.85501e-10
retrieving index [254, 363, 0, 8], month is 9: 1.85501e-10
retrieving index [258, 371, 0, 8], month is 9: 9.88609e-11
retrieving index [258, 371, 0, 8], month is 9: 9.88609e-11
retrieving index [251, 388, 0, 10], month is 11: 2.11402e-10
retrieving index [251, 388, 0, 10], month is 11: 2.11402e-10
retrieving index [251, 388, 0, 10], month is 11: 2.11402e-10
retrieving index [251, 388, 0, 10], month is 11: 2.11402e-10
retrieving index [259, 394, 0, 10], month is 11: 2.35196e-10
retrieving index [264, 395, 0, 10], month is 11: 1.62966e-10
retrieving index [264, 395, 0, 10], month is 11: 1.62966e-10
retrieving index [264, 395, 0, 10], month is 11: 1.62966e-10
retrieving index [258, 398, 0, 10], month is 11: 2.73309e-10
retrieving index [258, 398, 0, 10], month is 11: 2.73309e-10
retrieving index [258, 398, 0, 10], 

In [163]:
# Keep the monthly dust deposition lookup on env under 'dustflux_monthly_historical'.
env['dustflux_monthly_historical'] = dustmo

In [165]:
# Open the dust deposition standard-deviation file, then show its variable
# names and the metadata of AnnualStdev_Dust_Deposition.
# Single-argument print(...) prints the same text as the print statement.
dustsd_nc = netCDF4.Dataset("historical_data/dustStdevJickells.nc")
print(dustsd_nc.variables.keys())
print(dustsd_nc.variables['AnnualStdev_Dust_Deposition'])

[u'lon', u'lat', u'AnnualStdev_Dust_Deposition']
<type 'netCDF4._netCDF4.Variable'>
float32 AnnualStdev_Dust_Deposition(lat, lon)
    units: Grams_per_Square_Meter
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [166]:
# Extract the AnnualStdev_Dust_Deposition value for the samples in env.
# NOTE(review): time/vert names are passed although this file lists only
# lon/lat/AnnualStdev_Dust_Deposition; confirm they are ignored for a 2-D grid.
dustsd = create_satellite_column(
    env=env, nc=dustsd_nc, z_var='AnnualStdev_Dust_Deposition',
    index_order=['lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346], month is 9: 2.44769e-11
retrieving index [253, 346], month is 9: 2.44769e-11
retrieving index [254, 363], month is 9: 5.34428e-11
retrieving index [254, 363], month is 9: 5.34428e-11
retrieving index [258, 371], month is 9: 4.13626e-11
retrieving index [258, 371], month is 9: 4.13626e-11
retrieving index [251, 388], month is 11: 9.72197e-11
retrieving index [251, 388], month is 11: 9.72197e-11
retrieving index [251, 388], month is 11: 9.72197e-11
retrieving index [251, 388], month is 11: 9.72197e-11
retrieving index [259, 394], month is 11: 1.06867e-10
retrieving index [264, 395], month is 11: 1.09728e-10
retrieving index [264, 395], month is 11: 1.09728e-10
retrieving index [264, 395], month is 11: 1.09728e-10
retrieving index [258, 398], month is 11: 1.07345e-10
retrieving index [258, 398], month is 11: 1.07345e-10
retrieving index [258, 398], month is 11: 1.07345e-10
retrieving index [258, 398], month is 11: 1.07345e-10
retrieving index [247, 425], month

In [167]:
# Keep the dust deposition standard-deviation lookup on env under
# 'dustflux_annualstdev_historical'.
env['dustflux_annualstdev_historical'] = dustsd

In [170]:
# Open the annual-mean solar insolation file, then show its variable names
# and the metadata of the Solar_Insolation field.
# Single-argument print(...) prints the same text as the print statement.
insolan_nc = netCDF4.Dataset("historical_data/insolationAnmeanBiooracle.nc")
print(insolan_nc.variables.keys())
print(insolan_nc.variables['Solar_Insolation'])

[u'lon', u'lat', u'Solar_Insolation']
<type 'netCDF4._netCDF4.Variable'>
float32 Solar_Insolation(lat, lon)
    units: W_per_Square_Meter
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [171]:
# Extract the annual-mean Solar_Insolation value for the samples in env.
# NOTE(review): time/vert names are passed although this file lists only
# lon/lat/Solar_Insolation; confirm they are ignored for a 2-D grid.
insolan = create_satellite_column(
    env=env, nc=insolan_nc, z_var='Solar_Insolation',
    index_order=['lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346], month is 9: 225.052
retrieving index [253, 346], month is 9: 225.052
retrieving index [254, 363], month is 9: 219.747
retrieving index [254, 363], month is 9: 219.747
retrieving index [258, 371], month is 9: 212.421
retrieving index [258, 371], month is 9: 212.421
retrieving index [251, 388], month is 11: 231.079
retrieving index [251, 388], month is 11: 231.079
retrieving index [251, 388], month is 11: 231.079
retrieving index [251, 388], month is 11: 231.079
retrieving index [259, 394], month is 11: 206.214
retrieving index [264, 395], month is 11: 197.877
retrieving index [264, 395], month is 11: 197.877
retrieving index [264, 395], month is 11: 197.877
retrieving index [258, 398], month is 11: 214.948
retrieving index [258, 398], month is 11: 214.948
retrieving index [258, 398], month is 11: 214.948
retrieving index [258, 398], month is 11: 214.948
retrieving index [247, 425], month is 12: 247.32
retrieving index [247, 425], month is 12: 247.32
retrievi

In [172]:
# Keep the annual solar insolation lookup on env under
# 'solarinsolation_annual_historical'.
env['solarinsolation_annual_historical'] = insolan

In [174]:
# Open the monthly-mean solar insolation file, then show its variable names,
# the solar_insolation metadata (lower-case name in this file) and the time
# axis metadata.
# Single-argument print(...) prints the same text as the print statement.
insolmo_nc = netCDF4.Dataset("historical_data/insolationMomeanNASA.nc")
print(insolmo_nc.variables.keys())
print(insolmo_nc.variables['solar_insolation'])
print(insolmo_nc.variables['time'])

[u'lon', u'lat', u'vert', u'time', u'solar_insolation']
<type 'netCDF4._netCDF4.Variable'>
float32 solar_insolation(lat, lon, vert, time)
    units: W_per_square_m
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720, 1, 12)
filling off

<type 'netCDF4._netCDF4.Variable'>
float32 time(time)
    units: Month
unlimited dimensions: 
current shape = (12,)
filling off



In [175]:
# Extract the monthly-mean solar_insolation value for the samples in env.
# The variable is laid out as (lat, lon, vert, time), hence the index_order.
insolmo = create_satellite_column(
    env=env, nc=insolmo_nc, z_var='solar_insolation',
    index_order=['lat', 'lon', 'vert', 'time'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346, 0, 8], month is 9: 251.181
retrieving index [253, 346, 0, 8], month is 9: 251.181
retrieving index [254, 363, 0, 8], month is 9: 243.757
retrieving index [254, 363, 0, 8], month is 9: 243.757
retrieving index [258, 371, 0, 8], month is 9: 239.426
retrieving index [258, 371, 0, 8], month is 9: 239.426
retrieving index [251, 388, 0, 10], month is 11: 135.696
retrieving index [251, 388, 0, 10], month is 11: 135.696
retrieving index [251, 388, 0, 10], month is 11: 135.696
retrieving index [251, 388, 0, 10], month is 11: 135.696
retrieving index [259, 394, 0, 10], month is 11: 106.102
retrieving index [264, 395, 0, 10], month is 11: 96.3583
retrieving index [264, 395, 0, 10], month is 11: 96.3583
retrieving index [264, 395, 0, 10], month is 11: 96.3583
retrieving index [258, 398, 0, 10], month is 11: 114.042
retrieving index [258, 398, 0, 10], month is 11: 114.042
retrieving index [258, 398, 0, 10], month is 11: 114.042
retrieving index [258, 398, 0, 10], month i

In [176]:
# Keep the monthly solar insolation lookup on env under
# 'solarinsolation_monthly_historical'.
env['solarinsolation_monthly_historical'] = insolmo

In [178]:
# Open the solar insolation standard-deviation file, then show its variable
# names and the metadata of AnnualStdev_Solar_Insolation.
# Single-argument print(...) prints the same text as the print statement.
insolsd_nc = netCDF4.Dataset("historical_data/insolationStdevNASA.nc")
print(insolsd_nc.variables.keys())
print(insolsd_nc.variables['AnnualStdev_Solar_Insolation'])

[u'lon', u'lat', u'AnnualStdev_Solar_Insolation']
<type 'netCDF4._netCDF4.Variable'>
float32 AnnualStdev_Solar_Insolation(lat, lon)
    units: W_per_Square_Meter
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [179]:
# Extract the AnnualStdev_Solar_Insolation value for the samples in env.
# NOTE(review): time/vert names are passed although this file lists only
# lon/lat/AnnualStdev_Solar_Insolation; confirm they are ignored for a 2-D grid.
insolsd = create_satellite_column(
    env=env, nc=insolsd_nc, z_var='AnnualStdev_Solar_Insolation',
    index_order=['lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346], month is 9: 86.0385
retrieving index [253, 346], month is 9: 86.0385
retrieving index [254, 363], month is 9: 87.2079
retrieving index [254, 363], month is 9: 87.2079
retrieving index [258, 371], month is 9: 91.6684
retrieving index [258, 371], month is 9: 91.6684
retrieving index [251, 388], month is 11: 88.5882
retrieving index [251, 388], month is 11: 88.5882
retrieving index [251, 388], month is 11: 88.5882
retrieving index [251, 388], month is 11: 88.5882
retrieving index [259, 394], month is 11: 93.7185
retrieving index [264, 395], month is 11: 95.7459
retrieving index [264, 395], month is 11: 95.7459
retrieving index [264, 395], month is 11: 95.7459
retrieving index [258, 398], month is 11: 98.11
retrieving index [258, 398], month is 11: 98.11
retrieving index [258, 398], month is 11: 98.11
retrieving index [258, 398], month is 11: 98.11
retrieving index [247, 425], month is 12: 86.1425
retrieving index [247, 425], month is 12: 86.1425
retrieving ind

In [180]:
# Keep the solar insolation standard-deviation lookup on env under
# 'solarinsolation_annualstdev_historical'.
env['solarinsolation_annualstdev_historical'] = insolsd

In [182]:
# Open the distance-from-land file, then show its variable names and the
# metadata of the LandDist field.
# Single-argument print(...) prints the same text as the print statement.
land_nc = netCDF4.Dataset("historical_data/landdistAnmeanReady.nc")
print(land_nc.variables.keys())
print(land_nc.variables['LandDist'])

[u'lon', u'lat', u'LandDist']
<type 'netCDF4._netCDF4.Variable'>
float32 LandDist(lat, lon)
    units: 
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [183]:
# Extract the LandDist value for the samples in env.
# NOTE(review): time/vert names are passed although this file lists only
# lon/lat/LandDist; confirm they are ignored for a 2-D grid.
land = create_satellite_column(
    env=env, nc=land_nc, z_var='LandDist',
    index_order=['lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346], month is 9: 44.7188
retrieving index [253, 346], month is 9: 44.7188
retrieving index [254, 363], month is 9: 92.4688
retrieving index [254, 363], month is 9: 92.4688
retrieving index [258, 371], month is 9: 170.328
retrieving index [258, 371], month is 9: 170.328
retrieving index [251, 388], month is 11: 70.875
retrieving index [251, 388], month is 11: 70.875
retrieving index [251, 388], month is 11: 70.875
retrieving index [251, 388], month is 11: 70.875
retrieving index [259, 394], month is 11: 30.2656
retrieving index [264, 395], month is 11: 58.5781
retrieving index [264, 395], month is 11: 58.5781
retrieving index [264, 395], month is 11: 58.5781
retrieving index [258, 398], month is 11: 73.0156
retrieving index [258, 398], month is 11: 73.0156
retrieving index [258, 398], month is 11: 73.0156
retrieving index [258, 398], month is 11: 73.0156
retrieving index [247, 425], month is 12: 149.797
retrieving index [247, 425], month is 12: 149.797
retrieving

In [184]:
# Keep the LandDist lookup on env under 'distfromland_annual_historical'.
env['distfromland_annual_historical'] = land

In [186]:
# Open the monthly mixed-layer-depth file (the "density" variant, going by
# the file name), then show its variable names, the Mixed_Layer_Depth
# metadata and the time axis metadata.
# Single-argument print(...) prints the same text as the print statement.
pyc_nc = netCDF4.Dataset("historical_data/mixedlayerdensityMomeanMontegut.nc")
print(pyc_nc.variables.keys())
print(pyc_nc.variables['Mixed_Layer_Depth'])
print(pyc_nc.variables['time'])

[u'lon', u'lat', u'vert', u'time', u'Mixed_Layer_Depth']
<type 'netCDF4._netCDF4.Variable'>
float32 Mixed_Layer_Depth(lat, lon, vert, time)
    units: Meters
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720, 1, 12)
filling off

<type 'netCDF4._netCDF4.Variable'>
float32 time(time)
    units: Month
unlimited dimensions: 
current shape = (12,)
filling off



In [187]:
# Extract the monthly Mixed_Layer_Depth value (density-variant file) for the
# samples in env. The variable is laid out as (lat, lon, vert, time).
pyc = create_satellite_column(
    env=env, nc=pyc_nc, z_var='Mixed_Layer_Depth',
    index_order=['lat', 'lon', 'vert', 'time'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346, 0, 8], month is 9: 17.3355
retrieving index [253, 346, 0, 8], month is 9: 17.3355
retrieving index [254, 363, 0, 8], month is 9: 19.5528
retrieving index [254, 363, 0, 8], month is 9: 19.5528
retrieving index [258, 371, 0, 8], month is 9: 18.2925
retrieving index [258, 371, 0, 8], month is 9: 18.2925
retrieving index [251, 388, 0, 10], month is 11: 42.0924
retrieving index [251, 388, 0, 10], month is 11: 42.0924
retrieving index [251, 388, 0, 10], month is 11: 42.0924
retrieving index [251, 388, 0, 10], month is 11: 42.0924
retrieving index [259, 394, 0, 10], month is 11: 23.8249
retrieving index [264, 395, 0, 10], month is 11: 24.2877
retrieving index [264, 395, 0, 10], month is 11: 24.2877
retrieving index [264, 395, 0, 10], month is 11: 24.2877
retrieving index [258, 398, 0, 10], month is 11: 26.3117
retrieving index [258, 398, 0, 10], month is 11: 26.3117
retrieving index [258, 398, 0, 10], month is 11: 26.3117
retrieving index [258, 398, 0, 10], month i

In [188]:
# Keep the density-variant mixed layer depth lookup on env under
# 'pycnoclinedepth_monthly_historical'.
env['pycnoclinedepth_monthly_historical'] = pyc

In [190]:
# Open the standard-deviation file for the density-variant mixed layer depth,
# then show its variable names and the metadata of
# AnnualStdev_Mixed_Layer_Depth_02.
# Single-argument print(...) prints the same text as the print statement.
pycsd_nc = netCDF4.Dataset("historical_data/mixedlayerdensityStdevMontegut.nc")
print(pycsd_nc.variables.keys())
print(pycsd_nc.variables['AnnualStdev_Mixed_Layer_Depth_02'])

[u'lon', u'lat', u'AnnualStdev_Mixed_Layer_Depth_02']
<type 'netCDF4._netCDF4.Variable'>
float32 AnnualStdev_Mixed_Layer_Depth_02(lat, lon)
    units: Meters
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [191]:
# Extract the AnnualStdev_Mixed_Layer_Depth_02 value for the samples in env.
# NOTE(review): time/vert names are passed although this file lists only
# lon/lat/AnnualStdev_Mixed_Layer_Depth_02; confirm they are ignored for a
# 2-D grid.
pycsd = create_satellite_column(
    env=env, nc=pycsd_nc, z_var='AnnualStdev_Mixed_Layer_Depth_02',
    index_order=['lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346], month is 9: 20.7973
retrieving index [253, 346], month is 9: 20.7973
retrieving index [254, 363], month is 9: 8.09975
retrieving index [254, 363], month is 9: 8.09975
retrieving index [258, 371], month is 9: 12.672
retrieving index [258, 371], month is 9: 12.672
retrieving index [251, 388], month is 11: 19.189
retrieving index [251, 388], month is 11: 19.189
retrieving index [251, 388], month is 11: 19.189
retrieving index [251, 388], month is 11: 19.189
retrieving index [259, 394], month is 11: 11.1976
retrieving index [264, 395], month is 11: 13.9456
retrieving index [264, 395], month is 11: 13.9456
retrieving index [264, 395], month is 11: 13.9456
retrieving index [258, 398], month is 11: 17.556
retrieving index [258, 398], month is 11: 17.556
retrieving index [258, 398], month is 11: 17.556
retrieving index [258, 398], month is 11: 17.556
retrieving index [247, 425], month is 12: 23.995
retrieving index [247, 425], month is 12: 23.995
retrieving index [

In [192]:
# Keep the density-variant mixed layer depth standard-deviation lookup on env
# under 'pycnoclinedepth_annualstdev_historical'.
env['pycnoclinedepth_annualstdev_historical'] = pycsd

In [194]:
# Open the monthly mixed-layer-depth file (the "temp" variant, going by the
# file name), then show its variable names, the Mixed_Layer_Depth metadata
# and the time axis metadata.
# Single-argument print(...) prints the same text as the print statement.
therm_nc = netCDF4.Dataset("historical_data/mixedlayertempMomeanMontegut.nc")
print(therm_nc.variables.keys())
print(therm_nc.variables['Mixed_Layer_Depth'])
print(therm_nc.variables['time'])

[u'lon', u'lat', u'vert', u'time', u'Mixed_Layer_Depth']
<type 'netCDF4._netCDF4.Variable'>
float32 Mixed_Layer_Depth(lat, lon, vert, time)
    units: Meters
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720, 1, 12)
filling off

<type 'netCDF4._netCDF4.Variable'>
float32 time(time)
    units: Month
unlimited dimensions: 
current shape = (12,)
filling off



In [195]:
# Extract the monthly Mixed_Layer_Depth value (temp-variant file) for the
# samples in env. The variable is laid out as (lat, lon, vert, time).
therm = create_satellite_column(
    env=env, nc=therm_nc, z_var='Mixed_Layer_Depth',
    index_order=['lat', 'lon', 'vert', 'time'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346, 0, 8], month is 9: 18.4007
retrieving index [253, 346, 0, 8], month is 9: 18.4007
retrieving index [254, 363, 0, 8], month is 9: 18.0026
retrieving index [254, 363, 0, 8], month is 9: 18.0026
retrieving index [258, 371, 0, 8], month is 9: 21.6404
retrieving index [258, 371, 0, 8], month is 9: 21.6404
retrieving index [251, 388, 0, 10], month is 11: 40.4244
retrieving index [251, 388, 0, 10], month is 11: 40.4244
retrieving index [251, 388, 0, 10], month is 11: 40.4244
retrieving index [251, 388, 0, 10], month is 11: 40.4244
retrieving index [259, 394, 0, 10], month is 11: 33.813
retrieving index [264, 395, 0, 10], month is 11: 31.0778
retrieving index [264, 395, 0, 10], month is 11: 31.0778
retrieving index [264, 395, 0, 10], month is 11: 31.0778
retrieving index [258, 398, 0, 10], month is 11: 35.669
retrieving index [258, 398, 0, 10], month is 11: 35.669
retrieving index [258, 398, 0, 10], month is 11: 35.669
retrieving index [258, 398, 0, 10], month is 11

In [196]:
# Keep the temp-variant mixed layer depth lookup on env under
# 'thermoclinedepth_monthly_historical'.
env['thermoclinedepth_monthly_historical'] = therm

In [198]:
# Open the standard-deviation file for the temp-variant mixed layer depth,
# then show its variable names and the metadata of
# AnnualStdev_Mixed_Layer_Depth_11.
# Single-argument print(...) prints the same text as the print statement.
thermsd_nc = netCDF4.Dataset("historical_data/mixedlayertempStdevMontegut.nc")
print(thermsd_nc.variables.keys())
print(thermsd_nc.variables['AnnualStdev_Mixed_Layer_Depth_11'])

[u'lon', u'lat', u'AnnualStdev_Mixed_Layer_Depth_11']
<type 'netCDF4._netCDF4.Variable'>
float32 AnnualStdev_Mixed_Layer_Depth_11(lat, lon)
    units: Meters
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [199]:
# Extract the AnnualStdev_Mixed_Layer_Depth_11 value for the samples in env.
# NOTE(review): time/vert names are passed although this file lists only
# lon/lat/AnnualStdev_Mixed_Layer_Depth_11; confirm they are ignored for a
# 2-D grid.
thermsd = create_satellite_column(
    env=env, nc=thermsd_nc, z_var='AnnualStdev_Mixed_Layer_Depth_11',
    index_order=['lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346], month is 9: 23.1315
retrieving index [253, 346], month is 9: 23.1315
retrieving index [254, 363], month is 9: 15.2284
retrieving index [254, 363], month is 9: 15.2284
retrieving index [258, 371], month is 9: 21.5965
retrieving index [258, 371], month is 9: 21.5965
retrieving index [251, 388], month is 11: 23.1055
retrieving index [251, 388], month is 11: 23.1055
retrieving index [251, 388], month is 11: 23.1055
retrieving index [251, 388], month is 11: 23.1055
retrieving index [259, 394], month is 11: 22.365
retrieving index [264, 395], month is 11: 23.2262
retrieving index [264, 395], month is 11: 23.2262
retrieving index [264, 395], month is 11: 23.2262
retrieving index [258, 398], month is 11: 24.1029
retrieving index [258, 398], month is 11: 24.1029
retrieving index [258, 398], month is 11: 24.1029
retrieving index [258, 398], month is 11: 24.1029
retrieving index [247, 425], month is 12: 30.1747
retrieving index [247, 425], month is 12: 30.1747
retriev

In [200]:
# Keep the temp-variant mixed layer depth standard-deviation lookup on env
# under 'thermoclinedepth_annualstdev_historical'.
env['thermoclinedepth_annualstdev_historical'] = thermsd

In [202]:
# Open the annual-mean nitrate file, then show its variable names, the
# nitrate metadata and the time axis metadata.
# Single-argument print(...) prints the same text as the print statement.
nitrate_nc = netCDF4.Dataset("historical_data/nitrateAnmeanWOA.nc")
print(nitrate_nc.variables.keys())
print(nitrate_nc.variables['nitrate'])
print(nitrate_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'nitrate']
<type 'netCDF4._netCDF4.Variable'>
float64 nitrate(time, vert, lat, lon)
    units: micromoles_per_liter
    missing_value: -9999
    long_name: Mean nitrate concentration over 1955 to 2012
    cell_methods: area: mean depth: mean time: mean
unlimited dimensions: time
current shape = (1, 102, 180, 360)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (1,)
filling off



In [227]:
# Extract the annual-mean nitrate value for the samples in env, including the
# sampling depth. The variable is laid out as (time, vert, lat, lon).
nitrate = create_satellite_column(
    env=env, nc=nitrate_nc, z_var='nitrate',
    index_order=['time', 'vert', 'lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [0, 93, 126, 173], month is 9: 3.1971521
retrieving index [0, 100, 126, 173], month is 9: 2.0604415
retrieving index [0, 93, 127, 181], month is 9: 1.2885234
retrieving index [0, 100, 127, 181], month is 9: 0.88352376
retrieving index [0, 90, 129, 185], month is 9: 1.1754926
retrieving index [0, 100, 129, 185], month is 9: 1.2135535
retrieving index [0, 89, 125, 194], month is 11: 0.17409603
retrieving index [0, 89, 125, 194], month is 11: 0.17409603
retrieving index [0, 100, 125, 194], month is 11: 0.7499394
retrieving index [0, 100, 125, 194], month is 11: 0.7499394
retrieving index [0, 100, 129, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 100, 132, 197], month is 11: nan
retrieving index [0, 91, 129, 199], month is 11: 0.38048843
retrieving index [0, 91, 129, 199], month is 11: 0.38048843
retrieving index [0, 100, 129, 199], month is 11: 0.37757537
retrieving in

In [228]:
# Keep the annual nitrate lookup on env under 'nitrate_annual_historical'.
env['nitrate_annual_historical'] = nitrate

In [230]:
# Open the monthly-mean nitrate file, then show its variable names, the
# nitrate metadata and the time axis metadata.
# Single-argument print(...) prints the same text as the print statement.
nitratemo_nc = netCDF4.Dataset("historical_data/nitrateMomeanWOA.nc")
print(nitratemo_nc.variables.keys())
print(nitratemo_nc.variables['nitrate'])
print(nitratemo_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'nitrate']
<type 'netCDF4._netCDF4.Variable'>
float64 nitrate(time, vert, lat, lon)
    units: micromoles_per_liter
    missing_value: -9999
    long_name: Mean monthly nitrate concentration 1955 to 2012
    cell_methods: area: mean depth:mean time: mean within years time: mean over years
unlimited dimensions: time
current shape = (12, 37, 180, 360)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (12,)
filling off



In [238]:
# Extract the monthly-mean nitrate value for the samples in env, including
# the sampling depth. The variable is laid out as (time, vert, lat, lon).
nitratemo = create_satellite_column(
    env=env, nc=nitratemo_nc, z_var='nitrate',
    index_order=['time', 'vert', 'lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [8, 28, 126, 173], month is 9: 2.532777
retrieving index [8, 35, 126, 173], month is 9: 0.1070714
retrieving index [8, 28, 127, 181], month is 9: 0.72389126
retrieving index [8, 35, 127, 181], month is 9: 0.6430461
retrieving index [8, 25, 129, 185], month is 9: 0.23366079
retrieving index [8, 35, 129, 185], month is 9: 0.6288888
retrieving index [10, 24, 125, 194], month is 11: 0.0
retrieving index [10, 24, 125, 194], month is 11: 0.0
retrieving index [10, 35, 125, 194], month is 11: 0.0969384
retrieving index [10, 35, 125, 194], month is 11: 0.0969384
retrieving index [10, 35, 129, 197], month is 11: nan
retrieving index [10, 25, 132, 197], month is 11: nan
retrieving index [10, 25, 132, 197], month is 11: nan
retrieving index [10, 35, 132, 197], month is 11: nan
retrieving index [10, 26, 129, 199], month is 11: 0.16454335
retrieving index [10, 26, 129, 199], month is 11: 0.16454335
retrieving index [10, 35, 129, 199], month is 11: 0.3560131
retrieving index [10, 35,

In [239]:
# Keep the monthly nitrate lookup on env under 'nitrate_monthly_historical'.
env['nitrate_monthly_historical'] = nitratemo

In [241]:
# Open the monthly nitrate:phosphate ratio file, then show its variable
# names, the Nitrate_Phosphate_Ratio metadata and the time axis metadata.
# Single-argument print(...) prints the same text as the print statement.
np_nc = netCDF4.Dataset("historical_data/npratioMomeanWOA.nc")
print(np_nc.variables.keys())
print(np_nc.variables['Nitrate_Phosphate_Ratio'])
print(np_nc.variables['time'])

[u'lon', u'lat', u'vert', u'time', u'Nitrate_Phosphate_Ratio']
<type 'netCDF4._netCDF4.Variable'>
float32 Nitrate_Phosphate_Ratio(lat, lon, vert, time)
    units: NA
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720, 14, 12)
filling off

<type 'netCDF4._netCDF4.Variable'>
float32 time(time)
    units: Months
unlimited dimensions: 
current shape = (12,)
filling off



In [257]:
np = create_satellite_column(env=env, nc=np_nc, z_var='Nitrate_Phosphate_Ratio', 
                                  year_sampled='year_sampled', month_sampled='month_sampled', depth_sampled='depth_sampled', latitude='latitude', longitude='longitude', 
                                  lat="lat",lon="lon",time="time", vert="vert",
                                  index_order=['lat', 'lon', 'vert', 'time'])

retrieving index [253, 346, 3, 8], month is 9: 13.5191
retrieving index [253, 346, 0, 8], month is 9: 11.3289
retrieving index [254, 363, 4, 8], month is 9: 4.97461
retrieving index [254, 363, 0, 8], month is 9: 5.87468
retrieving index [258, 371, 4, 8], month is 9: 1.20546
retrieving index [258, 371, 0, 8], month is 9: 3.7131
retrieving index [251, 388, 4, 10], month is 11: 38.9706
retrieving index [251, 388, 4, 10], month is 11: 38.9706
retrieving index [251, 388, 0, 10], month is 11: 22.4091
retrieving index [251, 388, 0, 10], month is 11: 22.4091
retrieving index [259, 394, 0, 10], month is 11: 9.375
retrieving index [264, 395, 4, 10], month is 11: 19.1828
retrieving index [264, 395, 4, 10], month is 11: 19.1828
retrieving index [264, 395, 0, 10], month is 11: 1.69724
retrieving index [258, 398, 4, 10], month is 11: 358.111
retrieving index [258, 398, 4, 10], month is 11: 358.111
retrieving index [258, 398, 0, 10], month is 11: 10.4817
retrieving index [258, 398, 0, 10], month is 1

In [258]:
env['npratio_monthly_historical'] = np

In [260]:
# Open the ocean depth file, then show its variable names and the metadata
# of the DepthMean field.
# Single-argument print(...) prints the same text as the print statement.
oceandep_nc = netCDF4.Dataset("historical_data/oceandepthAnmeanNASA.nc")
print(oceandep_nc.variables.keys())
print(oceandep_nc.variables['DepthMean'])

[u'lon', u'lat', u'DepthMean']
<type 'netCDF4._netCDF4.Variable'>
float32 DepthMean(lat, lon)
    units: 
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [261]:
# Extract the DepthMean value for the samples in env.
# NOTE(review): depth_sampled and the time/vert names are passed although
# this file lists only lon/lat/DepthMean; confirm they are ignored for a
# 2-D grid.
oceandep = create_satellite_column(
    env=env, nc=oceandep_nc, z_var='DepthMean',
    index_order=['lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [253, 346], month is 9: 249.719
retrieving index [253, 346], month is 9: 249.719
retrieving index [254, 363], month is 9: 2510.06
retrieving index [254, 363], month is 9: 2510.06
retrieving index [258, 371], month is 9: 2845.47
retrieving index [258, 371], month is 9: 2845.47
retrieving index [251, 388], month is 11: 484.938
retrieving index [251, 388], month is 11: 484.938
retrieving index [251, 388], month is 11: 484.938
retrieving index [251, 388], month is 11: 484.938
retrieving index [259, 394], month is 11: 742.984
retrieving index [264, 395], month is 11: 511.562
retrieving index [264, 395], month is 11: 511.562
retrieving index [264, 395], month is 11: 511.562
retrieving index [258, 398], month is 11: 1345.95
retrieving index [258, 398], month is 11: 1345.95
retrieving index [258, 398], month is 11: 1345.95
retrieving index [258, 398], month is 11: 1345.95
retrieving index [247, 425], month is 12: 1765.75
retrieving index [247, 425], month is 12: 1765.75
retrie

In [262]:
# Keep the DepthMean lookup on env under 'oceandepth_historical'.
env['oceandepth_historical'] = oceandep

In [265]:
# Open the annual-mean dissolved oxygen file, then show its variable names,
# the oxygendissolved metadata and the time axis metadata.
# Single-argument print(...) prints the same text as the print statement.
oxyan_nc = netCDF4.Dataset("historical_data/oxygendissolvedAnmeanWOA.nc")
print(oxyan_nc.variables.keys())
print(oxyan_nc.variables['oxygendissolved'])
print(oxyan_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'oxygendissolved']
<type 'netCDF4._netCDF4.Variable'>
float64 oxygendissolved(time, vert, lat, lon)
    units: milliliters_per_liter
    missing_value: -9999
    long_name: Mean dissolved oxygen over 1955 to 2012
    cell_methods: area: mean depth: mean time: mean
unlimited dimensions: time
current shape = (1, 102, 180, 360)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (1,)
filling off



In [266]:
# Extract the annual-mean oxygendissolved value for the samples in env,
# including the sampling depth. The variable is laid out as
# (time, vert, lat, lon).
oxyan = create_satellite_column(
    env=env, nc=oxyan_nc, z_var='oxygendissolved',
    index_order=['time', 'vert', 'lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [0, 93, 126, 173], month is 9: 5.4038653
retrieving index [0, 100, 126, 173], month is 9: 5.404876
retrieving index [0, 93, 127, 181], month is 9: 5.560367
retrieving index [0, 100, 127, 181], month is 9: 5.2991405
retrieving index [0, 90, 129, 185], month is 9: 5.4831557
retrieving index [0, 100, 129, 185], month is 9: 5.2846465
retrieving index [0, 89, 125, 194], month is 11: 5.420683
retrieving index [0, 89, 125, 194], month is 11: 5.420683
retrieving index [0, 100, 125, 194], month is 11: 5.15327
retrieving index [0, 100, 125, 194], month is 11: 5.15327
retrieving index [0, 100, 129, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 100, 132, 197], month is 11: nan
retrieving index [0, 91, 129, 199], month is 11: 5.541907
retrieving index [0, 91, 129, 199], month is 11: 5.541907
retrieving index [0, 100, 129, 199], month is 11: 5.2794166
retrieving index [0, 100, 129

In [267]:
# Keep the annual dissolved oxygen lookup on env under
# 'oxygendissolved_annual_historical'.
env['oxygendissolved_annual_historical'] = oxyan

In [269]:
# Open the monthly-mean dissolved oxygen file, then show its variable names,
# the oxygendissolved metadata and the time axis metadata.
# Single-argument print(...) prints the same text as the print statement.
oxymo_nc = netCDF4.Dataset("historical_data/oxygendissolvedMomeanWOA.nc")
print(oxymo_nc.variables.keys())
print(oxymo_nc.variables['oxygendissolved'])
print(oxymo_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'oxygendissolved']
<type 'netCDF4._netCDF4.Variable'>
float64 oxygendissolved(time, vert, lat, lon)
    units: milliliters_per_liter
    missing_value: -9999
    long_name: Mean monthly dissolved oxygen 1955 to 2012
    cell_methods: area: mean depth:mean time: mean within years time: mean over years
unlimited dimensions: time
current shape = (12, 57, 180, 360)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (12,)
filling off



In [270]:
# Extract the monthly-mean oxygendissolved value for the samples in env,
# including the sampling depth. The variable is laid out as
# (time, vert, lat, lon).
oxymo = create_satellite_column(
    env=env, nc=oxymo_nc, z_var='oxygendissolved',
    index_order=['time', 'vert', 'lat', 'lon'],
    # sample-table fields (presumably env column names)
    latitude='latitude', longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled', month_sampled='month_sampled',
    # coordinate variable names for the netCDF file
    lat="lat", lon="lon", time="time", vert="vert")

retrieving index [8, 48, 126, 173], month is 9: 5.4682713
retrieving index [8, 55, 126, 173], month is 9: 5.3061967
retrieving index [8, 48, 127, 181], month is 9: 5.8703833
retrieving index [8, 55, 127, 181], month is 9: 4.7538376
retrieving index [8, 45, 129, 185], month is 9: 5.750547
retrieving index [8, 55, 129, 185], month is 9: 4.7394547
retrieving index [10, 44, 125, 194], month is 11: 5.321117
retrieving index [10, 44, 125, 194], month is 11: 5.321117
retrieving index [10, 55, 125, 194], month is 11: 5.1648073
retrieving index [10, 55, 125, 194], month is 11: 5.1648073
retrieving index [10, 55, 129, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 55, 132, 197], month is 11: nan
retrieving index [10, 46, 129, 199], month is 11: 5.477915
retrieving index [10, 46, 129, 199], month is 11: 5.477915
retrieving index [10, 55, 129, 199], month is 11: 5.1287932
retrieving index [10,

In [271]:
# Attach the monthly dissolved-oxygen lookup result (oxymo) to env as a new column.
env['oxygendissolved_monthly_historical'] = oxymo

In [273]:
# Open the annual-mean oxygen-saturation file and inspect it: the variable
# names, the 'oxygensaturation' variable's metadata and the 'time' variable's
# metadata.
oxysatan_nc = netCDF4.Dataset("historical_data/oxygensaturationAnmeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(oxysatan_nc.variables.keys())
print(oxysatan_nc.variables['oxygensaturation'])
print(oxysatan_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'oxygensaturation']
<type 'netCDF4._netCDF4.Variable'>
float64 oxygensaturation(time, vert, lat, lon)
    units: percent
    missing_value: -9999
    long_name: Mean oxygen saturation over 1955 to 2012
    cell_methods: area: mean depth: mean time: mean
unlimited dimensions: time
current shape = (1, 102, 180, 360)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (1,)
filling off



In [274]:
# Look up annual-mean oxygen saturation for the samples in env with the
# notebook's create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
oxysatan = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=oxysatan_nc,
    z_var='oxygensaturation',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [0, 93, 126, 173], month is 9: 98.41392
retrieving index [0, 100, 126, 173], month is 9: 101.13584
retrieving index [0, 93, 127, 181], month is 9: 100.98099
retrieving index [0, 100, 127, 181], month is 9: 101.41058
retrieving index [0, 90, 129, 185], month is 9: 98.02551
retrieving index [0, 100, 129, 185], month is 9: 100.11605
retrieving index [0, 89, 125, 194], month is 11: 99.93076
retrieving index [0, 89, 125, 194], month is 11: 99.93076
retrieving index [0, 100, 125, 194], month is 11: 100.662445
retrieving index [0, 100, 125, 194], month is 11: 100.662445
retrieving index [0, 100, 129, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 100, 132, 197], month is 11: nan
retrieving index [0, 91, 129, 199], month is 11: 100.95907
retrieving index [0, 91, 129, 199], month is 11: 100.95907
retrieving index [0, 100, 129, 199], month is 11: 101.316505
retrieving index [0,

In [275]:
# Attach the annual oxygen-saturation lookup result (oxysatan) to env as a new column.
env['oxygensaturation_annual_historical'] = oxysatan

In [276]:
# Open the monthly-mean oxygen-saturation file and inspect it: the variable
# names, the 'oxygensaturation' variable's metadata and the 'time' variable's
# metadata.
oxysatmo_nc = netCDF4.Dataset("historical_data/oxygensaturationMomeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(oxysatmo_nc.variables.keys())
print(oxysatmo_nc.variables['oxygensaturation'])
print(oxysatmo_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'oxygensaturation']
<type 'netCDF4._netCDF4.Variable'>
float64 oxygensaturation(time, vert, lat, lon)
    units: percent
    missing_value: -9999
    long_name: Mean monthly oxygen saturation 1955 to 2012
    cell_methods: area: mean depth:mean time: mean within years time: mean over years
unlimited dimensions: time
current shape = (12, 57, 180, 360)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (12,)
filling off



In [277]:
# Look up monthly-mean oxygen saturation for the samples in env with the
# notebook's create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
oxysatmo = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=oxysatmo_nc,
    z_var='oxygensaturation',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [8, 48, 126, 173], month is 9: 100.09857
retrieving index [8, 55, 126, 173], month is 9: 104.64737
retrieving index [8, 48, 127, 181], month is 9: 109.21028
retrieving index [8, 55, 127, 181], month is 9: 103.39148
retrieving index [8, 45, 129, 185], month is 9: 102.89043
retrieving index [8, 55, 129, 185], month is 9: 99.67444
retrieving index [10, 44, 125, 194], month is 11: 101.52337
retrieving index [10, 44, 125, 194], month is 11: 101.52337
retrieving index [10, 55, 125, 194], month is 11: 100.823326
retrieving index [10, 55, 125, 194], month is 11: 100.823326
retrieving index [10, 55, 129, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 55, 132, 197], month is 11: nan
retrieving index [10, 46, 129, 199], month is 11: 101.34242
retrieving index [10, 46, 129, 199], month is 11: 101.34242
retrieving index [10, 55, 129, 199], month is 11: 101.72785
retrieving inde

In [278]:
# Attach the monthly oxygen-saturation lookup result (oxysatmo) to env as a new column.
env['oxygensaturation_monthly_historical'] = oxysatmo

In [280]:
# Open the annual-mean apparent-oxygen-utilization file and inspect it: the
# variable names, the 'oxygenutilization' variable's metadata and the 'time'
# variable's metadata.
aouan_nc = netCDF4.Dataset("historical_data/oxygenutilizationAnmeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(aouan_nc.variables.keys())
print(aouan_nc.variables['oxygenutilization'])
print(aouan_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'oxygenutilization']
<type 'netCDF4._netCDF4.Variable'>
float64 oxygenutilization(time, vert, lat, lon)
    units: milliliters_per_liter
    missing_value: -9999
    long_name: Mean apparent oxygen utilization over 1955 to 2012
    cell_methods: area: mean depth: mean time: mean
unlimited dimensions: time
current shape = (1, 102, 180, 360)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (1,)
filling off



In [281]:
# Look up annual-mean oxygen utilization for the samples in env with the
# notebook's create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
aouan = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=aouan_nc,
    z_var='oxygenutilization',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [0, 93, 126, 173], month is 9: 0.0934844
retrieving index [0, 100, 126, 173], month is 9: -0.058681753
retrieving index [0, 93, 127, 181], month is 9: -0.043478277
retrieving index [0, 100, 127, 181], month is 9: -0.06997166
retrieving index [0, 90, 129, 185], month is 9: 0.11576132
retrieving index [0, 100, 129, 185], month is 9: -0.00094092666
retrieving index [0, 89, 125, 194], month is 11: 0.007798686
retrieving index [0, 89, 125, 194], month is 11: 0.007798686
retrieving index [0, 100, 125, 194], month is 11: -0.02833496
retrieving index [0, 100, 125, 194], month is 11: -0.02833496
retrieving index [0, 100, 129, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 100, 132, 197], month is 11: nan
retrieving index [0, 91, 129, 199], month is 11: -0.052169185
retrieving index [0, 91, 129, 199], month is 11: -0.052169185
retrieving index [0, 100, 129, 199], month is 11: -

In [282]:
# Attach the annual oxygen-utilization lookup result (aouan) to env as a new column.
env['oxygenutilization_annual_historical'] = aouan

In [284]:
# Open the monthly-mean oxygen-utilization file and inspect it: the variable
# names, the 'oxygenutilization' variable's metadata and the 'time' variable's
# metadata.
aoumo_nc = netCDF4.Dataset("historical_data/oxygenutilizationMomeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(aoumo_nc.variables.keys())
print(aoumo_nc.variables['oxygenutilization'])
print(aoumo_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'oxygenutilization']
<type 'netCDF4._netCDF4.Variable'>
float64 oxygenutilization(time, vert, lat, lon)
    units: milliliters_per_liter
    missing_value: -9999
    long_name: Mean monthly oxygen utilization over 1955 to 2012
    cell_methods: area: mean depth:mean time: mean within years time: mean over years
unlimited dimensions: time
current shape = (12, 57, 180, 360)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (12,)
filling off



In [285]:
# Look up monthly-mean oxygen utilization for the samples in env with the
# notebook's create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
aoumo = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=aoumo_nc,
    z_var='oxygenutilization',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [8, 48, 126, 173], month is 9: 0.008546714
retrieving index [8, 55, 126, 173], month is 9: -0.22843562
retrieving index [8, 48, 127, 181], month is 9: -0.48890427
retrieving index [8, 55, 127, 181], month is 9: -0.15445197
retrieving index [8, 45, 129, 185], month is 9: -0.15777637
retrieving index [8, 55, 129, 185], month is 9: 0.01484523
retrieving index [10, 44, 125, 194], month is 11: -0.08050419
retrieving index [10, 44, 125, 194], month is 11: -0.08050419
retrieving index [10, 55, 125, 194], month is 11: -0.044260886
retrieving index [10, 55, 125, 194], month is 11: -0.044260886
retrieving index [10, 55, 129, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 55, 132, 197], month is 11: nan
retrieving index [10, 46, 129, 199], month is 11: -0.07027642
retrieving index [10, 46, 129, 199], month is 11: -0.07027642
retrieving index [10, 55, 129, 199], month is 11: -

In [286]:
# Attach the monthly oxygen-utilization lookup result (aoumo) to env as a new column.
env['oxygenutilization_monthly_historical'] = aoumo

In [287]:
# Open the annual-mean PAR (photosynthetically available radiation) file and
# inspect it: the variable names, the 'par' variable's metadata and the
# 'time' variable's metadata.
par_nc = netCDF4.Dataset("historical_data/parAnmeanBiooracle.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(par_nc.variables.keys())
print(par_nc.variables['par'])
print(par_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'par']
<type 'netCDF4._netCDF4.Variable'>
float64 par(time, lat, lon)
    units: Einstein_per_square_meter_per_day
    missing_value: -9999
    long_name: Photosynthetically Available Radiation
    cell_methods: area: mean time: mean
unlimited dimensions: time
current shape = (1, 2160, 4320)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (1,)
filling off



In [288]:
# Look up annual-mean PAR for the samples in env with the notebook's
# create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
par = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source. The 'par' variable is indexed (time, lat, lon) only, so
    # index_order has no 'vert' entry; vert="vert" is still passed exactly as
    # in the depth-resolved calls.
    nc=par_nc,
    z_var='par',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'lat', 'lon'])

retrieving index [0, 1518, 2081], month is 9: 39.585
retrieving index [0, 1518, 2081], month is 9: 39.585
retrieving index [0, 1524, 2183], month is 9: 38.449
retrieving index [0, 1524, 2183], month is 9: 38.449
retrieving index [0, 1548, 2231], month is 9: 36.853
retrieving index [0, 1549, 2230], month is 9: 36.746
retrieving index [0, 1509, 2331], month is 11: 39.253
retrieving index [0, 1509, 2331], month is 11: 39.253
retrieving index [0, 1509, 2331], month is 11: 39.253
retrieving index [0, 1509, 2331], month is 11: 39.253
retrieving index [0, 1558, 2368], month is 11: 36.073
retrieving index [0, 1586, 2372], month is 11: 34.772
retrieving index [0, 1586, 2372], month is 11: 34.772
retrieving index [0, 1586, 2372], month is 11: 34.772
retrieving index [0, 1552, 2392], month is 11: 36.405
retrieving index [0, 1552, 2392], month is 11: 36.405
retrieving index [0, 1552, 2392], month is 11: 36.405
retrieving index [0, 1552, 2392], month is 11: 36.405
retrieving index [0, 1487, 2553], 

In [289]:
# Attach the annual PAR lookup result (par) to env as a new column.
env['par_annual_historical'] = par

In [291]:
# Open the annual-mean pH file and inspect it: the variable names and the
# 'pH' variable's metadata.
ph_nc = netCDF4.Dataset("historical_data/phAnmeanBiooracle.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(ph_nc.variables.keys())
print(ph_nc.variables['pH'])

[u'lon', u'lat', u'pH']
<type 'netCDF4._netCDF4.Variable'>
float32 pH(lat, lon)
    units: --
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [292]:
# Look up annual-mean pH for the samples in env with the notebook's
# create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
ph = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source. The 'pH' variable is indexed (lat, lon) only, so
    # index_order has no 'time' or 'vert' entry; time="time" and vert="vert"
    # are still passed exactly as in the other calls.
    nc=ph_nc,
    z_var='pH',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['lat', 'lon'])

retrieving index [253, 346], month is 9: 8.08922
retrieving index [253, 346], month is 9: 8.08922
retrieving index [254, 363], month is 9: 8.18578
retrieving index [254, 363], month is 9: 8.18578
retrieving index [258, 371], month is 9: 8.20806
retrieving index [258, 371], month is 9: 8.20806
retrieving index [251, 388], month is 11: 8.21286
retrieving index [251, 388], month is 11: 8.21286
retrieving index [251, 388], month is 11: 8.21286
retrieving index [251, 388], month is 11: 8.21286
retrieving index [259, 394], month is 11: 8.25097
retrieving index [264, 395], month is 11: 8.26206
retrieving index [264, 395], month is 11: 8.26206
retrieving index [264, 395], month is 11: 8.26206
retrieving index [258, 398], month is 11: 8.24944
retrieving index [258, 398], month is 11: 8.24944
retrieving index [258, 398], month is 11: 8.24944
retrieving index [258, 398], month is 11: 8.24944
retrieving index [247, 425], month is 12: 8.13236
retrieving index [247, 425], month is 12: 8.13236
retrie

In [293]:
# Attach the annual pH lookup result (ph) to env as a new column.
env['ph_annual_historical'] = ph

In [294]:
# Open the annual-mean phosphate file and inspect it: the variable names, the
# 'phosphate' variable's metadata and the 'time' variable's metadata.
phosan_nc = netCDF4.Dataset("historical_data/phosphateAnmeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(phosan_nc.variables.keys())
print(phosan_nc.variables['phosphate'])
print(phosan_nc.variables['time'])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'phosphate']
<type 'netCDF4._netCDF4.Variable'>
float64 phosphate(time, vert, lat, lon)
    units: micromoles_per_liter
    missing_value: -9999
    long_name: Mean phosphate concentration over 1955 to 2012
    cell_methods: area: mean depth: mean time: mean
unlimited dimensions: time
current shape = (1, 102, 180, 360)
filling off

<type 'netCDF4._netCDF4.Variable'>
float64 time(time)
    units: days since 1970-01-01
    calendar: gregorian
    axis: T
    long_name: time
    standard_name: time
    climatology: climatology_bounds
unlimited dimensions: time
current shape = (1,)
filling off



In [295]:
# Look up annual-mean phosphate for the samples in env with the notebook's
# create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
phosan = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=phosan_nc,
    z_var='phosphate',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [0, 93, 126, 173], month is 9: 0.22646229
retrieving index [0, 100, 126, 173], month is 9: 0.1852494
retrieving index [0, 93, 127, 181], month is 9: 0.14162205
retrieving index [0, 100, 127, 181], month is 9: 0.13790019
retrieving index [0, 90, 129, 185], month is 9: 0.116650306
retrieving index [0, 100, 129, 185], month is 9: 0.13194695
retrieving index [0, 89, 125, 194], month is 11: 0.07982441
retrieving index [0, 89, 125, 194], month is 11: 0.07982441
retrieving index [0, 100, 125, 194], month is 11: 0.08330544
retrieving index [0, 100, 125, 194], month is 11: 0.08330544
retrieving index [0, 100, 129, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 100, 132, 197], month is 11: nan
retrieving index [0, 91, 129, 199], month is 11: 0.04847939
retrieving index [0, 91, 129, 199], month is 11: 0.04847939
retrieving index [0, 100, 129, 199], month is 11: 0.05648988
retrie

In [296]:
# Attach the annual phosphate lookup result (phosan) to env as a new column.
env['phosphate_annual_historical'] = phosan

In [299]:
# Open the monthly-mean phosphate file and inspect it: the variable names, the
# 'phosphate' variable's metadata and the raw values of the 'time' axis.
phosmo_nc = netCDF4.Dataset("historical_data/phosphateMomeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(phosmo_nc.variables.keys())
print(phosmo_nc.variables['phosphate'])
print(phosmo_nc.variables['time'][:])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'phosphate']
<type 'netCDF4._netCDF4.Variable'>
float64 phosphate(time, vert, lat, lon)
    units: micromoles_per_liter
    missing_value: -9999
    long_name: Mean monthly phosphate concentration 1955 to 2012
    cell_methods: area: mean depth:mean time: mean within years time: mean over years
unlimited dimensions: time
current shape = (12, 37, 180, 360)
filling off

[ 15340.  15371.  15400.  15431.  15461.  15492.  15522.  15553.  15584.
  15614.  15645.  15675.]


In [298]:
# Look up monthly-mean phosphate for the samples in env with the notebook's
# create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
phosmo = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=phosmo_nc,
    z_var='phosphate',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [8, 28, 126, 173], month is 9: 0.19353284
retrieving index [8, 35, 126, 173], month is 9: 0.1043292
retrieving index [8, 28, 127, 181], month is 9: 0.19374645
retrieving index [8, 35, 127, 181], month is 9: 0.18696396
retrieving index [8, 25, 129, 185], month is 9: 0.13408388
retrieving index [8, 35, 129, 185], month is 9: 0.17710672
retrieving index [10, 24, 125, 194], month is 11: 0.020510064
retrieving index [10, 24, 125, 194], month is 11: 0.020510064
retrieving index [10, 35, 125, 194], month is 11: 0.01806391
retrieving index [10, 35, 125, 194], month is 11: 0.01806391
retrieving index [10, 35, 129, 197], month is 11: nan
retrieving index [10, 25, 132, 197], month is 11: nan
retrieving index [10, 25, 132, 197], month is 11: nan
retrieving index [10, 35, 132, 197], month is 11: nan
retrieving index [10, 26, 129, 199], month is 11: 0.0
retrieving index [10, 26, 129, 199], month is 11: 0.0
retrieving index [10, 35, 129, 199], month is 11: 0.014549278
retrieving inde

In [300]:
# Attach the monthly phosphate lookup result (phosmo) to env as a new column.
env['phosphate_monthly_historical'] = phosmo

In [302]:
# Open the annual-mean salinity file and inspect it: the variable names, the
# 'salinity' variable's metadata and the raw values of the 'time' axis.
salan_nc = netCDF4.Dataset("historical_data/salinityAnmeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(salan_nc.variables.keys())
print(salan_nc.variables['salinity'])
print(salan_nc.variables['time'][:])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'salinity']
<type 'netCDF4._netCDF4.Variable'>
float64 salinity(time, vert, lat, lon)
    units: 1
    missing_value: -9999
    long_name: Mean salinity over 1955 to 2012
    cell_methods: area: mean depth: mean time: mean
unlimited dimensions: time
current shape = (1, 102, 180, 360)
filling off

[ 15705.]


In [303]:
# Look up annual-mean salinity for the samples in env with the notebook's
# create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
salan = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=salan_nc,
    z_var='salinity',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [0, 93, 126, 173], month is 9: 36.185513
retrieving index [0, 100, 126, 173], month is 9: 36.16469
retrieving index [0, 93, 127, 181], month is 9: 37.34
retrieving index [0, 100, 127, 181], month is 9: 37.10259
retrieving index [0, 90, 129, 185], month is 9: 37.70839
retrieving index [0, 100, 129, 185], month is 9: 37.5164
retrieving index [0, 89, 125, 194], month is 11: 37.93251
retrieving index [0, 89, 125, 194], month is 11: 37.93251
retrieving index [0, 100, 125, 194], month is 11: 37.838688
retrieving index [0, 100, 125, 194], month is 11: 37.838688
retrieving index [0, 100, 129, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 100, 132, 197], month is 11: nan
retrieving index [0, 91, 129, 199], month is 11: 38.6232
retrieving index [0, 91, 129, 199], month is 11: 38.6232
retrieving index [0, 100, 129, 199], month is 11: 38.38901
retrieving index [0, 100, 129, 199]

In [304]:
# Attach the annual salinity lookup result (salan) to env as a new column.
env['salinity_annual_historical'] = salan

In [305]:
# Open the monthly-mean salinity file and inspect it: the variable names, the
# 'salinity' variable's metadata and the raw values of the 'time' axis.
salmo_nc = netCDF4.Dataset("historical_data/salinityMomeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(salmo_nc.variables.keys())
print(salmo_nc.variables['salinity'])
print(salmo_nc.variables['time'][:])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'salinity']
<type 'netCDF4._netCDF4.Variable'>
float64 salinity(time, vert, lat, lon)
    units: 1
    missing_value: -9999
    long_name: Mean monthly salinity 1955 to 2012
    cell_methods: area: mean depth:mean time: mean within years time: mean over years
unlimited dimensions: time
current shape = (12, 57, 180, 360)
filling off

[ 15340.  15371.  15400.  15431.  15461.  15492.  15522.  15553.  15584.
  15614.  15645.  15675.]


In [306]:
# Look up monthly-mean salinity for the samples in env with the notebook's
# create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
salmo = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=salmo_nc,
    z_var='salinity',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [8, 48, 126, 173], month is 9: 36.149612
retrieving index [8, 55, 126, 173], month is 9: 36.27609
retrieving index [8, 48, 127, 181], month is 9: 37.235588
retrieving index [8, 55, 127, 181], month is 9: 37.01569
retrieving index [8, 45, 129, 185], month is 9: 37.6838
retrieving index [8, 55, 129, 185], month is 9: 37.60551
retrieving index [10, 44, 125, 194], month is 11: 37.962112
retrieving index [10, 44, 125, 194], month is 11: 37.962112
retrieving index [10, 55, 125, 194], month is 11: 37.950512
retrieving index [10, 55, 125, 194], month is 11: 37.950512
retrieving index [10, 55, 129, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 55, 132, 197], month is 11: nan
retrieving index [10, 46, 129, 199], month is 11: 38.67249
retrieving index [10, 46, 129, 199], month is 11: 38.67249
retrieving index [10, 55, 129, 199], month is 11: 38.522087
retrieving index [10, 5

In [307]:
# Attach the monthly salinity lookup result (salmo) to env as a new column.
env['salinity_monthly_historical'] = salmo

In [308]:
# Open the annual-mean silicate file and inspect it: the variable names, the
# 'silicate' variable's metadata and the raw values of the 'time' axis.
silicatean_nc = netCDF4.Dataset("historical_data/silicateAnmeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(silicatean_nc.variables.keys())
print(silicatean_nc.variables['silicate'])
print(silicatean_nc.variables['time'][:])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'silicate']
<type 'netCDF4._netCDF4.Variable'>
float64 silicate(time, vert, lat, lon)
    units: micromoles_per_liter
    missing_value: -9999
    long_name: Mean silicate concentration over 1955 to 2012
    cell_methods: area: mean depth: mean time: mean
unlimited dimensions: time
current shape = (1, 102, 180, 360)
filling off

[ 15705.]


In [309]:
# Look up annual-mean silicate for the samples in env with the notebook's
# create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
silicatean = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=silicatean_nc,
    z_var='silicate',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [0, 93, 126, 173], month is 9: 2.1063387
retrieving index [0, 100, 126, 173], month is 9: 1.4273912
retrieving index [0, 93, 127, 181], month is 9: 1.7631569
retrieving index [0, 100, 127, 181], month is 9: 1.0221391
retrieving index [0, 90, 129, 185], month is 9: 2.0382197
retrieving index [0, 100, 129, 185], month is 9: 1.5389622
retrieving index [0, 89, 125, 194], month is 11: 1.9035501
retrieving index [0, 89, 125, 194], month is 11: 1.9035501
retrieving index [0, 100, 125, 194], month is 11: 1.814042
retrieving index [0, 100, 125, 194], month is 11: 1.814042
retrieving index [0, 100, 129, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 100, 132, 197], month is 11: nan
retrieving index [0, 91, 129, 199], month is 11: 2.4741402
retrieving index [0, 91, 129, 199], month is 11: 2.4741402
retrieving index [0, 100, 129, 199], month is 11: 1.9551483
retrieving index [0, 

In [310]:
# Attach the annual silicate lookup result (silicatean) to env as a new column.
env['silicate_annual_historical'] = silicatean

In [311]:
# Open the monthly-mean silicate file and inspect it: the variable names, the
# 'silicate' variable's metadata and the raw values of the 'time' axis.
silicatemo_nc = netCDF4.Dataset("historical_data/silicateMomeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(silicatemo_nc.variables.keys())
print(silicatemo_nc.variables['silicate'])
print(silicatemo_nc.variables['time'][:])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'silicate']
<type 'netCDF4._netCDF4.Variable'>
float64 silicate(time, vert, lat, lon)
    units: micromoles_per_liter
    missing_value: -9999
    long_name: Mean monthly silicate concentration 1955 to 2012
    cell_methods: area: mean depth:mean time: mean within years time: mean over years
unlimited dimensions: time
current shape = (12, 37, 180, 360)
filling off

[ 15340.  15371.  15400.  15431.  15461.  15492.  15522.  15553.  15584.
  15614.  15645.  15675.]


In [312]:
# Look up monthly-mean silicate for the samples in env with the notebook's
# create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
silicatemo = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=silicatemo_nc,
    z_var='silicate',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [8, 28, 126, 173], month is 9: 0.44057465
retrieving index [8, 35, 126, 173], month is 9: 0.06769393
retrieving index [8, 28, 127, 181], month is 9: 0.7858094
retrieving index [8, 35, 127, 181], month is 9: 0.0
retrieving index [8, 25, 129, 185], month is 9: 0.9504701
retrieving index [8, 35, 129, 185], month is 9: 0.8597194
retrieving index [10, 24, 125, 194], month is 11: 1.7645429
retrieving index [10, 24, 125, 194], month is 11: 1.7645429
retrieving index [10, 35, 125, 194], month is 11: 2.5671427
retrieving index [10, 35, 125, 194], month is 11: 2.5671427
retrieving index [10, 35, 129, 197], month is 11: nan
retrieving index [10, 25, 132, 197], month is 11: nan
retrieving index [10, 25, 132, 197], month is 11: nan
retrieving index [10, 35, 132, 197], month is 11: nan
retrieving index [10, 26, 129, 199], month is 11: 1.7153335
retrieving index [10, 26, 129, 199], month is 11: 1.7153335
retrieving index [10, 35, 129, 199], month is 11: 1.3467094
retrieving index [10

In [313]:
# Attach the monthly silicate lookup result (silicatemo) to env as a new column.
env['silicate_monthly_historical'] = silicatemo

In [316]:
# Open the annual-mean sea-surface-temperature file and inspect it: the
# variable names and the 'Sea_surface_temperature_mean' variable's metadata.
sstan_nc = netCDF4.Dataset("historical_data/sstAnmeanBiooracle.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(sstan_nc.variables.keys())
print(sstan_nc.variables['Sea_surface_temperature_mean'])

[u'lon', u'lat', u'Sea_surface_temperature_mean']
<type 'netCDF4._netCDF4.Variable'>
float32 Sea_surface_temperature_mean(lat, lon)
    units: Degrees_C
    missing_value: -9999
unlimited dimensions: 
current shape = (360, 720)
filling off



In [317]:
# Look up annual-mean sea-surface temperature for the samples in env with the
# notebook's create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
sstan = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source. The data variable is indexed (lat, lon) only, so
    # index_order has no 'time' or 'vert' entry; time="time" and vert="vert"
    # are still passed exactly as in the other calls.
    nc=sstan_nc,
    z_var='Sea_surface_temperature_mean',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['lat', 'lon'])

retrieving index [253, 346], month is 9: 18.8708
retrieving index [253, 346], month is 9: 18.8708
retrieving index [254, 363], month is 9: 19.366
retrieving index [254, 363], month is 9: 19.366
retrieving index [258, 371], month is 9: 19.4889
retrieving index [258, 371], month is 9: 19.4889
retrieving index [251, 388], month is 11: 20.7615
retrieving index [251, 388], month is 11: 20.7615
retrieving index [251, 388], month is 11: 20.7615
retrieving index [251, 388], month is 11: 20.7615
retrieving index [259, 394], month is 11: 19.476
retrieving index [264, 395], month is 11: 18.9055
retrieving index [264, 395], month is 11: 18.9055
retrieving index [264, 395], month is 11: 18.9055
retrieving index [258, 398], month is 11: 19.8001
retrieving index [258, 398], month is 11: 19.8001
retrieving index [258, 398], month is 11: 19.8001
retrieving index [258, 398], month is 11: 19.8001
retrieving index [247, 425], month is 12: 22.1904
retrieving index [247, 425], month is 12: 22.1904
retrievin

In [318]:
# Attach the annual sea-surface-temperature lookup result (sstan) to env as a new column.
env['sst_annual_historical'] = sstan

In [320]:
# Open the monthly-mean temperature file and inspect it: the variable names,
# the 'temperature' variable's metadata and the raw values of the 'time' axis.
tempmo_nc = netCDF4.Dataset("historical_data/temperatureMomeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(tempmo_nc.variables.keys())
print(tempmo_nc.variables['temperature'])
print(tempmo_nc.variables['time'][:])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'temperature']
<type 'netCDF4._netCDF4.Variable'>
float64 temperature(time, vert, lat, lon)
    units: degrees_celsius
    missing_value: -9999
    long_name: Mean monthly temperature 1955 to 2012
    cell_methods: area: mean depth:mean time: mean within years time: mean over years
unlimited dimensions: time
current shape = (12, 57, 180, 360)
filling off

[ 15340.  15371.  15400.  15431.  15461.  15492.  15522.  15553.  15584.
  15614.  15645.  15675.]


In [321]:
# Look up monthly-mean water temperature for the samples in env with the
# notebook's create_satellite_column helper (defined in an earlier cell).
# NOTE(review): presumably returns one value per env row -- confirm in the helper.
tempmo = create_satellite_column(
    # Sample table and the names of its coordinate/date columns.
    env=env,
    latitude='latitude',
    longitude='longitude',
    depth_sampled='depth_sampled',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    # NetCDF source: data variable, coordinate variable names, and the axis
    # order of the data variable (time, vert, lat, lon).
    nc=tempmo_nc,
    z_var='temperature',
    time="time",
    vert="vert",
    lat="lat",
    lon="lon",
    index_order=['time', 'vert', 'lat', 'lon'])

retrieving index [8, 48, 126, 173], month is 9: 17.918491
retrieving index [8, 55, 126, 173], month is 9: 21.37179
retrieving index [8, 48, 127, 181], month is 9: 16.97089
retrieving index [8, 55, 127, 181], month is 9: 24.6326
retrieving index [8, 45, 129, 185], month is 9: 16.147
retrieving index [8, 55, 129, 185], month is 9: 24.140291
retrieving index [10, 44, 125, 194], month is 11: 17.43611
retrieving index [10, 44, 125, 194], month is 11: 17.43611
retrieving index [10, 55, 125, 194], month is 11: 20.54201
retrieving index [10, 55, 125, 194], month is 11: 20.54201
retrieving index [10, 55, 129, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 45, 132, 197], month is 11: nan
retrieving index [10, 55, 132, 197], month is 11: nan
retrieving index [10, 46, 129, 199], month is 11: 16.814291
retrieving index [10, 46, 129, 199], month is 11: 16.814291
retrieving index [10, 55, 129, 199], month is 11: 18.7183
retrieving index [10, 55, 129

In [322]:
# Attach the monthly water-temperature lookup result (tempmo) to env as a new column.
env['oceantemp_monthly_historical'] = tempmo

In [324]:
# Open the annual-mean water-temperature file and inspect it: the variable
# names, the 'watertemp' variable's metadata and the raw values of the 'time'
# axis.
tempan_nc = netCDF4.Dataset("historical_data/watertempAnmeanWOA.nc")
# Single-argument print(...) prints the same thing on Python 2 and is also
# valid on Python 3, where the bare `print x` statement is a SyntaxError.
print(tempan_nc.variables.keys())
print(tempan_nc.variables['watertemp'])
print(tempan_nc.variables['time'][:])

[u'lat', u'lat_bnds', u'lon', u'lon_bnds', u'time', u'climatology_bounds', u'vert', u'vert_bnds', u'watertemp']
<type 'netCDF4._netCDF4.Variable'>
float64 watertemp(time, vert, lat, lon)
    units: degrees_celsius
    missing_value: -9999
    long_name: Mean water temperature over 1955 to 2012
    cell_methods: area: mean depth: mean time: mean
unlimited dimensions: time
current shape = (1, 102, 180, 360)
filling off

[ 15705.]


In [325]:
# Look up the 'watertemp' variable of tempan_nc for each row of env.
# The *_sampled / latitude / longitude arguments name env columns; lat, lon,
# time and vert name coordinate variables of the netCDF file, and index_order
# follows the variable's declared dimension order (time, vert, lat, lon).
tempan = create_satellite_column(
    env=env,
    nc=tempan_nc,
    z_var='watertemp',
    year_sampled='year_sampled',
    month_sampled='month_sampled',
    depth_sampled='depth_sampled',
    latitude='latitude',
    longitude='longitude',
    lat="lat",
    lon="lon",
    time="time",
    vert="vert",
    index_order=['time', 'vert', 'lat', 'lon'],
)

retrieving index [0, 93, 126, 173], month is 9: 16.9933
retrieving index [0, 100, 126, 173], month is 9: 18.4447
retrieving index [0, 93, 127, 181], month is 9: 15.9158
retrieving index [0, 100, 127, 181], month is 9: 19.1342
retrieving index [0, 90, 129, 185], month is 9: 15.100591
retrieving index [0, 100, 129, 185], month is 9: 18.81649
retrieving index [0, 89, 125, 194], month is 11: 16.0883
retrieving index [0, 89, 125, 194], month is 11: 16.0883
retrieving index [0, 100, 125, 194], month is 11: 19.89151
retrieving index [0, 100, 125, 194], month is 11: 19.89151
retrieving index [0, 100, 129, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 90, 132, 197], month is 11: nan
retrieving index [0, 100, 132, 197], month is 11: nan
retrieving index [0, 91, 129, 199], month is 11: 15.53121
retrieving index [0, 91, 129, 199], month is 11: 15.53121
retrieving index [0, 100, 129, 199], month is 11: 19.10079
retrieving index [0, 100, 129, 199], 

In [326]:
# Store the values looked up from tempan_nc as a new column of env.
env['oceantemp_annual_historical'] = tempan

In [327]:
# Preview the first rows to check that the two oceantemp_* columns were added.
env.head()


Unnamed: 0,TARA_sample_label,run_ids,filter_range,latitude,longitude,depth_sampled,year_sampled,month_sampled,chl_satellite,sst_satellite,...,ph_annual_historical,phosphate_annual_historical,phosphate_monthly_historical,salinity_annual_historical,salinity_monthly_historical,silicate_annual_historical,silicate_monthly_historical,sst_annual_historical,oceantemp_monthly_historical,oceantemp_annual_historical
0,TARA_004_DCM_0.22-1.6,ERR598950|ERR599095,0.22-1.6,36.5533,-6.5669,40,2009,9,0.319966,22.81,...,8.089222,0.226462,0.193533,36.185513,36.149612,2.106339,0.440575,18.87075,17.918491,16.9933
1,TARA_004_SRF_0.22-1.6,ERR598955|ERR599003,0.22-1.6,36.5533,-6.5669,5,2009,9,0.319966,22.81,...,8.089222,0.185249,0.104329,36.16469,36.27609,1.427391,0.067694,18.87075,21.37179,18.4447
2,TARA_007_DCM_0.22-1.6,ERR315856,0.22-1.6,37.0541,1.9478,42,2009,9,0.14131,25.325,...,8.185778,0.141622,0.193746,37.34,37.235588,1.763157,0.785809,19.365999,16.97089,15.9158
3,TARA_007_SRF_0.22-1.6,ERR315857,0.22-1.6,37.051,1.9378,5,2009,9,0.14131,25.325,...,8.185778,0.1379,0.186964,37.10259,37.01569,1.022139,0.0,19.365999,24.6326,19.1342
4,TARA_009_DCM_0.22-1.6,ERR594315|ERR594329,0.22-1.6,39.0609,5.9422,55,2009,9,0.084704,26.425,...,8.208055,0.11665,0.134084,37.70839,37.6838,2.03822,0.95047,19.48889,16.147,15.100591


In [328]:
# Write the assembled table to disk; index=False leaves the DataFrame's row
# index out of the file.
env.to_csv("env_remote_data_TARA.csv", index=False)