In [1]:
import numpy as np
import math
from datetime import timedelta
from operator import attrgetter
from netCDF4 import Dataset
from netCDF4 import MFDataset
from collections import defaultdict
import xarray as xr
import pandas as pd
import os

In [3]:
'''
Written by Chad Valencia, chadvalencia@gmail.com

for David Lindo Atichati, PhD, CUNY

Dataset ETL

The Purpose of this file is to parse data into organized csvs for different visualizations. By doing so
we can cut down processing time of the dataset and visualize accordingly.
'''

'\nWritten by Chad Valencia, chadvalencia@gmail.com\n\nfor David Lindo Atichati, PhD, CUNY\n\nDataset ETL\n\nThe Purpose of this file is to parse data into organized csvs for different visualizations. By doing so\nwe can cut down processing time of the dataset and visualize accordingly.\n'

In [4]:
cycnc = Dataset('./output_tracking/Cyclonic.nc')
acycnc = Dataset('./output_tracking/Anticyclonic.nc')

In [5]:
def poslon(l):
    j = []
    for i in l:
        if i < 0:
            i=360+i
            j.append(i)
        else:
            j.append(i)
    return j
        
def dfnc(nc):
    '''
    This takes an nc dataset and creates a Pandas dataframe
    with columns eddy, date and position
    '''
    lat=list(nc['lat'])
    lon=poslon(list(nc['lon']))
    radius = list(nc['radius_e']) #radius in km
    amp = list(nc['A'])
    dates = list(nc['j1'])
    eddy = list(nc['track'])
    df = pd.DataFrame([eddy,dates,lat,lon,radius,amp]).T
    df.columns=['eddy','date','lat','lon','radius','amplitude']
    df['date']=pd.to_datetime(df['date'],origin='julian',unit='D')
    return df

In [6]:
cyc = dfnc(cycnc)
acyc = dfnc(acycnc)

In [7]:
def pos_compare(lat, lon):
    lat_in, lon_in = False, False

    if 15.92 < lat < 34.04043:
        lat_in = True
    if 176.04 < lon < 209.0341:
            lon_in = True
    return lat_in and lon_in

def truecol(df):
    l=[]
    for i in range(len(df)):
        if pos_compare(df['lat'][i],df['lon'][i]):
            l.append(True)
        else:
            l.append(False)
    df['truecol']=l
    df = df[df['truecol']==True]
    df = df.drop('truecol',axis=1)
    return df

In [8]:
cdf = truecol(cyc)
adf = truecol(acyc)

In [9]:
# Saving cdf to ./data/cdf.csv and adf to ./data/adf.csv
# cdf is a dataframe of Longitude-Adjusted cylconic eddies, with 1 degree removed on each border, unbinned.
# adf is a dataframe of Longitude-Adjusted cylconic eddies, with 1 degree removed on each border, unbinned.
cdf.to_csv('./data/cdf.csv')
adf.to_csv('./data/adf.csv')

In [2]:
### Skip to here if not first time running
cdf = pd.read_csv('./data/cdf.csv')
cdf.describe()

Unnamed: 0.1,Unnamed: 0,eddy,lat,lon,radius,amplitude
count,157718.0,157718.0,157718.0,157718.0,157718.0,157718.0
mean,87989.784508,4857.821295,25.285731,192.848019,47.682352,2.99654
std,50673.003517,2902.821074,5.290045,9.654817,27.229527,3.541591
min,0.0,1.0,15.920247,176.040161,15.0,0.054436
25%,44335.25,2428.0,20.78618,184.546986,27.2,0.853937
50%,88081.5,4727.0,25.169032,192.926651,39.6,1.79273
75%,131981.75,7352.0,29.955281,201.341152,60.75,3.756357
max,176047.0,10115.0,34.040428,209.034088,210.35,47.40176


In [3]:
adf = pd.read_csv('./data/adf.csv')
adf

Unnamed: 0.1,Unnamed: 0,eddy,date,lat,lon,radius,amplitude
0,0,1.0,2009-05-01 12:00:00,16.711275,201.793015,30.55,0.327685
1,1,1.0,2009-05-02 12:00:00,16.678257,201.710434,33.75,0.406118
2,2,1.0,2009-05-03 12:00:00,16.689449,201.585663,29.85,0.347254
3,3,1.0,2009-05-04 12:00:00,16.632107,201.512939,36.25,0.509210
4,4,1.0,2009-05-05 12:00:00,16.580832,201.447174,31.45,0.429348
5,5,1.0,2009-05-06 12:00:00,16.651991,201.352310,28.40,0.310445
6,6,2.0,2009-05-01 12:00:00,19.849276,207.901001,29.05,0.891934
7,7,2.0,2009-05-02 12:00:00,19.811481,207.849670,31.45,0.963795
8,8,2.0,2009-05-03 12:00:00,19.806429,207.796921,29.05,0.822990
9,9,2.0,2009-05-04 12:00:00,19.815142,207.750305,31.50,0.927562


In [12]:
def latsquish(df):
    #makes a dataframe with lat2 for binning
    dflat = df.copy()
    dflat['lat2']=dflat['lat']
    dflat.lat2 = dflat.lat2.astype(int)
    dfnew = dflat.groupby(['eddy','lat2'])
    result = dfnew.agg({
        'eddy':'first',
        'date':'count',
        'lat':np.mean,
        'lon':np.mean,
        'radius':np.mean,
        'amplitude':np.mean
    })
    return result

def latlonsquish(df):
    #makes a dataframe with lat2 for binning
    dflatlon= df.copy()
    dflatlon['lat2']=dflatlon['lat'].astype(int)
    dflatlon['lon2']=dflatlon['lon'].astype(int)
    dflatlon['latlon']=list(zip((dflatlon['lat'].astype(int)),(dflatlon['lon'].astype(int))))
    dfnew = dflatlon.groupby(['eddy','latlon'])
    result = dfnew.agg({
        'eddy':'first',
        'date':'count',
        'lat':np.mean,
        'lon':np.mean,
        'lat2':'first',
        'lon2':'first',
        'radius':np.mean,
        'amplitude':np.mean
    })
    return result

In [13]:
cldf = latsquish(cdf) # CE data for viz 5, flatten on latitude only
clldf = latlonsquish(cdf) # CE data for viz 4,6,7, flatten on both lat/lon 1 degree bins
aldf = latsquish(adf) # AE data for 5
alldf = latlonsquish(adf) #AE data for 4,6,7

In [14]:
cldf.to_csv('./data/cldf.csv')
clldf.to_csv('./data/clldf.csv')
aldf.to_csv('./data/aldf.csv')
alldf.to_csv('./data/alldf.csv')

cldf = pd.read_csv('./data/cldf.csv')
clldf = pd.read_csv('./data/clldf.csv')
aldf = pd.read_csv('./data/aldf.csv')
alldf = pd.read_csv('./data/alldf.csv')

In [15]:
#The Following Dataframe is for Vis 6, calculating non-linearity

In [16]:
def dfdist(nc):
    '''
    This takes an nc dataset and creates a Pandas dataframe
    with columns eddy, date and position
    '''
    lat=list(nc['lat'])
    lon=poslon(list(nc['lon']))
    u = list(nc['U'])
    dates = list(nc['j1'])
    eddy = list(nc['track'])
    df = pd.DataFrame([eddy,dates,lat,lon,u]).T
    df.columns=['eddy','date','lat','lon','u']
    df['date']=pd.to_datetime(df['date'],origin='julian',unit='D')
    return df

In [17]:
adist = truecol(dfdist(acycnc))
cdist = truecol(dfdist(cycnc))
adist.to_csv('./data/adist.csv')
cdist.to_csv('./data/cdist.csv')
adist = pd.read_csv('./data/adist.csv')
cdist = pd.read_csv('./data/cdist.csv')

In [8]:
latbins = np.arange(15.75,34.25,.25)
latbins

array([15.75, 16.  , 16.25, 16.5 , 16.75, 17.  , 17.25, 17.5 , 17.75,
       18.  , 18.25, 18.5 , 18.75, 19.  , 19.25, 19.5 , 19.75, 20.  ,
       20.25, 20.5 , 20.75, 21.  , 21.25, 21.5 , 21.75, 22.  , 22.25,
       22.5 , 22.75, 23.  , 23.25, 23.5 , 23.75, 24.  , 24.25, 24.5 ,
       24.75, 25.  , 25.25, 25.5 , 25.75, 26.  , 26.25, 26.5 , 26.75,
       27.  , 27.25, 27.5 , 27.75, 28.  , 28.25, 28.5 , 28.75, 29.  ,
       29.25, 29.5 , 29.75, 30.  , 30.25, 30.5 , 30.75, 31.  , 31.25,
       31.5 , 31.75, 32.  , 32.25, 32.5 , 32.75, 33.  , 33.25, 33.5 ,
       33.75, 34.  ])

In [10]:
latlabels = np.arange(15.75,34.5,.25)
latlabels

array([15.75, 16.  , 16.25, 16.5 , 16.75, 17.  , 17.25, 17.5 , 17.75,
       18.  , 18.25, 18.5 , 18.75, 19.  , 19.25, 19.5 , 19.75, 20.  ,
       20.25, 20.5 , 20.75, 21.  , 21.25, 21.5 , 21.75, 22.  , 22.25,
       22.5 , 22.75, 23.  , 23.25, 23.5 , 23.75, 24.  , 24.25, 24.5 ,
       24.75, 25.  , 25.25, 25.5 , 25.75, 26.  , 26.25, 26.5 , 26.75,
       27.  , 27.25, 27.5 , 27.75, 28.  , 28.25, 28.5 , 28.75, 29.  ,
       29.25, 29.5 , 29.75, 30.  , 30.25, 30.5 , 30.75, 31.  , 31.25,
       31.5 , 31.75, 32.  , 32.25, 32.5 , 32.75, 33.  , 33.25, 33.5 ,
       33.75, 34.  , 34.25])

In [11]:
lonbins = np.arange(176,209.5,.5)
lonbins

array([176. , 176.5, 177. , 177.5, 178. , 178.5, 179. , 179.5, 180. ,
       180.5, 181. , 181.5, 182. , 182.5, 183. , 183.5, 184. , 184.5,
       185. , 185.5, 186. , 186.5, 187. , 187.5, 188. , 188.5, 189. ,
       189.5, 190. , 190.5, 191. , 191.5, 192. , 192.5, 193. , 193.5,
       194. , 194.5, 195. , 195.5, 196. , 196.5, 197. , 197.5, 198. ,
       198.5, 199. , 199.5, 200. , 200.5, 201. , 201.5, 202. , 202.5,
       203. , 203.5, 204. , 204.5, 205. , 205.5, 206. , 206.5, 207. ,
       207.5, 208. , 208.5, 209. ])

In [17]:
cdf['lat2']=pd.cut(cdf['lat'],bins=latlabels,labels=latbins,right=False)
cdf['lat2']=cdf['lat2'].astype(float)

In [20]:
cdf.head(5)

Unnamed: 0.1,Unnamed: 0,eddy,date,lat,lon,radius,amplitude,lat2
0,0,1.0,2009-05-01 12:00:00,26.569246,201.323822,23.9,0.713694,26.5
1,1,1.0,2009-05-02 12:00:00,26.591457,201.289383,21.55,0.61936,26.5
2,2,1.0,2009-05-03 12:00:00,26.602619,201.271347,19.1,0.580072,26.5
3,3,1.0,2009-05-04 12:00:00,26.615175,201.254868,16.6,0.493775,26.5
4,4,2.0,2009-05-01 12:00:00,30.712801,177.31134,23.85,2.970304,30.5


In [32]:
def latbyinterval(df, interval):
    #binning by intervals of _interval_ degrees latitude, for possible values of _interval_ in range [.1, .5]
    dflat = df.copy()
    #creating bins, labels
    lat_bins = np.arange(16-interval, 34+(2*interval), interval)
    lat_labels = np.arange(16-interval, 34+interval, interval)
    dflat['lat2']=pd.cut(df['lat'],bins=lat_bins,labels=lat_labels,right=False)
    dflat['lat2']=dflat['lat2'].astype(float)
    dfnew = dflat.groupby(['eddy','lat2'])
    result = dfnew.agg({
        'eddy':'first',
        'date':'count',
        'lat':np.mean,
        'lon':np.mean,
        'radius':np.mean,
        'amplitude':np.mean
    })
    return result

def latlonbyinterval(df, interval):
    #binning by intervals of _interval_ degrees latitude, longitude as described above
    dflatlon= df.copy()
    #creating bins, labels
    lat_bins = np.arange(16-interval, 34+(2*interval), interval)
    lat_labels = np.arange(16-interval, 34+interval, interval)
    lon_bins = np.arange(176, 209+(2*interval), interval)
    lon_labels = np.arange(176, 209+interval, interval)
    
    dflatlon['lat2']=pd.cut(df['lat'],bins=lat_bins,labels=lat_labels,right=False)
    dflatlon['lon2']=pd.cut(df['lon'],bins=lon_bins,labels=lon_labels,right=False)
    
    dflatlon['latlon']=list(zip((dflatlon['lat2'].astype(float)),(dflatlon['lon2'].astype(float))))
    dfnew = dflatlon.groupby(['eddy','latlon'])
    result = dfnew.agg({
        'eddy':'first',
        'date':'count',
        'lat':np.mean,
        'lon':np.mean,
        'lat2':'first',
        'lon2':'first',
        'radius':np.mean,
        'amplitude':np.mean
    })
    return result

In [33]:
cl500 = latbyinterval(cdf,.5)
cl500.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,eddy,date,lat,lon,radius,amplitude
eddy,lat2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.0,26.5,1.0,4,26.594625,201.284855,20.2875,0.601725
2.0,30.5,2.0,20,30.724302,176.935381,32.8325,4.688642
2.0,31.0,2.0,8,31.233706,176.476311,33.90625,4.497819
2.0,31.5,2.0,4,31.573346,176.576569,27.1375,2.745743
3.0,28.0,3.0,18,28.22082,200.928969,38.45,0.914883
4.0,29.5,4.0,9,29.978857,200.682534,32.122222,0.968564
4.0,30.0,4.0,1,30.006189,200.675964,23.3,0.498598
5.0,28.5,5.0,61,28.761916,202.819099,106.253279,5.381142
6.0,30.0,6.0,6,30.347652,196.078252,24.383333,0.545872
6.0,30.5,6.0,7,30.683281,196.05016,33.778571,1.393218


In [34]:
cll500 = latlonbyinterval(cdf,.5)
cll500.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,eddy,date,lat,lon,lat2,lon2,radius,amplitude
eddy,latlon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.0,"(26.5, 201.0)",1.0,4,26.594625,201.284855,26.5,201.0,20.2875,0.601725
2.0,"(30.5, 176.5)",2.0,10,30.796874,176.718651,30.5,176.5,40.3,5.993165
2.0,"(30.5, 177.0)",2.0,10,30.651731,177.152112,30.5,177.0,25.365,3.38412
2.0,"(31.0, 176.0)",2.0,8,31.233706,176.476311,31.0,176.0,33.90625,4.497819
2.0,"(31.5, 176.5)",2.0,4,31.573346,176.576569,31.5,176.5,27.1375,2.745743
3.0,"(28.0, 200.5)",3.0,16,28.216197,200.916548,28.0,200.5,40.5875,0.998816
3.0,"(28.0, 201.0)",3.0,2,28.257804,201.028336,28.0,201.0,21.35,0.243419
4.0,"(29.5, 200.5)",4.0,9,29.978857,200.682534,29.5,200.5,32.122222,0.968564
4.0,"(30.0, 200.5)",4.0,1,30.006189,200.675964,30.0,200.5,23.3,0.498598
5.0,"(28.5, 202.0)",5.0,12,28.665731,202.238265,28.5,202.0,46.095833,1.120982


In [35]:
al500 = latbyinterval(adf,.5)
all500 = latlonbyinterval(adf,.5)

In [36]:
al500.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,eddy,date,lat,lon,radius,amplitude
eddy,lat2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.0,16.5,1.0,6,16.657318,201.566923,31.708333,0.388343
2.0,19.5,2.0,14,19.853656,207.602446,29.235714,0.705279
3.0,30.0,3.0,25,30.216285,186.287147,82.908,4.167622
4.0,24.0,4.0,17,24.305204,185.903321,78.258824,4.180908
5.0,32.5,5.0,8,32.96653,200.148359,22.36875,0.592666
6.0,33.0,6.0,5,33.464009,193.418091,24.97,0.916772
6.0,33.5,6.0,20,33.683961,193.571555,52.7425,3.132255
6.0,34.0,6.0,2,34.018394,193.730652,53.575,4.959682
7.0,33.0,7.0,4,33.335132,182.369671,18.5875,0.413238
8.0,32.5,8.0,6,32.775158,194.887245,50.541667,2.596021


In [37]:
all500.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,eddy,date,lat,lon,lat2,lon2,radius,amplitude
eddy,latlon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.0,"(16.5, 201.0)",1.0,2,16.616411,201.399742,16.5,201.0,29.925,0.369896
1.0,"(16.5, 201.5)",1.0,4,16.677772,201.650513,16.5,201.5,32.6,0.397567
2.0,"(19.5, 207.0)",2.0,4,19.908663,207.378414,19.5,207.0,32.0125,0.569734
2.0,"(19.5, 207.5)",2.0,10,19.831654,207.692059,19.5,207.5,28.125,0.759498
3.0,"(30.0, 185.5)",3.0,2,30.068904,185.98423,30.0,185.5,58.7,1.142662
3.0,"(30.0, 186.0)",3.0,22,30.222565,186.304728,30.0,186.0,84.834091,4.462151
3.0,"(30.0, 186.5)",3.0,1,30.372881,186.50621,30.0,186.5,88.95,3.737907
4.0,"(24.0, 185.5)",4.0,14,24.295446,185.870518,24.0,185.5,76.925,3.938153
4.0,"(24.0, 186.0)",4.0,3,24.350739,186.056402,24.0,186.0,84.483333,5.313764
5.0,"(32.5, 200.0)",5.0,8,32.96653,200.148359,32.5,200.0,22.36875,0.592666


In [38]:
cl500.to_csv('./data/cl500.csv')
cll500.to_csv('./data/cll500.csv')
al500.to_csv('./data/al500.csv')
all500.to_csv('./data/all500.csv')

In [39]:
cl250 = latbyinterval(cdf,.25)
cll250 = latlonbyinterval(cdf,.25)

In [40]:
cl250.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,eddy,date,lat,lon,radius,amplitude
eddy,lat2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.0,26.5,1.0,4,26.594625,201.284855,20.2875,0.601725
2.0,30.5,2.0,14,30.668656,177.069002,29.603571,4.13789
2.0,30.75,2.0,6,30.854144,176.623599,40.366667,5.973732
2.0,31.0,2.0,4,31.112514,176.478188,35.7875,4.966158
2.0,31.25,2.0,4,31.354898,176.474434,32.025,4.02948
2.0,31.5,2.0,4,31.573346,176.576569,27.1375,2.745743
3.0,28.0,3.0,12,28.201157,200.896417,45.925,1.245711
3.0,28.25,3.0,6,28.260147,200.994072,23.5,0.253228
4.0,29.75,4.0,9,29.978857,200.682534,32.122222,0.968564
4.0,30.0,4.0,1,30.006189,200.675964,23.3,0.498598


In [41]:
cll250.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,eddy,date,lat,lon,lat2,lon2,radius,amplitude
eddy,latlon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.0,"(26.5, 201.25)",1.0,4,26.594625,201.284855,26.5,201.25,20.2875,0.601725
2.0,"(30.5, 176.75)",2.0,4,30.710968,176.861229,30.5,176.75,40.2,6.022315
2.0,"(30.5, 177.0)",2.0,8,30.640117,177.118954,30.5,177.0,25.70625,3.479141
2.0,"(30.5, 177.25)",2.0,2,30.698187,177.284744,30.5,177.25,24.0,3.004032
2.0,"(30.75, 176.5)",2.0,6,30.854144,176.623599,30.75,176.5,40.366667,5.973732
2.0,"(31.0, 176.25)",2.0,4,31.112514,176.478188,31.0,176.25,35.7875,4.966158
2.0,"(31.25, 176.25)",2.0,4,31.354898,176.474434,31.25,176.25,32.025,4.02948
2.0,"(31.5, 176.5)",2.0,4,31.573346,176.576569,31.5,176.5,27.1375,2.745743
3.0,"(28.0, 200.75)",3.0,12,28.201157,200.896417,28.0,200.75,45.925,1.245711
3.0,"(28.25, 200.75)",3.0,4,28.261319,200.97694,28.25,200.75,24.575,0.258133


In [52]:
al250 = latbyinterval(adf,.25)
all250 = latlonbyinterval(adf,.25)

In [53]:
al250.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,eddy,date,lat,lon,radius,amplitude
eddy,lat2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.0,16.5,1.0,6,16.657318,201.566923,31.708333,0.388343
2.0,19.75,2.0,14,19.853656,207.602446,29.235714,0.705279
3.0,30.0,3.0,18,30.174631,186.251764,82.877778,4.253488
3.0,30.25,3.0,7,30.323394,186.378132,82.985714,3.946824
4.0,24.0,4.0,5,24.218324,185.808673,60.88,2.190044
4.0,24.25,4.0,12,24.341404,185.942758,85.5,5.010434
5.0,32.75,5.0,8,32.96653,200.148359,22.36875,0.592666
6.0,33.25,6.0,5,33.464009,193.418091,24.97,0.916772
6.0,33.5,6.0,14,33.611192,193.52246,52.435714,2.859873
6.0,33.75,6.0,6,33.853755,193.686111,53.458333,3.767813


In [54]:
all250.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,eddy,date,lat,lon,lat2,lon2,radius,amplitude
eddy,latlon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.0,"(16.5, 201.25)",1.0,2,16.616411,201.399742,16.5,201.25,29.925,0.369896
1.0,"(16.5, 201.5)",1.0,3,16.666604,201.603012,16.5,201.5,33.283333,0.42086
1.0,"(16.5, 201.75)",1.0,1,16.711275,201.793015,16.5,201.75,30.55,0.327685
2.0,"(19.75, 207.25)",2.0,4,19.908663,207.378414,19.75,207.25,32.0125,0.569734
2.0,"(19.75, 207.5)",2.0,6,19.839035,207.603783,19.75,207.5,26.7,0.664782
2.0,"(19.75, 207.75)",2.0,4,19.820582,207.824474,19.75,207.75,30.2625,0.90157
3.0,"(30.0, 185.75)",3.0,2,30.068904,185.98423,30.0,185.75,58.7,1.142662
3.0,"(30.0, 186.0)",3.0,3,30.132936,186.049082,30.0,186.0,45.4,0.625781
3.0,"(30.0, 186.25)",3.0,13,30.200518,186.339696,30.0,186.25,95.246154,5.56924
3.0,"(30.25, 186.25)",3.0,6,30.315146,186.356786,30.25,186.25,81.991667,3.981644


In [55]:
cl250.to_csv('./data/cl250.csv')
cll250.to_csv('./data/cll250.csv')
al250.to_csv('./data/al250.csv')
all250.to_csv('./data/all250.csv')

In [50]:
cl500.describe()

Unnamed: 0,eddy,date,lat,lon,radius,amplitude
count,15727.0,15727.0,15727.0,15727.0,15727.0,15727.0
mean,5032.799453,10.028486,24.882371,192.82019,37.340079,1.989872
std,2913.755604,12.268524,5.460813,9.933905,19.70245,2.464295
min,1.0,1.0,15.920351,176.040176,15.1,0.075814
25%,2517.0,3.0,20.238142,184.172009,23.98875,0.6299
50%,4971.0,6.0,24.753272,192.830303,30.835714,1.176196
75%,7589.0,12.0,29.649854,201.671282,44.1275,2.334768
max,10115.0,230.0,34.040257,209.034012,193.883333,32.674241


In [51]:
cl250.describe()

Unnamed: 0,eddy,date,lat,lon,radius,amplitude
count,21683.0,21683.0,21683.0,21683.0,21683.0,21683.0
mean,5024.313702,7.273809,24.696549,192.576355,38.593892,2.148475
std,2915.82057,8.603449,5.441243,9.971606,20.995994,2.676179
min,1.0,1.0,15.920351,176.040176,15.1,0.075814
25%,2509.0,2.0,20.045969,183.82034,24.136607,0.651057
50%,4943.0,5.0,24.451088,192.506736,31.6,1.256779
75%,7595.5,9.0,29.347846,201.445015,46.175,2.51578
max,10115.0,156.0,34.040257,209.034012,205.25,40.592589


In [56]:
cl100 = latbyinterval(cdf,.1)
cll100 = latlonbyinterval(cdf,.1)
al100 = latbyinterval(adf,.1)
all100 = latlonbyinterval(adf,.1)

In [57]:
cll100.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,eddy,date,lat,lon,lat2,lon2,radius,amplitude
eddy,latlon,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.0,"(26.499999999999964, 201.19999999999857)",1.0,1,26.591457,201.289383,26.5,201.2,21.55,0.61936
1.0,"(26.499999999999964, 201.29999999999856)",1.0,1,26.569246,201.323822,26.5,201.3,23.9,0.713694
1.0,"(26.599999999999962, 201.19999999999857)",1.0,2,26.608897,201.263107,26.6,201.2,17.85,0.536923
2.0,"(30.599999999999948, 176.79999999999995)",2.0,1,30.697508,176.888123,30.6,176.8,35.05,5.249845
2.0,"(30.599999999999948, 176.89999999999995)",2.0,1,30.680347,176.941177,30.6,176.9,32.95,4.948963
2.0,"(30.599999999999948, 176.99999999999994)",2.0,3,30.647703,177.047516,30.6,177.0,27.75,4.032741
2.0,"(30.599999999999948, 177.09999999999994)",2.0,4,30.629448,177.14888,30.6,177.1,23.8125,3.103351
2.0,"(30.599999999999948, 177.19999999999993)",2.0,2,30.671803,177.235855,30.6,177.2,25.65,3.17963
2.0,"(30.699999999999946, 176.59999999999997)",2.0,1,30.792746,176.687653,30.7,176.6,42.35,6.35514
2.0,"(30.699999999999946, 176.69999999999996)",2.0,2,30.758017,176.756393,30.7,176.7,44.375,6.716017


In [58]:
cll100.describe()

Unnamed: 0,eddy,date,lat,lon,lat2,lon2,radius,amplitude
count,79578.0,79578.0,79578.0,79578.0,79578.0,79578.0,79578.0,79578.0
mean,4893.511938,1.98193,24.300305,192.429884,24.250425,192.379898,45.335617,2.853735
std,2908.295904,1.693106,5.383634,9.907533,5.383898,9.907635,26.599498,3.491975
min,1.0,1.0,15.920277,176.040161,15.9,176.0,15.1,0.071151
25%,2432.0,1.0,19.759119,183.687443,19.7,183.6,25.9,0.792546
50%,4774.0,1.0,23.692788,192.379219,23.6,192.3,36.75,1.651185
75%,7417.75,2.0,28.718612,201.23289,28.7,201.2,56.6975,3.492348
max,10115.0,35.0,34.040352,209.034012,34.0,209.0,210.35,47.40176


In [59]:
all100.describe()

Unnamed: 0,eddy,date,lat,lon,lat2,lon2,radius,amplitude
count,65501.0,65501.0,65501.0,65501.0,65501.0,65501.0,65501.0,65501.0
mean,4245.952505,1.960062,24.637033,192.601497,24.58699,192.551662,67.905901,3.223954
std,2466.704209,1.623123,5.484895,10.054986,5.48506,10.0551,53.108282,3.527557
min,1.0,1.0,15.920214,176.041534,15.9,176.0,15.2,0.064346
25%,2167.0,1.0,19.750168,183.629883,19.7,183.6,31.85,0.76033
50%,4235.0,1.0,24.26371,192.658,24.2,192.6,50.975,1.813905
75%,6358.0,2.0,29.385326,201.648132,29.3,201.6,86.516667,4.434821
max,8632.0,26.0,34.040428,209.033997,34.0,209.0,619.45,31.552294


In [60]:
cl100.to_csv('./data/cl100.csv')
cll100.to_csv('./data/cll100.csv')
al100.to_csv('./data/al100.csv')
all100.to_csv('./data/all100.csv')