## SSH Eddies

In [1]:
import datetime
import jdcal
import numpy as np
import pandas as pd
import xarray as xr

In [2]:
ds = xr.open_dataset('/home/cz2397/data/ssh-eddies/tracks.20130125.nc', decode_cf=False)

In [3]:
ds

<xarray.Dataset>
Dimensions:  (Nobs: 2590938)
Coordinates:
  * Nobs     (Nobs) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 ...
Data variables:
    track    (Nobs) int32 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5 6 6 6 6 7 ...
    n        (Nobs) int32 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 ...
    j1       (Nobs) int32 2448910 2448917 2448924 2448931 2448910 2448917 ...
    cyc      (Nobs) int32 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
    lon      (Nobs) float32 636.74 635.99 635.82 635.845 560.642 561.447 ...
    lat      (Nobs) float32 -65.0011 -65.1376 -65.2457 -65.2933 -62.5424 ...
    A        (Nobs) float32 1.24564 2.64604 3.40119 1.45836 2.16156 4.87579 ...
    L        (Nobs) float32 44.464 36.7024 45.4221 43.022 63.0697 51.1345 ...
    U        (Nobs) float32 5.97993 8.17373 8.28628 5.20364 4.81408 6.59872 ...
Attributes:
    title: Mesoscale Eddies in Altimeter Observations of SSH
    institution: Oregon State University/CEOAS
    file_name:

In [4]:
df = ds.to_dataframe()

In [5]:
df.head()

Unnamed: 0_level_0,track,n,j1,cyc,lon,lat,A,L,U
Nobs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1,1,2448910,-1,636.73999,-65.001099,1.24564,44.464001,5.97993
1,1,2,2448917,-1,635.98999,-65.137604,2.64604,36.7024,8.17373
2,1,3,2448924,-1,635.820007,-65.245697,3.40119,45.4221,8.28628
3,1,4,2448931,-1,635.844971,-65.293297,1.45836,43.021999,5.20364
4,2,1,2448910,-1,560.642029,-62.5424,2.16156,63.069698,4.81408


In [6]:
def jday_to_datetime(jday, refday=0):
    y, m, d, f = jdcal.jd2gcal(jday, refday)
    h = int(f*24)
    return pd.to_datetime(datetime.datetime(y, m, d, h))

In [7]:
pd_date = df.j1.apply(jday_to_datetime)

In [8]:
df.j1 = pd_date
df = df.rename(columns = {'j1': 'date'})

In [9]:
df.head()

Unnamed: 0_level_0,track,n,date,cyc,lon,lat,A,L,U
Nobs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1,1,1992-10-14 12:00:00,-1,636.73999,-65.001099,1.24564,44.464001,5.97993
1,1,2,1992-10-21 12:00:00,-1,635.98999,-65.137604,2.64604,36.7024,8.17373
2,1,3,1992-10-28 12:00:00,-1,635.820007,-65.245697,3.40119,45.4221,8.28628
3,1,4,1992-11-04 12:00:00,-1,635.844971,-65.293297,1.45836,43.021999,5.20364
4,2,1,1992-10-14 12:00:00,-1,560.642029,-62.5424,2.16156,63.069698,4.81408


In [10]:
lon_fix_01 = df.where(df.lon <= 540).lon - 360
lon_fix_02 = df.where(df.lon > 540).lon - 720
lon_fix_A = lon_fix_01
lon_fix_A = lon_fix_A.fillna(lon_fix_02)
df.lon = lon_fix_A

In [11]:
df.head()

Unnamed: 0_level_0,track,n,date,cyc,lon,lat,A,L,U
Nobs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1,1,1992-10-14 12:00:00,-1,-83.26001,-65.001099,1.24564,44.464001,5.97993
1,1,2,1992-10-21 12:00:00,-1,-84.01001,-65.137604,2.64604,36.7024,8.17373
2,1,3,1992-10-28 12:00:00,-1,-84.179993,-65.245697,3.40119,45.4221,8.28628
3,1,4,1992-11-04 12:00:00,-1,-84.155029,-65.293297,1.45836,43.021999,5.20364
4,2,1,1992-10-14 12:00:00,-1,-159.357971,-62.5424,2.16156,63.069698,4.81408


In [12]:
lon_fix_03 = df.where(df.lon < 0).lon + 360
lon_fix_04 = df.where(df.lon >= 0).lon
lon_fix_B = lon_fix_03
lon_fix_B = lon_fix_B.fillna(lon_fix_04)
df.lon = lon_fix_B

In [13]:
df.head()

Unnamed: 0_level_0,track,n,date,cyc,lon,lat,A,L,U
Nobs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,1,1,1992-10-14 12:00:00,-1,276.73999,-65.001099,1.24564,44.464001,5.97993
1,1,2,1992-10-21 12:00:00,-1,275.98999,-65.137604,2.64604,36.7024,8.17373
2,1,3,1992-10-28 12:00:00,-1,275.820007,-65.245697,3.40119,45.4221,8.28628
3,1,4,1992-11-04 12:00:00,-1,275.844971,-65.293297,1.45836,43.021999,5.20364
4,2,1,1992-10-14 12:00:00,-1,200.642029,-62.5424,2.16156,63.069698,4.81408


In [14]:
track = np.asarray(df.track)
n = np.asarray(df.n)
date = df.date
lon = np.asarray(df.lon)
lat = np.asarray(df.lat)

## PyMongo

In [15]:
from pymongo import MongoClient
from tqdm import tqdm

In [16]:
client = MongoClient()

In [17]:
client.database_names()

[u'eddies', u'local']

In [18]:
db = client.eddies

In [19]:
db.collection_names()

[u'rcs_eddies', u'cchdo', u'ssh_eddies']

In [20]:
collection = db.cchdo

In [21]:
result = db.cchdo.remove()



In [22]:
M = max(df.track)
print(M)

215184


In [23]:
N = len(df.n)
print(N)

2590938


In [24]:
for i in tqdm(range(N)):
    eddy = {
        'eid': str(track[i]) + "-" + str(n[i]),
        'dat': date[i],
        'lat': str(lat[i]),
        'lon': str(lon[i])
    }
    result = db.cchdo.insert_one(dict(eddy))

100%|██████████| 2590938/2590938 [11:47<00:00, 3660.72it/s]


## CCHDO

In [25]:
ds = xr.open_dataset('/home/cz2397/data/cchdo-hyd/698_33RO20150410_nc_hyd/33RO20150410_00001_00001_hy1.nc')

In [29]:
ds

<xarray.Dataset>
Dimensions:                 (latitude: 1, longitude: 1, pressure: 24, time: 1)
Coordinates:
  * pressure                (pressure) float64 3.1 20.1 41.7 66.4 91.6 117.0 ...
  * time                    (time) datetime64[ns] 2015-04-11T05:50:00
  * latitude                (latitude) float32 -16.5011
  * longitude               (longitude) float32 -150.001
Data variables:
    temperature             (pressure) float64 29.13 29.14 29.08 27.5 25.72 ...
    salinity                (pressure) float64 35.66 35.66 35.72 35.85 36.17 ...
    salinity_QC             (pressure) int16 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ...
    bottle_salinity         (pressure) float64 35.65 35.66 35.72 35.85 36.17 ...
    bottle_salinity_QC      (pressure) int16 2 2 2 2 2 2 2 2 2 2 2 6 2 2 9 2 ...
    oxygen                  (pressure) float64 194.1 195.7 195.6 204.5 196.9 ...
    oxygen_QC               (pressure) int16 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ...
    bottle_oxygen           (pressure) float6

In [31]:
DAT = ds.time.values[0]

In [32]:
LAT = ds.latitude.values[0]

In [33]:
LON = ds.longitude.values[0]