In [1]:
import pandas as pd

In [2]:
def get_cruise_stations(cruise):
    """Download the station table for one cruise from the NES-LTER data API.

    Parameters
    ----------
    cruise : str
        Cruise identifier; it is inserted verbatim into the request URL.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV returned by the stations endpoint.
    """
    endpoint = f'https://nes-lter-data.whoi.edu/api/stations/{cruise}.csv'
    stations = pd.read_csv(endpoint)
    return stations

In [4]:
# Fetch the station table for cruise 'en617' and preview its first rows.
station_metadata = get_cruise_stations('en617')
station_metadata.head()

Unnamed: 0,long_name,name,latitude,longitude,depth,comments
0,LTER1,L1,41.1967,-70.8833,21.0,
1,LTER2,L2,41.03,-70.8833,47.0,
2,LTER3,L3,40.8633,-70.8833,56.0,
3,LTER4,L4,40.6967,-70.8833,66.0,
4,LTER5,L5,40.5133,-70.8833,75.0,Nudged south to avoid shipping channel


In [6]:
from geopy.distance import distance as geo_distance

class StationLocator(object):
    """Find the nearest station of a cruise for arbitrary coordinates.

    The station table is fetched once, at construction time, through
    ``get_cruise_stations`` and reused for every lookup.
    """

    def __init__(self, cruise):
        """Load the station table for ``cruise``.

        The cruise ID is lower-cased before it is passed to
        ``get_cruise_stations``.
        """
        self.station_metadata = get_cruise_stations(cruise.lower())

    def station_distances(self, lat, lon):
        """Return the distance in km from (lat, lon) to every station.

        Parameters
        ----------
        lat, lon : float
            Coordinates of the query point.

        Returns
        -------
        pandas.Series
            Distances in kilometres, indexed like ``self.station_metadata``.
        """
        stations = self.station_metadata
        origin = (lat, lon)
        kilometres = [
            geo_distance(origin, (station_lat, station_lon)).km
            for station_lat, station_lon
            in zip(stations['latitude'], stations['longitude'])
        ]
        # Explicit dtype keeps an empty result a float Series.
        return pd.Series(kilometres, index=stations.index, dtype=float)

    def nearest_station(self, lat, lon):
        """Return ``(station_name, distance_km)`` for the closest station."""
        distances = self.station_distances(lat, lon)
        nearest = distances.idxmin()
        station_name = self.station_metadata.loc[nearest, 'name']
        return station_name, distances.loc[nearest]

    def nearest_stations(self, df, lat_col='latitude', lon_col='longitude'):
        """Annotate every row of ``df`` with its nearest station.

        Parameters
        ----------
        df : pandas.DataFrame
            Table holding one coordinate pair per row. It is not modified.
        lat_col, lon_col : str
            Names of the latitude and longitude columns in ``df``.

        Returns
        -------
        pandas.DataFrame
            A copy of ``df`` with two added columns: ``nearest_station``
            (station name) and ``station_distance`` (km).
        """
        # Columns are read by label rather than via getattr() on itertuples()
        # rows, because itertuples() renames fields whose labels are not valid
        # Python identifiers, which made such column names unusable here.
        lookup_cache = {}
        names, distances = [], []
        for coords in zip(df[lat_col], df[lon_col]):
            # Rows that share a position reuse the first lookup instead of
            # recomputing the distance to every station.
            if coords not in lookup_cache:
                lookup_cache[coords] = self.nearest_station(*coords)
            name, distance = lookup_cache[coords]
            names.append(name)
            distances.append(distance)
        annotated = df.copy()
        annotated['nearest_station'] = names
        annotated['station_distance'] = distances
        return annotated
            
# Example lookup: nearest 'en617' station to the point (40.1967, -70.8833).
locator = StationLocator('en617')
locator.nearest_station(40.1967, -70.8833)

('L7', 3.3311611084874575)

In [8]:
def add_nearest_stations(cruise, transect_csv='nes-lter-chl-transect.csv'):
    """Return one cruise's transect rows annotated with their nearest station.

    Parameters
    ----------
    cruise : str
        Cruise identifier. It is matched case-insensitively against the
        ``cruise`` column, consistent with ``StationLocator``, which
        lower-cases the ID before fetching stations.
    transect_csv : str, optional
        Path (or URL) of the transect CSV to read. Defaults to the file
        this notebook was written against.

    Returns
    -------
    pandas.DataFrame
        The matching rows plus the ``nearest_station`` and
        ``station_distance`` columns added by
        ``StationLocator.nearest_stations``.
    """
    transect = pd.read_csv(transect_csv)
    # astype(str) keeps the comparison working when the column has missing values.
    is_cruise = transect['cruise'].astype(str).str.upper() == cruise.upper()
    cruise_rows = transect[is_cruise]
    return StationLocator(cruise).nearest_stations(cruise_rows)

# Preview the 'EN617' rows with their nearest-station columns.
add_nearest_stations('EN617').head()

Unnamed: 0,cruise,cast,niskin,bottle_other_method,date,latitude,longitude,depth,replicate,vol_filtered,...,fd_calibration,rb,ra,blank,rb_blank,ra_blank,chl,phaeo,nearest_station,station_distance
499,EN617,1,1,,2018-07-20 17:30:51,41.20191,-70.88546,17.739,a,290.0,...,0.484237,229.6,147.4,0.6432,228.9568,146.7568,1.447111,0.883352,L1,0.606315
500,EN617,1,1,,2018-07-20 17:30:51,41.20191,-70.88546,17.739,b,290.0,...,0.484237,236.8,149.1,0.6432,236.1568,148.4568,1.543937,0.813522,L1,0.606315
501,EN617,1,5,,2018-07-20 17:33:46,41.20244,-70.88544,12.177,a,285.0,...,0.484237,290.8,181.7,0.6432,290.1568,181.0568,1.954375,0.971205,L1,0.662263
502,EN617,1,5,,2018-07-20 17:33:46,41.20244,-70.88544,12.177,b,285.0,...,0.484237,308.4,192.9,0.6432,307.7568,192.2568,2.069022,1.037532,L1,0.662263
503,EN617,1,8,,2018-07-20 17:36:20,41.20284,-70.88546,7.261,a,287.0,...,0.484237,195.5,121.9,0.6432,194.8568,121.2568,1.309254,0.636403,L1,0.705555


In [9]:
# Cruise IDs to process; each is passed to add_nearest_stations in turn.
cruises = ['EN608','EN617','EN627', 'EN644', 'AR22', 'AR32']

# Annotate each cruise separately, then stack the per-cruise tables into one.
result = pd.concat([add_nearest_stations(cruise) for cruise in cruises])
result.head()

Unnamed: 0,cruise,cast,niskin,bottle_other_method,date,latitude,longitude,depth,replicate,vol_filtered,...,fd_calibration,rb,ra,blank,rb_blank,ra_blank,chl,phaeo,nearest_station,station_distance
264,EN608,1,2,,2018-01-31 19:34:45,41.19634,-70.87844,17.48,a,282.0,...,0.484237,516.8,298.9,1.052667,515.747333,297.847333,5.52286,1.28663,L1,0.409636
265,EN608,1,2,,2018-01-31 19:34:45,41.19634,-70.87844,17.48,b,282.0,...,0.484237,561.5,326.7,1.052667,560.447333,325.647333,5.951205,1.493858,L1,0.409636
266,EN608,1,5,,2018-01-31 19:37:05,41.19636,-70.87831,11.591,a,287.0,...,0.484237,493.5,295.2,1.052667,492.447333,294.147333,4.938519,1.669221,L1,0.420285
267,EN608,1,5,,2018-01-31 19:37:05,41.19636,-70.87831,11.591,b,285.0,...,0.484237,514.4,298.7,1.052667,513.347333,297.647333,5.409551,1.323736,L1,0.420285
268,EN608,1,9,,2018-01-31 19:40:43,41.19636,-70.87806,7.022,a,288.0,...,0.484237,508.7,296.8,1.052667,507.647333,295.747333,5.258894,1.361721,L1,0.441175


In [10]:
OUTPUT_FILE = 'py_processed_transect.csv'

# Write the combined table without the DataFrame index. `index` is a boolean
# flag; the original `index=None` only worked because None is falsy.
result.to_csv(OUTPUT_FILE, index=False)