In [1]:
import pandas as pd

In [2]:
def get_cruise_stations(cruise):
    """Download the station table for one cruise as a DataFrame.

    The cruise identifier is inserted verbatim into the NES-LTER API URL
    (no case normalisation happens here), and the CSV served at that URL
    is parsed with ``pd.read_csv`` using default options.
    """
    return pd.read_csv(
        'https://nes-lter-data.whoi.edu/api/stations/{}.csv'.format(cruise)
    )

In [3]:
# Fetch the en617 station table once and preview its first rows to see
# which columns (name, latitude, longitude, ...) are available.
station_metadata = get_cruise_stations('en617')
station_metadata.head(n=5)

Unnamed: 0,long_name,name,latitude,longitude,depth,comments
0,LTER1,L1,41.1967,-70.8833,21.0,
1,LTER2,L2,41.03,-70.8833,47.0,
2,LTER3,L3,40.8633,-70.8833,56.0,
3,LTER4,L4,40.6967,-70.8833,66.0,
4,LTER5,L5,40.5133,-70.8833,75.0,Nudged south to avoid shipping channel


In [4]:
from geopy.distance import distance as geo_distance

class StationLocator(object):
    """Match positions to the nearest station of a cruise.

    The station table is fetched once at construction time through
    ``get_cruise_stations`` and stored in ``self.station_metadata``. The
    methods read its ``name``, ``latitude`` and ``longitude`` columns.
    Distances are computed with geopy's ``distance`` and reported in
    kilometres.
    """

    def __init__(self, cruise):
        """Load station metadata for ``cruise`` (lower-cased before lookup)."""
        self.station_metadata = get_cruise_stations(cruise.lower())

    def station_distances(self, lat, lon):
        """Return the distance in km from ``(lat, lon)`` to every station.

        Returns:
            pd.Series of distances that shares the index of
            ``self.station_metadata``.
        """
        stations = self.station_metadata
        distances = [
            geo_distance([lat, lon], [station_lat, station_lon]).km
            for station_lat, station_lon
            in zip(stations['latitude'], stations['longitude'])
        ]
        # dtype=float keeps the result numeric even when there are no stations.
        return pd.Series(distances, index=stations.index, dtype=float)

    def nearest_station(self, lat, lon):
        """Return ``(station_name, distance_km)`` for the closest station."""
        distances = self.station_distances(lat, lon)
        nearest = distances.idxmin()
        station_name = self.station_metadata.loc[nearest, 'name']
        return station_name, distances.loc[nearest]

    def nearest_stations(self, df, lat_col='latitude', lon_col='longitude'):
        """Annotate every row of ``df`` with its nearest station.

        Args:
            df: DataFrame holding one position per row. It is not modified.
            lat_col: name of the latitude column in ``df``.
            lon_col: name of the longitude column in ``df``.

        Returns:
            A copy of ``df`` with two extra columns: ``nearest_station``
            (station name) and ``station_distance`` (km).
        """
        # Read the coordinate columns by label. itertuples() renames columns
        # that are not valid Python identifiers, so getattr(row, lat_col)
        # would fail for names such as 'lat (deg)'.
        cache = {}  # (lat, lon) -> (name, distance); replicate rows share positions
        names, distances = [], []
        for position in zip(df[lat_col], df[lon_col]):
            if position not in cache:
                cache[position] = self.nearest_station(*position)
            name, distance = cache[position]
            names.append(name)
            distances.append(distance)
        annotated = df.copy()
        annotated['nearest_station'] = names
        annotated['station_distance'] = distances
        return annotated
            
# Spot-check the locator with a single position on the en617 transect.
locator = StationLocator('en617')
locator.nearest_station(lat=40.1967, lon=-70.8833)

('L7', 3.3311611084874575)

In [5]:
def nut_nearest_stations(cruise):
    """Return a cruise's nutrient table annotated with nearest stations.

    The nutrient CSV is read from the NES-LTER API using the lower-cased
    cruise identifier, then passed to ``StationLocator.nearest_stations``
    with its default coordinate column names.
    """
    nutrients = pd.read_csv(
        'https://nes-lter-data.whoi.edu/api/nut/{}.csv'.format(cruise.lower())
    )
    return StationLocator(cruise).nearest_stations(nutrients)

# Preview the annotated en617 nutrient table (first rows only).
nut_nearest_stations('en617').head(n=5)

Unnamed: 0,cruise,cast,niskin,date,latitude,longitude,depth,sample_id,replicate,nitrate_nitrite,ammonium,phosphate,silicate,nearest_station,station_distance
0,EN617,1,1,2018-07-20 17:30:51+00:00,41.20191,-70.88546,17.739,571,a,0.164754,0.0,0.316859,4.411799,L1,0.606315
1,EN617,1,1,2018-07-20 17:30:51+00:00,41.20191,-70.88546,17.739,572,b,0.200484,0.0,0.319452,4.400405,L1,0.606315
2,EN617,1,5,2018-07-20 17:33:46+00:00,41.20244,-70.88544,12.177,573,a,0.065505,0.0,0.233366,3.884538,L1,0.662263
3,EN617,1,5,2018-07-20 17:33:46+00:00,41.20244,-70.88544,12.177,574,b,0.0,0.0,0.240626,3.848282,L1,0.662263
4,EN617,1,8,2018-07-20 17:36:20+00:00,41.20284,-70.88546,7.261,575,a,0.0,0.0,0.197583,3.231418,L1,0.705555


In [6]:
# Cruises to combine; each identifier is handed to nut_nearest_stations.
cruises = ['en617', 'en627']

# ignore_index=True renumbers the combined rows 0..n-1. Without it each
# cruise's frame keeps its own 0-based labels, so the concatenated frame
# has duplicate index values and label lookups become ambiguous.
result = pd.concat(
    [nut_nearest_stations(cruise) for cruise in cruises],
    ignore_index=True,
)
result.head()

Unnamed: 0,cruise,cast,niskin,date,latitude,longitude,depth,sample_id,replicate,nitrate_nitrite,ammonium,phosphate,silicate,nearest_station,station_distance
0,EN617,1,1,2018-07-20 17:30:51+00:00,41.20191,-70.88546,17.739,571,a,0.164754,0.0,0.316859,4.411799,L1,0.606315
1,EN617,1,1,2018-07-20 17:30:51+00:00,41.20191,-70.88546,17.739,572,b,0.200484,0.0,0.319452,4.400405,L1,0.606315
2,EN617,1,5,2018-07-20 17:33:46+00:00,41.20244,-70.88544,12.177,573,a,0.065505,0.0,0.233366,3.884538,L1,0.662263
3,EN617,1,5,2018-07-20 17:33:46+00:00,41.20244,-70.88544,12.177,574,b,0.0,0.0,0.240626,3.848282,L1,0.662263
4,EN617,1,8,2018-07-20 17:36:20+00:00,41.20284,-70.88546,7.261,575,a,0.0,0.0,0.197583,3.231418,L1,0.705555


In [7]:
# Destination for the combined, station-annotated nutrient table.
OUTPUT_FILE = 'nut_nearest_stations.csv'

# index=False is the documented way to omit the row index from the CSV;
# the previous index=None only worked because None is falsy.
result.to_csv(OUTPUT_FILE, index=False)