# Get Buoy and Vessel data

In [None]:
# parameters

# Vessel positions: URL handed to read_littorina_positions() and the CSV file
# the resulting table is written to.
lit_data_url = "https://osis.geomar.de/underway/3ka2chb/api/v1/positions/Littorina2023?include_payloads=false&earliest_timestamp=2023-04-24"
lit_data_file = "lit_positions.csv"

# Buoy positions: download URL of the zip archive and the local file name
# curl stores it under.
buoy_zip_url = "https://cloud.geomar.de/s/tNRg9raGEK357e4/download"
buoy_zip_file = "buoy_positions.zip"
# Directory given to `unzip -d`.
buoy_data_path = "data/"
# Directory whose *.csv files load_all_buoy_csv_files() reads.
buoy_data_path_full = "data/2023-05-03_Drifter_Filedrop/"
# CSV file the combined buoy table is written to.
buoy_data_file = "buoy_positions.csv"

In [None]:
!curl -o {buoy_zip_file} {buoy_zip_url}

In [None]:
!unzip -q {buoy_zio_file} -d {buoy_data_path}

## Buoy positions

In [None]:
import pandas as pd
import hvplot.pandas

from pathlib import Path

In [None]:
def _try_reading_csv(file):
    """Read one CSV file, returning ``None`` when there is nothing to parse.

    Parameters
    ----------
    file : path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table, or ``None`` if pandas raises ``EmptyDataError``.
        Any other read error propagates to the caller.
    """
    try:
        return pd.read_csv(file)
    except pd.errors.EmptyDataError:
        return None


def load_all_buoy_csv_files(path=None):
    """Combine every ``*.csv`` file directly inside ``path`` into one table.

    Files are read in sorted name order; files without content are skipped.
    ``date_UTC`` is parsed to datetimes and ``D_number`` is normalised to a
    string via an integer cast. Fully identical rows are dropped, and the
    result is sorted by ``D_number`` then ``date_UTC``.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory to search (non-recursively) for ``*.csv`` files.

    Returns
    -------
    pandas.DataFrame
        One row per distinct record, with ``D_number`` and ``date_UTC`` as
        the first two columns and a fresh ``RangeIndex``.

    Raises
    ------
    ValueError
        If ``path`` contains no non-empty CSV file.
    """
    files = sorted(Path(path).glob("*.csv"))
    frames = [frame for frame in map(_try_reading_csv, files) if frame is not None]
    if not frames:
        # pd.concat would raise a ValueError as well; give it a useful message.
        raise ValueError(f"No non-empty CSV files found in {path!r}")

    df = pd.concat(frames, ignore_index=True)
    df["date_UTC"] = pd.to_datetime(df["date_UTC"])
    df["D_number"] = df["D_number"].astype(int).astype(str)

    key_columns = ["D_number", "date_UTC"]
    # Deduplicate while the keys are still ordinary columns: drop_duplicates()
    # ignores the index, so doing this after set_index() would discard
    # distinct records that merely share the same remaining values.
    df = df.drop_duplicates()
    df = df.sort_values(key_columns)

    # Keep the key columns in front, as set_index()/reset_index() used to do.
    value_columns = [col for col in df.columns if col not in key_columns]
    return df[key_columns + value_columns].reset_index(drop=True)

In [None]:
%%time

# Build the combined buoy table from the extracted CSV directory.
df_buoys = load_all_buoy_csv_files(path=buoy_data_path_full)

# Last expression of the cell: show the resulting table.
df_buoys

In [None]:
# Write the combined buoy table to CSV (without the row index), then print
# the first five lines of the written file as a quick check.
df_buoys.to_csv(buoy_data_file, index=False)
!head -n5 {buoy_data_file}

In [None]:
# Most recent timestamp present in the buoy table.
df_buoys["date_UTC"].max()

## Vessel positions

In [None]:
import geopandas

In [None]:
def read_littorina_positions(url=None):
    """Load vessel positions and add coarsened coordinates for redacted sources.

    The source is read with ``geopandas.read_file``. The point geometry is
    split into ``Longitude`` / ``Latitude`` columns and the ``geometry``
    column is dropped. Two further columns, ``Longitude_`` and ``Latitude_``,
    hold the coordinates rounded to one decimal for rows whose ``datastream``
    contains ``"MarineTraffic"`` and the unchanged coordinates otherwise.

    Parameters
    ----------
    url : str
        Location passed straight to ``geopandas.read_file``.

    Returns
    -------
    pandas.DataFrame
        The attribute table with ``Longitude``, ``Latitude``, ``Longitude_``
        and ``Latitude_`` added.

    Notes
    -----
    NOTE(review): the full-precision ``Longitude`` / ``Latitude`` columns are
    still part of the returned table; confirm that downstream consumers use
    the underscore-suffixed columns where redaction matters.
    """
    df = geopandas.read_file(url)

    # Vectorised coordinate accessors instead of a per-row lambda.
    df["Longitude"] = df.geometry.x
    df["Latitude"] = df.geometry.y
    df = df.drop(columns=["geometry"])

    # Redacted positions. na=False keeps the mask strictly boolean when a
    # datastream entry is missing (such rows are left unrounded); previously
    # a missing entry made the `~mask` arithmetic fail.
    redact_here = df["datastream"].str.contains(
        "MarineTraffic", regex=False, na=False
    )
    for column in ("Longitude", "Latitude"):
        # Rounded value where the mask is True, original value elsewhere.
        df[f"{column}_"] = df[column].round(1).where(redact_here, df[column])

    return df

In [None]:
# Read the vessel positions from the URL set in the parameters cell.
df_lit = read_littorina_positions(url=lit_data_url)

In [None]:
# Write the vessel table to CSV (without the row index), then print the
# first five lines of the written file as a quick check.
df_lit.to_csv(lit_data_file, index=False)
!head -n5 {lit_data_file}