In [2]:
import json
import os
from datetime import date, datetime, timedelta
from typing import List, Tuple, Union

import pandas as pd
import requests
from geopy.distance import geodesic

In [23]:
def get_ais_hourly(
    start_date: date, end_date: date, imo: Union[str, int, None] = None
) -> Union[str, None]:
    """
    Retrieves the first page of hourly AIS data for the given date-range.

    The request asks for ``page_size=5000``; further pages are not followed.
    On success the raw JSON response body is written to
    ``ais-hourly-{start_date}-{end_date}.json`` in the working directory.

    Args:
        start_date: Sent to the API as the ``start_date`` query parameter.
        end_date: Sent to the API as the ``end_date`` query parameter.
        imo: Vessel IMO number to filter on. When None, no ``imo`` filter
            is sent.

    Returns:
        The path of the file written, or None if the API did not answer
        with HTTP 200.
    """
    sdate_str, edate_str = str(start_date), str(end_date)
    url = "https://api.hackathon.mercuria-apps.com/api/ais-hourly/"

    # Let requests build and escape the query string instead of hand-formatting it.
    params = {"start_date": sdate_str, "end_date": edate_str, "page_size": 5000}
    if imo is not None:
        params["imo"] = imo

    # NOTE(review): a credential is committed in this notebook. Set
    # MERCURIA_API_TOKEN to override it; the literal is kept only so existing
    # runs keep working and should be rotated and removed.
    token = os.environ.get(
        "MERCURIA_API_TOKEN", "f1048ff37ed94fbdfe5df4798d5860ca4c18b13c"
    )
    headers = {
        "Authorization": f"Token {token}",
    }

    # A timeout keeps the cell from hanging forever on a stalled connection.
    r = requests.get(url=url, headers=headers, params=params, timeout=60)

    if r.status_code != 200:
        # Report the real status; the body may not be JSON (e.g. an HTML error page).
        print(f"Request failed with status {r.status_code}.")
        print(r.text)
        return None

    path = f"ais-hourly-{sdate_str}-{edate_str}.json"
    with open(path, "w") as f:
        f.write(r.text)
    return path


# Example: hourly AIS records for one vessel, 2022-08-01 to 2022-08-30.
imo = 9643271
start_date, end_date = date(2022, 8, 1), date(2022, 8, 30)
get_ais_hourly(start_date, end_date, imo)


'ais-hourly-2022-08-01-2022-08-30.json'

In [4]:
def get_ais_hourly_all_pages(start_date: date, end_date: date) -> pd.DataFrame:
    """
    Retrieves hourly AIS data for the given date-range, following every page.

    Starts from the first page and keeps requesting the URL in each
    response's ``next`` field until that field is empty or missing. The
    ``next`` URL is printed after each page as a progress indicator.

    Args:
        start_date: Sent to the API as the ``start_date`` query parameter.
        end_date: Sent to the API as the ``end_date`` query parameter.

    Returns:
        One DataFrame holding the normalized ``results`` of all pages, with
        a fresh 0..n-1 index.

    Raises:
        requests.HTTPError: If any page answers with an error status.
    """
    sdate_str, edate_str = str(start_date), str(end_date)

    # NOTE(review): a credential is committed in this notebook. Set
    # MERCURIA_API_TOKEN to override it; the literal is kept only so existing
    # runs keep working and should be rotated and removed.
    token = os.environ.get(
        "MERCURIA_API_TOKEN", "f1048ff37ed94fbdfe5df4798d5860ca4c18b13c"
    )
    headers = {
        "Authorization": f"Token {token}",
    }

    next_url = f"https://api.hackathon.mercuria-apps.com/api/ais-hourly/?start_date={sdate_str}&end_date={edate_str}"

    # Collect one frame per page and concatenate once at the end; growing a
    # DataFrame inside the loop copies all previous rows on every iteration.
    page_frames = []
    while next_url:
        r = requests.get(url=next_url, headers=headers, timeout=60)
        r.raise_for_status()
        current = r.json()
        page_frames.append(pd.json_normalize(current["results"]))

        # The last page has no "next" link (null/absent), which ends the loop.
        # NOTE(review): the recorded output shows "next" links using http://,
        # so the token may travel unencrypted on follow-up requests - confirm.
        next_url = current.get("next")
        print(next_url)

    return pd.concat(page_frames, ignore_index=True)


# Pull every page between 2020-08-01 and 2022-08-02, then preview the first rows.
start_date, end_date = date(2020, 8, 1), date(2022, 8, 2)
result_df = get_ais_hourly_all_pages(start_date, end_date)
result_df.head()


http://api.hackathon.mercuria-apps.com/api/ais-hourly/?end_date=2022-08-02&page=2&start_date=2020-08-01


KeyboardInterrupt: 

In [3]:
def calculate_distance(coord1: Tuple[float, float], coord2: Tuple[float, float]) -> float:
    """
    Geodesic distance between two points, in kilometres.

    Both arguments are handed unchanged to geopy's ``geodesic``, which
    expects each point as (latitude, longitude).
    """
    separation = geodesic(coord1, coord2)
    return separation.km

In [4]:
def calculate_velocity_1h(coord1: Tuple[float, float], coord2: Tuple[float, float]) -> float:
    """
    Speed between two positions, taking them to be one hour apart.

    Returns the distance from ``calculate_distance`` (km) divided by one
    hour, i.e. a value in km/h.
    """
    elapsed_hours = 1
    distance_km = calculate_distance(coord1, coord2)
    return distance_km / elapsed_hours

In [5]:
def json_file_to_df(path: str) -> pd.DataFrame:
    """
    Loads a saved API response and flattens its ``results`` list into a DataFrame.

    The file at ``path`` must contain a JSON object with a ``results`` key;
    nested fields become dotted column names via ``pd.json_normalize``.
    """
    with open(path, 'r') as handle:
        payload = json.load(handle)
    records = payload['results']
    return pd.json_normalize(records)

In [6]:
def is_roughly_eta(this_eta: datetime, original_eta: datetime, tolerance_days: float = 5) -> bool:
    """
    Determines whether a date is more or less around the original ETA.

    Args:
        this_eta: The ETA to check.
        original_eta: The reference ETA.
        tolerance_days: Largest allowed gap, in days, in either direction.
            Defaults to 5.

    Returns:
        True when the two values are at most ``tolerance_days`` apart
        (boundary included), False otherwise.
    """
    # Subtracting datetimes yields a timedelta, so the threshold must be a
    # timedelta too; comparing against a bare int raises TypeError.
    return abs(this_eta - original_eta) <= timedelta(days=tolerance_days)

In [7]:
# Date range for the extract loaded in the following cell.
# NOTE(review): this cell was labelled "August 2022", but the range below
# starts on 2020-08-01 - confirm which one is intended.
start_date = date(2020, 8, 1)
end_date = date(2022, 8, 30)

# get_ais_hourly takes the vessel's IMO; use the vessel inspected in the next cell.
path = get_ais_hourly(start_date, end_date, imo=7382249)

In [8]:
# Load the saved extract and show the reported ETAs for vessel 7382249.
df = json_file_to_df(path)
df.loc[df['imo'] == 7382249].ais_eta

Series([], Name: ais_eta, dtype: object)

To get a vessel's records over a single voyage:

1. Uniquely identify the vessel using its IMO (International Maritime Organization) number.
2. How to identify a single voyage? Use the combination of destination and ETA (date only, ± 5 days).

In [9]:
def get_voyages(imo: str) -> pd.DataFrame:
    """
    Return the voyage-specific AIS data of one vessel, selected by IMO, as a df.

    Intended contract: a voyage is uniquely identified by IMO, destination,
    and is_roughly_eta (the ETA may drift slightly while AIS messages keep
    arriving). The result gets a voyage_id column formatted as
    f'{IMO}-{seq}'.

    TODO: not implemented yet - the function currently returns None.
    """
    return None
