In [43]:
import json
import os
from datetime import date, timedelta
from typing import List, Tuple, Union

import pandas as pd
import requests
from geopy.distance import geodesic

In [41]:
def get_ais_hourly(start_date: date, end_date: date) -> Union[str, None]:
    """
    Retrieves hourly AIS data for the given date-range.

    Writes the response body to ``ais-hourly-<start_date>-<end_date>.json`` in
    the current working directory and then returns that path if successful.

    Args:
        start_date: Start of the range; sent to the API as ``str(start_date)``.
        end_date: End of the range; sent to the API as ``str(end_date)``.

    Returns:
        The path of the written file when the API answers HTTP 200, otherwise
        ``None`` (the status code and the error body are printed).

    Raises:
        RuntimeError: If the ``AIS_API_TOKEN`` environment variable is not set.
    """
    # The API token is a credential, so it is read from the environment rather
    # than being committed to the notebook.
    # NOTE(review): the token that used to be hardcoded here should be rotated.
    token = os.environ.get("AIS_API_TOKEN")
    if not token:
        raise RuntimeError(
            "Set the AIS_API_TOKEN environment variable to your API token."
        )

    sdate_str, edate_str = str(start_date), str(end_date)
    url = "https://api.hackathon.mercuria-apps.com/api/ais-hourly/"
    headers = {"Authorization": f"Token {token}"}
    # Let requests build (and escape) the query string instead of an f-string.
    params = {"start_date": sdate_str, "end_date": edate_str}

    # Without a timeout a stalled connection would hang the cell indefinitely.
    r = requests.get(url=url, headers=headers, params=params, timeout=60)

    path = None
    if r.status_code == 200:
        path = f'ais-hourly-{sdate_str}-{edate_str}.json'
        with open(path, 'w') as f:
            f.write(r.text)
    else:
        # Any non-200 status lands here, not only "dates not in data".
        print(f'Request failed with HTTP status {r.status_code}; dates may not be in data.')
        try:
            print(r.json())
        except ValueError:
            # Error bodies are not guaranteed to be JSON (e.g. an HTML 500 page).
            print(r.text)
    return path

In [10]:
def calculate_distance(coord1: Tuple[float, float], coord2: Tuple[float, float]) -> float:
    """
    Returns the geodesic distance between two points, in kilometres.

    Both coordinates are handed unchanged to geopy's ``geodesic``, which
    expects them in (latitude, longitude) order.
    """
    separation = geodesic(coord1, coord2)
    return separation.km

In [11]:
def calculate_velocity_1h(coord1: Tuple[float, float], coord2: Tuple[float, float]) -> float:
    """
    Average speed between two positions taken to be one hour apart.

    The distance from ``calculate_distance`` (km) is divided by a fixed
    one-hour interval, so the result is in km/h.
    """
    elapsed_hours = 1
    distance_km = calculate_distance(coord1, coord2)
    return distance_km / elapsed_hours

In [44]:
def json_file_to_df(path: str) -> pd.DataFrame:
    """
    Loads the JSON file at ``path`` and flattens its ``'results'`` entry
    into a DataFrame.
    """
    with open(path, 'r') as json_file:
        payload = json.load(json_file)
    # The file handle is no longer needed once the JSON has been parsed.
    records = payload['results']
    return pd.json_normalize(records)

In [46]:
# Sample window: 1-2 August 2022
start_date = date(year=2022, month=8, day=1)
end_date = date(year=2022, month=8, day=2)

# Download the hourly AIS data for that window; `path` is None if the request failed.
path = get_ais_hourly(start_date=start_date, end_date=end_date)

In [48]:
# Keep the parsed frame in a variable so later cells can reuse it without
# re-reading the file; the bare name on the last line still displays it.
ais_hourly_df = json_file_to_df(path)
ais_hourly_df

Unnamed: 0,id,dt,imo,dwt,position_timestamp,ais_lat,ais_lon,ais_draft,ais_speed,ais_heading,...,port_id,port_name,port_entry_date,port_out_date,operation_location_id,operation_location_name,operation_location_type,operation_location_entry_date,operation_location_out_date,extraction_date
0,2703,2022-08-01T00:00:00Z,9108142,96168,2020-12-06T15:15:00Z,5.5233,5.7089,7.0000,0.0000,511.0000,...,,,,,,,,,,2022-08-01T00:02:58.875809Z
1,5454,2022-08-01T00:00:00Z,9651565,44508,2020-12-11T12:11:43Z,38.3259,117.8740,6.9000,0.0000,,...,,,,,,,,,,2022-08-01T00:01:37.759808Z
2,8988,2022-08-01T00:00:00Z,9183324,301620,2020-12-11T19:22:43Z,11.0627,-64.9522,10.8000,0.0000,53.0000,...,,,,,,,,,,2022-08-01T00:02:58.875809Z
3,12051,2022-08-01T00:00:00Z,8001763,53171,2020-12-12T14:59:27Z,0.7337,118.1590,9.0000,0.1000,0.0000,...,,,,,,,,,,2022-08-01T00:02:58.875809Z
4,14961,2022-08-01T00:00:00Z,9182784,18596,2020-12-16T11:28:58Z,25.3648,55.3792,7.2000,0.0000,511.0000,...,,,,,,,,,,2022-08-01T00:02:58.875809Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,16284984,2022-08-01T00:00:00Z,9539585,49992,2022-07-30T00:03:06Z,25.5233,56.9617,11.2000,12.0000,157.0000,...,,,,,,,,,,2022-08-01T00:02:58.875809Z
996,16285643,2022-08-01T00:00:00Z,9397690,53484,2022-07-30T00:05:12Z,2.7811,117.8770,6.3000,0.0000,,...,,,,,,,,,,2022-08-01T00:01:37.759808Z
997,16302133,2022-08-01T00:00:00Z,9914644,28700,2022-07-30T00:35:25Z,35.4897,129.4060,6.6000,0.0000,511.0000,...,,,,,,,,,,2022-08-01T00:02:58.875809Z
998,16304222,2022-08-01T00:00:00Z,9660530,81864,2022-07-30T01:03:13Z,21.3514,38.9779,9.3000,13.3000,,...,,,,,,,,,,2022-08-01T00:01:37.759808Z


In [None]:
def eta_date_range(dt: date, tolerance_days: int = 5) -> List[date]:
    """
    Returns every calendar date within ``tolerance_days`` of ``dt``, inclusive.

    With the default tolerance this is the "ETA (date only, +- 5 days)" window
    from the voyage-identification note in this notebook.
    NOTE(review): the original was an empty stub, so this reading of the
    intended window should be confirmed.

    Args:
        dt: The ETA date at the centre of the window.
        tolerance_days: Number of days to include on each side of ``dt``.

    Returns:
        The dates from ``dt - tolerance_days`` to ``dt + tolerance_days`` in
        ascending order (``2 * tolerance_days + 1`` entries).

    Raises:
        ValueError: If ``tolerance_days`` is negative.
    """
    if tolerance_days < 0:
        raise ValueError("tolerance_days must be non-negative")
    return [
        dt + timedelta(days=offset)
        for offset in range(-tolerance_days, tolerance_days + 1)
    ]

To get the records for one vessel over a single voyage:

1. Uniquely identify the vessel using its IMO (International Maritime Organization) number.
2. How to identify a single voyage? Use the combination of destination and ETA (date only, ± 5 days).

In [None]:
# Get "vessel over a voyage" records
# One vessel; 


# TODO: Create an IMO -> distance mapping
# TODO: Calculate the hourly distance for the entire trip
# TODO: Sort by ID