In [3]:
import json
import os
from datetime import date, datetime, timedelta
from typing import List, Tuple, Union

import pandas as pd
import requests
from geopy.distance import geodesic

In [67]:
def get_ais_hourly(
    start_date: date,
    end_date: date,
    timeout: Union[float, Tuple[float, float], None] = (10, 300),
) -> Union[str, None]:
    """
    Retrieves hourly AIS data for the given date-range.

    Writes the raw response body to ``ais-hourly-{start_date}-{end_date}.json``
    (relative to the current working directory) and then returns that file
    path if successful.

    Args:
        start_date: Sent to the API as the ``start_date`` query parameter.
        end_date: Sent to the API as the ``end_date`` query parameter.
        timeout: Forwarded to ``requests.get``; either a single number or a
            ``(connect, read)`` tuple in seconds. ``None`` waits forever.

    Returns:
        The path of the written file, or ``None`` when the API answers with
        any status other than 200 (the status and response body are printed).

    Raises:
        requests.RequestException: On connection problems or when the
            timeout elapses.
    """
    sdate_str, edate_str = str(start_date), str(end_date)
    url = "https://api.hackathon.mercuria-apps.com/api/ais-hourly/"

    # NOTE(review): a credential is committed in this notebook. Prefer
    # supplying it through the MERCURIA_API_TOKEN environment variable and
    # rotate/remove the literal fallback once that is in place.
    token = os.environ.get(
        "MERCURIA_API_TOKEN", "f1048ff37ed94fbdfe5df4798d5860ca4c18b13c"
    )
    headers = {
        "Authorization": f"Token {token}",
    }

    # Let requests build (and encode) the query string instead of formatting
    # it into the URL by hand.
    params = {'start_date': sdate_str, 'end_date': edate_str}

    r = requests.get(url=url, headers=headers, params=params, timeout=timeout)

    if r.status_code != 200:
        print(f'Request failed with HTTP {r.status_code}; dates may not be in data.')
        try:
            print(r.json())
        except ValueError:
            # Error bodies are not guaranteed to be JSON (e.g. a proxy's HTML page).
            print(r.text)
        return None

    path = f'ais-hourly-{sdate_str}-{edate_str}.json'
    with open(path, 'w') as f:
        f.write(r.text)
    return path

In [68]:
def calculate_distance(coord1: Tuple[float, float], coord2: Tuple[float, float]) -> float:
    """
    Geodesic distance between two coordinate pairs, expressed in kilometres.

    Both pairs are handed to geopy's ``geodesic`` unchanged
    (geopy reads a pair as (latitude, longitude) -- see its documentation).
    """
    separation = geodesic(coord1, coord2)
    return separation.km

In [69]:
def calculate_velocity_1h(coord1: Tuple[float, float], coord2: Tuple[float, float]) -> float:
    """
    Distance between the two coordinates divided by an elapsed time of 1.

    The distance comes from ``calculate_distance`` (km); presumably the two
    positions are one hour apart, which would make the result km/h.
    """
    elapsed = 1
    return calculate_distance(coord1, coord2) / elapsed

In [70]:
def json_file_to_df(path: str) -> pd.DataFrame:
    """
    Read the JSON file at ``path`` and flatten its 'results' entries.

    Returns a DataFrame produced by ``pd.json_normalize`` from the value
    stored under the top-level 'results' key.
    """
    with open(path, 'r') as handle:
        payload = json.load(handle)
    records = payload['results']
    return pd.json_normalize(records)

In [71]:
def is_roughly_eta(this_eta: datetime, original_eta: datetime, tolerance_days: int = 5) -> bool:
    """
    Determines whether a date is more or less around the original ETA.

    Args:
        this_eta: The ETA being checked.
        original_eta: The reference ETA to compare against.
        tolerance_days: Maximum allowed gap, in days, in either direction.

    Returns:
        True when the absolute difference between the two ETAs is at most
        ``tolerance_days`` days, otherwise False.
    """
    # Subtracting datetimes yields a timedelta, so the bound must be a
    # timedelta too; comparing against a bare int raises TypeError.
    return abs(this_eta - original_eta) <= timedelta(days=tolerance_days)

In [72]:
# Fetch window: 1 July 2022 through 30 August 2022.
start_date, end_date = date(2022, 7, 1), date(2022, 8, 30)

# Download the window; `path` is the saved file's path, or None if the
# request did not succeed.
path = get_ais_hourly(start_date=start_date, end_date=end_date)

KeyboardInterrupt: 

In [79]:
# Load the downloaded AIS file into a flat DataFrame built from the entries
# under its 'results' key. `path` must be the string returned by
# get_ais_hourly (it is None when the download failed).
df = json_file_to_df(path)
df

(1000, 26)

In [74]:
# Show each record's position timestamp next to the ETA it reported.
df[['position_timestamp','ais_eta']]

Unnamed: 0,position_timestamp,ais_eta
0,2022-08-01T00:00:00Z,2022-08-05 22:00:00
1,2022-08-01T00:00:00Z,2022-07-02 19:01:00
2,2022-08-01T00:00:00Z,2022-08-02 13:00:00
3,2022-08-01T00:00:00Z,2022-08-02 20:00:00
4,2022-08-01T00:00:00Z,2022-08-09 23:00:00
...,...,...
995,2022-08-01T00:04:41Z,2022-07-29 03:00:00
996,2022-08-01T00:04:41Z,2022-08-08 10:00:00
997,2022-08-01T00:04:41Z,2022-08-09 12:00:00
998,2022-08-01T00:04:42Z,2022-07-03 14:00:00


In [80]:
# Keep only the date part (first 10 characters, YYYY-MM-DD) of both raw strings.
df['dt1'] = df['position_timestamp'].str.slice(0, 10)
df['dt2'] = df['ais_eta'].str.slice(0, 10)

# Parse the date-only strings into datetimes.
df['timestamp'] = df['dt1'].map(lambda day: datetime.strptime(day, '%Y-%m-%d'))
df['arrival'] = df['dt2'].map(lambda day: datetime.strptime(day, '%Y-%m-%d'))

# Per vessel (IMO): earliest position date, latest reported ETA date, and the
# gap between them. The gap can be negative when the latest ETA precedes the
# earliest position date.
duration = df.groupby('imo')['timestamp'].min().to_frame(name='min')
duration['arrival'] = df.groupby('imo')['arrival'].max()
duration['length'] = duration['arrival'] - duration['min']
duration

Unnamed: 0_level_0,min,arrival,length
imo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7128423,2022-07-01,2022-06-30,-1 days
7413218,2022-07-01,2022-07-04,3 days
8110681,2022-07-01,2022-06-30,-1 days
8307947,2022-07-01,2022-06-29,-2 days
8420373,2022-07-01,1970-01-01,-19174 days
...,...,...,...
9932737,2022-07-01,2022-07-01,0 days
9935454,2022-07-01,2022-07-05,4 days
9935727,2022-07-01,2022-07-01,0 days
9935844,2022-07-01,2022-07-01,0 days


In [76]:
# First non-null position_timestamp per vessel (rows grouped by IMO).
df.groupby(['imo'])['position_timestamp'].first()


imo
5277062    2022-08-01T00:01:56Z
7318901    2022-08-01T00:00:09Z
7325899    2022-08-01T00:00:51Z
7373949    2022-08-01T00:02:12Z
7382249    2022-08-01T00:03:53Z
                   ...         
9929015    2022-08-01T00:03:49Z
9929041    2022-08-01T00:01:02Z
9936240    2022-08-01T00:01:24Z
9943530    2022-08-01T00:02:15Z
9953183    2022-08-01T00:01:39Z
Name: position_timestamp, Length: 886, dtype: object

To get a vessel's records over a single voyage:

1. Uniquely identify the vessel using its IMO (International Maritime Organization) number.
2. How to identify a single voyage? Use the combination of destination and ETA (date only, ± 5 days).

In [82]:
# How often each per-vessel duration (latest ETA date minus earliest position
# date) occurs.
duration.length.value_counts()

0 days       93
1 days       63
2 days       61
-1 days      60
5 days       43
             ..
-29 days      1
-70 days      1
-218 days     1
-33 days      1
-62 days      1
Name: length, Length: 95, dtype: int64

In [None]:
def get_voyages(imo: str) -> pd.DataFrame:
    """
    Given a vessel identifier (IMO), return its voyage-specific AIS data
    in a df.

    A voyage is uniquely identified using IMO, destination, and is_roughly_eta
    (in case the ETA slightly varies as AIS is being sent).

    Adds a voyage_id column in the format of f'{IMO}-{seq}'.
    """
    # NOTE(review): as far as is visible here, the body only groups the
    # notebook-level `df` by IMO; `imo` is not yet used and nothing is
    # returned -- confirm whether the implementation continues elsewhere.
    df1 = df.groupby('imo')
