In [None]:
import sys
# Add ../src/ (relative to the kernel's working directory) to the module search
# path so that modules stored there can be imported by the cells below.
sys.path.append('../src/')

# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import json
import os
from pathlib import Path

import pandas as pd
import requests
from dotenv import load_dotenv; load_dotenv()

from openmeteo import get_meteo

In [None]:
# Data directory: the `data` folder that sits next to the notebook's working directory.
PATH_DATA = Path.cwd().parent / 'data'

## Get vessel tracks

In [None]:
# Load the Global Fishing Watch track exports: one CSV per vessel, with the
# file name (without extension) used as the vessel identifier.
track_files = PATH_DATA.joinpath('gfw_tracks').glob('*.csv')
dfs = [pd.read_csv(track_file).assign(vessel=track_file.stem) for track_file in track_files]

# Build the working table in one chain:
#   1. derive `date` (from the millisecond `timestamp`), `date_normalised` and `hour`;
#   2. order chronologically;
#   3. keep only moving positions recorded on or after 2023-11-01;
#   4. keep the first position per vessel / day / hour.
# (An optional restriction to the 02:00 and 14:00 positions was previously
# sketched here and left disabled.)
df = (
    pd.concat(dfs)
    .assign(date=lambda t: pd.to_datetime(t['timestamp'], unit='ms', errors='coerce'))
    .assign(
        date_normalised=lambda t: t['date'].dt.normalize().astype('str'),
        hour=lambda t: t['date'].dt.hour,
    )
    .sort_values(by='date')
    .loc[lambda t: t.speed > 0]
    .loc[lambda t: t['date'] >= '2023-11-01']
    .drop_duplicates(subset=['vessel', 'date_normalised', 'hour'], keep='first')
    .reset_index(drop=True)
)
len(df)

In [None]:
# Narrow the tracks down to the vessels of interest.

vessels = ['sarahm', 'ganadoexpress']
is_selected = df['vessel'].isin(vessels)
batch = df.loc[is_selected].copy()
len(batch)

## Historical weather API

In [None]:
# Query the World Weather Online historical marine endpoint once per row of
# `batch` (position + day). Every usable response is kept in `results` and
# appended to disk as one JSON document per line; rows that could not be
# fetched are collected in `failed` (by index label) so they can be retried.
WWO_API_KEY = os.environ.get('WWO_API_KEY')
BASE_URL = 'https://api.worldweatheronline.com/premium/v1/past-marine.ashx'
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection blocks the loop forever

if not WWO_API_KEY:
    # Fail early instead of sending the literal string "None" as the API key.
    raise RuntimeError('WWO_API_KEY is not set; define it in the environment or in the .env file')

PATH_RESULTS = PATH_DATA.joinpath('meteo', 'meteo_select_results.json')
PATH_FAILED = PATH_DATA.joinpath('meteo', 'meteo_select_results_failed.json')
PATH_RESULTS.parent.mkdir(parents=True, exist_ok=True)

results = []
failed = []

for i, row in batch.iterrows():
    if i % 100 == 0:
        print(f'Reached index {i}')

    # Let requests build and escape the query string instead of formatting it by hand.
    params = {
        'key': WWO_API_KEY,
        'q': f'{row.lat},{row.lon}',
        'format': 'json',
        'date': row.date_normalised,
    }

    try:
        response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Network problem or timeout: remember the row and carry on with the next one.
        print(f'Request failed for index {i}: {exc}')
        failed.append(i)
        continue

    if response.status_code not in (200, 201):
        failed.append(i)
        continue

    try:
        payload = response.json()
    except ValueError:
        # The body was not valid JSON.
        failed.append(i)
        continue

    data = payload.get('data') if isinstance(payload, dict) else None

    if not isinstance(data, dict) or data.get('error') is not None:
        # The API answered, but with an error (or an unexpected structure):
        # keep the raw response next to the row it belongs to.
        record = {'vessel': row.vessel,
                  'index': i,
                  'response': payload}
        with open(PATH_FAILED, 'a') as file:
            # json.dumps writes real JSON; formatting the dict with an f-string
            # would write a Python repr that JSON parsers cannot read.
            # default=str is a fallback for values json cannot encode natively.
            file.write(json.dumps(record, default=str) + '\n')
        failed.append(i)
        continue

    # Attach the query context so the response can be joined back to the track.
    payload.update({'lon': row.lon,
                    'lat': row.lat,
                    'vessel': row.vessel,
                    'timestamp': row.timestamp,
                    })

    with open(PATH_RESULTS, 'a') as file:
        file.write(json.dumps(payload, default=str) + '\n')

    results.append(payload)

In [None]:
# Parse data
#
# Flatten the raw API responses in `results` into two tables:
#   * `df_daily`  - one row per response: the day-level fields plus the first
#                   `astronomy` entry, merged flat;
#   * `df_hourly` - one row per entry of the day's `hourly` list.
# Every row also carries the query context (lon, lat, vessel, timestamp).

daily = []
hourly = []
for r in results:
    weather = (r.get('data') or {}).get('weather')
    if not weather:
        # No weather payload (key missing, None or empty list): nothing to parse.
        continue

    # Only the first day of the response is read.
    # NOTE(review): each request asks for a single date, so presumably there is
    # exactly one entry - confirm against the API response.
    day = weather[0]
    query = {'lon': r.get('lon'),
             'lat': r.get('lat'),
             'vessel': r.get('vessel'),
             'timestamp': r.get('timestamp')
             }

    daily_record = {}
    for k, v in day.items():
        if k == 'astronomy':
            # Merge the first astronomy entry into the daily record, if there is one.
            if v:
                daily_record.update(v[0])
        elif k != 'hourly':
            daily_record[k] = v
    daily_record.update(query)
    daily.append(daily_record)

    # One output row per hourly entry. The previous loop reused the name `d`
    # for both the accumulator and the loop variable, so only the last hourly
    # entry of each response was kept.
    for hour_entry in day.get('hourly') or []:
        hourly.append({**hour_entry, **query})

df_daily = pd.DataFrame(daily)
df_hourly = pd.DataFrame(hourly)

# Write to file
# Rows are appended without a header row, so the files stay header-less and
# repeated runs keep adding to them.
df_daily.to_csv(PATH_DATA.joinpath('meteo', 'meteo_results_daily.csv'), index=False, mode='a', header=False)
df_hourly.to_csv(PATH_DATA.joinpath('meteo', 'meteo_results_hourly.csv'), index=False, mode='a', header=False)


In [None]:
# Save the hourly weather table as a standalone CSV (header row included, index dropped).
selected_vessels_path = PATH_DATA.joinpath('meteo').joinpath('selected_vessels.csv')
df_hourly.to_csv(selected_vessels_path, index=False)

## Get Weather Data - Historical Weather API 

[This historical weather API](https://open-meteo.com/en/docs/historical-weather-api) is available for querying weather data along the route. However, its historical data is not specialised for maritime environments, so another service might be needed.

In [None]:
# Query the Open-Meteo historical weather API once per track point and collect
# the returned frames in `dfs`, each tagged with the query that produced it.
#
# NOTE(review): this loop used to iterate over `gdf`, which is not defined in
# any visible cell and raises NameError on a fresh kernel. `batch` (the selected
# vessel tracks) provides the columns read here (date_normalised, lat, lon,
# hour) - confirm that it is the intended input.
dfs = []
for i, row in batch.iterrows():
    # A single day is requested, so start and end date are identical.
    start = row['date_normalised']
    end = row['date_normalised']
    lat = row['lat']
    lon = row['lon']
    # The result gets its own name: calling it `df` would overwrite the tracks
    # DataFrame that the "Select vessels" cell depends on.
    meteo = get_meteo(lon=lon, lat=lat, start_date=start, end_date=end)
    meteo['query_lat'] = lat
    meteo['query_lon'] = lon
    meteo['query_start_date'] = start
    meteo['query_end_date'] = end
    meteo['query_hour'] = row.hour
    dfs.append(meteo)
