In [None]:
import sys
sys.path.append('../src/')

%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
from pathlib import Path
from zipfile import ZipFile
from io import BytesIO

In [None]:
# Notebook-wide constants

# Data directory: the `data` folder inside the parent of the current working directory
PATH = Path.cwd().parent / 'data'

# Bounding box for the North Sea (presumably lon/lat degrees: X = longitude,
# Y = latitude -- confirm against where the box is used)
MIN_X, MIN_Y, MAX_X, MAX_Y = 0, 50, 13, 58

In [None]:
# Import NATO vessel tracks.
# Every zip archive below data/navy/gfw_tracks is expected to contain a
# `data.csv`; the vessel name is the part of the archive's file name that
# precedes the first ' - ', lower-cased.
tracks_dir = PATH.joinpath('navy', 'gfw_tracks')
dfs = []

# sorted() makes the row order independent of the filesystem's listing order
for file in sorted(tracks_dir.rglob('*.zip')):
    name = file.stem.split(' - ')[0]
    # Context managers close both the archive and the member handle
    with ZipFile(file) as zf, zf.open('data.csv') as csv_file:
        df = pd.read_csv(csv_file)
    df['vessel_name'] = name.lower()
    dfs.append(df)

if not dfs:
    # Fail with an actionable message instead of pandas' "No objects to concatenate"
    raise FileNotFoundError(f'No *.zip track archives found under {tracks_dir}')

nato = pd.concat(dfs)
nato['timestamp'] = pd.to_datetime(nato['timestamp'])

# NOTE(review): this writes Parquet data to a file whose name ends in `.geojson`.
# The name is kept unchanged because other code may read this exact path --
# confirm, then consider renaming it to `.parquet`.
nato.to_parquet(PATH.joinpath('voi', 'processed', 'nato_vessels.geojson'))

# Build point geometries from the lon/lat columns (EPSG:4326) and reproject to
# EPSG:28992 so that distances computed later are in metres.
nato = gpd.GeoDataFrame(
    nato,
    geometry=gpd.points_from_xy(x=nato['lon'], y=nato['lat']),
    crs=4326,
)
nato = nato.to_crs(28992)
len(nato)


In [None]:
# Load the vessel positions, reading only the columns needed for the matching
vessels = pd.read_parquet(
    PATH / 'voi' / 'processed' / 'vessels_collection.parquet',
    columns=['latitude', 'longitude', 'name'],
)

# Point geometries from longitude/latitude (EPSG:4326), reprojected to EPSG:28992.
# reset_index() turns the stored index into a regular column and leaves a plain
# 0..n-1 index (presumably that index is the timestamp `t` renamed in a later
# cell -- confirm).
vessels = (
    gpd.GeoDataFrame(
        vessels,
        geometry=gpd.points_from_xy(x=vessels['longitude'], y=vessels['latitude']),
        crs=4326,
    )
    .to_crs(28992)
    .reset_index()
)
len(vessels)

In [None]:
# Pair every NATO position with its nearest vessel position and keep the pairs
# that lie less than MAX_DISTANCE_KM apart.
#
# `distance_col` lets sjoin_nearest report the separation itself (in CRS units,
# i.e. metres for EPSG:28992), which replaces a second merge with `vessels` and
# a row-wise Python distance computation. `max_distance` prunes far-away
# candidates during the spatial query.
MAX_DISTANCE_KM = 10

df = gpd.sjoin_nearest(
    nato,
    vessels,
    max_distance=MAX_DISTANCE_KM * 1000,
    distance_col='distance',
)
df['distance'] = df['distance'] / 1000  # metres -> kilometres

# Strict comparison, so pairs exactly MAX_DISTANCE_KM apart are excluded
df = df[df['distance'] < MAX_DISTANCE_KM].copy()
len(df)


In [None]:
# Tidy the joined frame: remove duplicated right-hand columns and join helper
# columns, strip the merge suffix, and give the remaining columns explicit names.

# Match `_y` / `_x` only as a suffix so that unrelated columns which merely
# contain those characters are left untouched.
cols = [col for col in df.columns if col.endswith('_y')]

# Non-inplace drop with errors='ignore' keeps the cell re-runnable
df = df.drop(columns=cols + ['index_right', 'seg_id'], errors='ignore')
df.columns = [col[:-len('_x')] if col.endswith('_x') else col for col in df.columns]

# NOTE(review): 'vessel_name_left' only exists when both joined frames carry a
# `vessel_name` column; with the columns loaded in this notebook the NATO name
# appears to stay in `vessel_name` -- confirm.
df = df.rename(columns={'t': 'timestamp_vessel',
                        'timestamp': 'timestamp_nato',
                        'lon': 'longitude_nato',
                        'lat': 'latitude_nato',
                        'longitude': 'longitude_vessel',
                        'latitude': 'latitude_vessel',
                        'name': 'name_vessel',
                        'vessel_name_left': 'name_nato'})


In [None]:
# Keep only the pairs whose two observations lie at most one hour apart.
# NOTE(review): each row is a NATO position paired with its single nearest vessel
# position regardless of time, so a vessel that was close at the right moment
# may be missed when another position lies closer -- confirm this is intended.
temporal_proximity = np.timedelta64(1, 'h')

nato_times = pd.to_datetime(df['timestamp_nato'])
vessel_times = pd.to_datetime(df['timestamp_vessel'])
df['time_difference'] = (nato_times - vessel_times).abs()

within_window = df['time_difference'] <= temporal_proximity
df_proximity = df.loc[within_window].copy()
gdf = gpd.GeoDataFrame(df_proximity)
len(gdf)

In [None]:
# Show the matched positions on an interactive map.
# `vessel_name` holds the NATO vessel name and `name_vessel` the name of the
# matched vessel; each column is selected once because duplicated column names
# cannot be turned into map features.
gdf[['vessel_name', 'name_vessel', 'geometry']].explore(tiles='CartoDB Positron')