In [2]:
import sys
sys.path.append('../src/')

%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
import geopandas as gpd
import numpy as np
from pathlib import Path
from zipfile import ZipFile
from io import BytesIO

In [4]:
# Set some constants

PATH = Path.cwd().parent.joinpath('data')

In [None]:
# Import Nato vessels
dfs = []

for file in PATH.joinpath('navy', 'gfw_tracks').rglob('*.zip'):
    name = file.stem.split(' - ')[0]
    zf = ZipFile(file)
    df = pd.read_csv(zf.open('data.csv'))
    df['vessel_name'] = name.lower()
    dfs.append(df)

nato = pd.concat(dfs)
nato.timestamp = pd.to_datetime(nato.timestamp)
nato.to_parquet(PATH.joinpath('voi', 'processed', 'nato_vessels.geojson'))
nato = gpd.GeoDataFrame(nato,
                        geometry=gpd.points_from_xy(x=nato.lon,
                                                    y=nato.lat),
                        crs=4326)
nato = nato.to_crs(28992)
len(nato)


In [None]:
# Import vessels

vessels = pd.read_parquet(PATH.joinpath('voi', 'processed', 'vessel_collection.parquet'))

vessels = gpd.GeoDataFrame(vessels, 
                           geometry=gpd.points_from_xy(x=vessels.longitude, 
                                                       y=vessels.latitude),
                           crs=4326)
vessels = vessels.to_crs(28992)
vessels.reset_index(inplace=True)
len(vessels)

In [None]:
# Perform a spatial join

df = gpd.sjoin_nearest(nato, vessels).merge(vessels, left_on='index_right', right_index=True)

df['distance'] = df.apply(lambda r: r['geometry_x'].distance(r['geometry_y']) / 1000, axis=1)

df = df[df['distance'] < 10].copy()
len(df)


In [None]:
# Clean it up

cols = [col for col in df.columns if '_y' in col]
df.drop(cols, inplace=True, axis=1)
df.drop(['index_right', 'seg_id'], inplace=True, axis=1)
df.columns = df.columns.str.replace('_x', '')
df = df.rename(columns={'t': 'timestamp_vessel',
                        'timestamp': 'timestamp_nato',
                        'lon': 'longitude_nato',
                        'lat': 'latitude_nato',
                        'longitude': 'longitude_vessel',
                        'latitude': 'latitude_vessel',
                        'name': 'name_vessel', 
                        'vessel_name_left': 'name_nato'})


In [None]:
# Filter on time difference

df['time_difference'] = abs(pd.to_datetime(df.timestamp_nato) - pd.to_datetime(df.timestamp_vessel))#.astype('timedelta64[s]').dt.seconds / 3600)
temporal_proximity = np.timedelta64(1, 'h')
df_proximity = df[df.time_difference <= temporal_proximity].copy()
gdf = gpd.GeoDataFrame(df_proximity)
len(gdf)

In [None]:
# Show

gdf[['vessel_name', 'geometry', 'vessel_name']].explore(tiles='CartoDB Positron')

### Research NATO exercises in North Sea

There are several date ranges and locations known with NATO exercises in the air and at sea. Let's see which vessels were in the neighbourhood at those times. There are roughly two areas of interest: 

1. The Waddenzee, North Sea (corner NL, DE, DK)
2. Belgian coast, Zeeland up until Rotterdam

In [1]:
# First create two bounding boxes for the areas of interest

NORTH_X_MIN = 4.26
NORTH_X_MAX = 9.09
NORTH_Y_MIN = 53.22
NORTH_Y_MAX = 55.24

SOUTH_X_MIN = 2.22
SOUTH_X_MAX = 4.45
SOUTH_Y_MIN = 51.19
SOUTH_Y_MAX = 51.99

In [15]:
# Import vessels north

df = pd.read_parquet(PATH.joinpath('voi', 'processed', 'vessel_collection.parquet'), 
                                columns=['latitude', 'longitude', 'vessel_name', 't'],
                                filters = [('latitude', '>', NORTH_Y_MIN),
                                            ('latitude', '<', NORTH_Y_MAX),
                                            ('longitude', '>', NORTH_X_MIN),
                                            ('longitude', '<', NORTH_Y_MAX)])

df['region'] = 'north'
len(df)


2576935

In [16]:
# Filter date ranges

north = pd.concat([df.loc['2014-03-31': '2014-04-11'],
                   df.loc['2015-04-13': '2015-04-24'],
                   df.loc['2016-04-11': '2016-04-25'],
                   df.loc['2018-04-08': '2018-04-18'],
                   df.loc['2019-04-01': '2019-04-12'],
                   df.loc['2020-04-14': '2020-05-01'],
                   df.loc['2021-09-14': '2021-09-20'],
                   df.loc['2022-01-24': '2022-01-28'],
                   df.loc['2022-03-28': '2022-04-08'],
                   df.loc['2022-10-10': '2022-10-14'],
                   df.loc['2022-10-17': '2022-10-23'],
                   df.loc['2023-02-02': '2023-02-13'],
                   df.loc['2023-12-11': '2023-12-14'],
                   df.loc['2017-03-27': '2017-04-07']
                   ])

len(north)

97147

In [17]:
# Import vessels south

df = pd.read_parquet(PATH.joinpath('voi', 'processed', 'vessel_collection.parquet'), 
                                    columns=['latitude', 'longitude', 'vessel_name', 't'],
                                    filters = [('latitude', '>', SOUTH_Y_MIN),
                                                ('latitude', '<', SOUTH_Y_MAX),
                                                ('longitude', '>', SOUTH_X_MIN),
                                                ('longitude', '<', SOUTH_Y_MAX)])
df['region'] = 'south'
len(df)



740127

In [18]:
# Filter date ranges

south = pd.concat([df.loc['2021-03-03': '2021-03-31'],
                   df.loc['2021-11-22': '2021-12-15'],
                   df.loc['2023-11-21': '2023-12-15'],
                   df.loc['2022-10-17': '2022-10-30'],
                   df.loc['2021-10-07': '2021-10-21']
                ])

len(south)

27831

In [23]:
# Bring it all together

df = pd.concat([north, south])

gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(x=df.longitude, y=df.latitude), crs=4326)
gdf['timestamp'] = gdf.index
gdf.reset_index(drop=True, inplace=True)
len(gdf)

124978

In [29]:
gdf[gdf.vessel_name == 'sirius'][['geometry', 'vessel_name']].explore()

In [30]:
gdf.to_file(PATH.joinpath('voi', 'processed', 'oefeningen.geojson'), driver='GeoJSON')