In [1]:
# Put ../src on the import path so modules stored there can be imported
# by the cells below.
import sys
sys.path.append('../src/')

# IPython autoreload, mode 2: modules are reloaded before each cell executes,
# so edits to source files take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [2]:
import geopandas as gpd
import gfw
from pathlib import Path
import pandas as pd
from shapely import Polygon
from ast import literal_eval
import random
import string

In [3]:
# Data directory: the `data` folder next to the notebook's working directory.
PATH = Path.cwd().parent / 'data'

## Data prep

In [4]:
# Get EEZ region ids from Global Fishing Watch, one per country of interest.

results = gfw.get_eez_list()

# NOTE: the order matters. Later cells index into `eezs` positionally
# (eezs[0] -> Netherlands, eezs[1] -> Belgium).
countries = ['Netherlands', 'Belgium', 'Denmark', 'Germany', 'Norway', 'United Kingdom']
eezs = []

for country in countries:
    # Catalogue entries are matched on their 'label' field.
    matches = [x for x in results if x.get('label') == country]
    if not matches:
        # Name the missing country instead of failing with a bare IndexError.
        raise ValueError(f'No EEZ entry found for country label {country!r}')
    # If several entries share the label, keep the first one (as before).
    eezs.append(matches[0].get('id'))


In [None]:
# Get events in EEZ: one request per (event type, EEZ id) combination,
# restricted to flag 'RUS'.

events = ['loitering', 'port_visits', 'ais', 'fishing', 'encounter']
results = []
for event in events:
    for eez in eezs:
        # The output file name encodes both the event type and the EEZ id.
        out_file = PATH.joinpath('events', f'{event}_{eez}.json')
        result = gfw.get_events_by_flag_and_geometry(
            flag='RUS',
            start_date='2012-01-01',
            end_date='2024-05-01',
            event_type=event,
            geometry=None,
            region=eez,
            path_out=out_file,
        )
        results.append(result)

In [None]:
# Parse events: split the raw responses into one record list per event type,
# then flatten each list into a DataFrame.

loitering = []
fishing = []
encounter = []
port_visit = []

# Keys are checked in this order; a record carrying none of them is treated
# as a fishing event.
buckets = (
    ('loitering', loitering),
    ('encounter', encounter),
    ('port_visit', port_visit),
)

for result in results:
    # Requests may have produced no response at all.
    if result is None:
        continue
    # A response without (or with an empty/None) 'entries' contributes nothing.
    for record in result.get('entries') or []:
        for key, bucket in buckets:
            if key in record:
                bucket.append(record)
                break
        else:
            fishing.append(record)

# json_normalize flattens nested dicts into dotted column names
# (e.g. 'vessel.id', 'position.lat'), which later cells rely on.
loitering = pd.json_normalize(loitering)
encounter = pd.json_normalize(encounter)
port_visit = pd.json_normalize(port_visit)
fishing = pd.json_normalize(fishing)

print(f'Found {len(loitering)} loitering events\n{len(encounter)} encounter events\n{len(port_visit)} port visits\nand {len(fishing)} fishing events')

In [None]:
# Write each event table to its own CSV file under PATH/events.

dfs = [loitering, encounter, port_visit, fishing]
names = ['loitering.csv', 'encounter.csv', 'port_visit.csv', 'fishing.csv']

# Pair every frame with its file name. The lists must be passed to zip as
# separate arguments; zip([dfs, names]) would yield 1-tuples and fail to unpack.
for df, name in zip(dfs, names):
    df.to_csv(PATH.joinpath('events', name), index=False)

In [None]:
# Get vessel info and owner information

# Get vessel ids: the distinct 'vessel.id' values across all four event tables
ids = loitering['vessel.id'].tolist() + encounter['vessel.id'].tolist() + port_visit['vessel.id'].tolist() + fishing['vessel.id'].tolist()
ids = list(set(ids))

# Query Global Fishing Watch
# Assumes get_vessel_info returns an iterable of response dicts, each holding
# an 'entries' list (this is what the parsing loop below expects) -- TODO confirm.
vessel_responses = gfw.get_vessel_info(ids, PATH.joinpath('events', 'vessel_info.json'))

# Parse results
info = []
owners = []

# Iterate over the value returned by the query (the loop previously referenced
# an undefined name `r`).
for response in vessel_responses:
    for entry in response.get('entries') or []:
        # Either list may be absent for a vessel; treat that as "no rows".
        info.extend(entry.get('registryInfo') or [])
        owners.extend(entry.get('registryOwners') or [])

info = pd.DataFrame(info)
owners = pd.DataFrame(owners)

# Write to file
info.to_csv(PATH.joinpath('events', 'vessel_info.csv'), index=False)
owners.to_csv(PATH.joinpath('events', 'vessel_owners.csv'), index=False)

## Analysis

In [5]:
# Import data: read the event and vessel tables from the CSV files in PATH/events.

events_dir = PATH.joinpath('events')

loitering = pd.read_csv(events_dir.joinpath('loitering.csv'))
encounter = pd.read_csv(events_dir.joinpath('encounter.csv'))
port_visit = pd.read_csv(events_dir.joinpath('port_visit.csv'))
fishing = pd.read_csv(events_dir.joinpath('fishing.csv'))
owners = pd.read_csv(events_dir.joinpath('vessel_owners.csv'))
info = pd.read_csv(events_dir.joinpath('vessel_info.csv'))

In [None]:
# Focus on Netherlands and Belgian EEZ

def _ssvids_in_eez(events_df, eez_id):
    """Return the unique 'vessel.ssvid' values of rows whose 'regions.eez' contains `eez_id`.

    The containment test is `str(eez_id) in value`, applied to each
    'regions.eez' cell.
    """
    in_eez = events_df['regions.eez'].apply(lambda value: str(eez_id) in value)
    return events_df[in_eez]['vessel.ssvid'].unique()


# `eezs` is filled in the order of `countries`, so index 0 is the Netherlands
# and index 1 is Belgium.
nl_loitering_ids = _ssvids_in_eez(loitering, eezs[0])
be_loitering_ids = _ssvids_in_eez(loitering, eezs[1])

In [6]:
def _random_uuid():
    """Return a random 16-character id made of lowercase letters and digits."""
    return ''.join(random.choices(string.ascii_lowercase + string.digits, k=16))


# Import pipelines and telecom; rows whose mps_uuid is None get a random id

lines = gpd.read_parquet(PATH.joinpath('voi', 'context', 'gis', 'lines.parquet'))
lines['mps_uuid'] = lines['mps_uuid'].apply(lambda x: _random_uuid() if x is None else x)
len(lines)

# Import platforms, connectors and seabed features; same None-id replacement

points = gpd.read_parquet(PATH.joinpath('voi', 'context', 'gis', 'points.parquet'))
points['mps_uuid'] = points['mps_uuid'].apply(lambda x: _random_uuid() if x is None else x)
len(points)


115447

In [9]:
# Loitering: Create geodataframe
# Point geometries are built from the 'position.lon' / 'position.lat' columns
# and declared as EPSG:4326.

loitering = gpd.GeoDataFrame(loitering, 
                             geometry=gpd.points_from_xy(x=loitering['position.lon'],
                                                         y=loitering['position.lat'],
                                                         crs=4326))

# Change CRS for distance measuring
# All three layers are reprojected to EPSG:28992. Only `lines` is used in the
# distance computation below; `points` is reprojected but not used in this cell.

loitering = loitering.to_crs(28992)
lines = lines.to_crs(28992)
points = points.to_crs(28992)

# Attach the nearest line to every event, then merge that line's geometry back
# in via 'index_right'. After the merge the event geometry is 'geometry_x' and
# the matched line geometry is 'geometry_y'.
loitering_ = gpd.sjoin_nearest(loitering, lines).merge(lines[['geometry']], left_on="index_right", right_index=True)

# Event-to-line distance, divided by 1000 (presumably metres -> km, given the
# 1 km threshold below -- confirm the CRS units).
loitering_["distance"] = loitering_.apply(lambda r: r["geometry_x"].distance(r["geometry_y"]) / 1000, axis=1)

# The line geometry is only needed for the distance; drop it again.
loitering_.drop(['geometry_y'], axis=1, inplace=True)

# Re-declare the event points as the active geometry and return to EPSG:4326
# for export.
loitering_ = gpd.GeoDataFrame(loitering_, geometry='geometry_x', crs=28992)
loitering_ = loitering_.to_crs(4326)

# Keep only loitering events within `distance` (1 km) of their nearest line
distance = 1

loitering_ = loitering_[loitering_['distance'] <= distance].copy()

loitering_.to_file(PATH.joinpath('voi', 'processed', 'loitering_all_vessels.geojson'), driver='GeoJSON')

len(loitering_)


1157

In [7]:
# Fishing: Create geodataframe
# Point geometries are built from the 'position.lon' / 'position.lat' columns
# and declared as EPSG:4326.

fishing = gpd.GeoDataFrame(fishing, 
                             geometry=gpd.points_from_xy(x=fishing['position.lon'],
                                                         y=fishing['position.lat'],
                                                         crs=4326))

# Change CRS for distance measuring
# All three layers are reprojected to EPSG:28992. Only `lines` is used in the
# distance computation below; `points` is reprojected but not used in this cell.

fishing = fishing.to_crs(28992)
lines = lines.to_crs(28992)
points = points.to_crs(28992)

# Attach the nearest line to every event, then merge that line's geometry back
# in via 'index_right'. After the merge the event geometry is 'geometry_x' and
# the matched line geometry is 'geometry_y'.
fishing_ = gpd.sjoin_nearest(fishing, lines).merge(lines[['geometry']], left_on="index_right", right_index=True)

# Event-to-line distance, divided by 1000 (presumably metres -> km, given the
# 1 km threshold below -- confirm the CRS units).
fishing_["distance"] = fishing_.apply(lambda r: r["geometry_x"].distance(r["geometry_y"]) / 1000, axis=1)

# The line geometry is only needed for the distance; drop it again.
fishing_.drop(['geometry_y'], axis=1, inplace=True)

# Re-declare the event points as the active geometry and return to EPSG:4326
# for export.
fishing_ = gpd.GeoDataFrame(fishing_, geometry='geometry_x', crs=28992)
fishing_ = fishing_.to_crs(4326)

# Keep only fishing events within `distance` (1 km) of their nearest line
distance = 1

fishing_ = fishing_[fishing_['distance'] <= distance].copy()

fishing_.to_file(PATH.joinpath('voi', 'processed', 'fishing_all_vessels.geojson'), driver='GeoJSON')

len(fishing_)


1105

In [11]:
# Let's explore the loitering a bit more...

# Distinct 'vessel.ssvid' values among the rows of loitering_; show the first five.
ids = pd.unique(loitering_['vessel.ssvid'])
ids[:5]

array([273333650, 273310400, 273437230, 273133200, 273298320])

In [14]:
# Cast ssvid to integer so it can be matched against the integer ids with isin().
owners['ssvid'] = owners['ssvid'].astype('int')

In [15]:
# Owner rows whose ssvid is one of the vessel ids collected in `ids`.
owner_matches = owners['ssvid'].isin(ids)
loitering_owners = owners[owner_matches].copy()
len(loitering_owners)

282

In [19]:
# Number of owner rows per owner name, most frequent first.
loitering_owners['name'].value_counts()

name
VOLGA SHIPPING JOINT STOCK            19
NORTHERN SHIPPING                     16
NORTH WESTERN FLEET                    7
GTLK MALTA                             6
RUSSIA GOVT                            6
                                      ..
HARVEST                                1
KALININ FISHING COLLECTIVE FARM JA     1
ATLANTRYBFLOT JSC                      1
OZERNOVSKIY RYBOKONSERVNYY             1
OSTROV SAKHALIN JSC                    1
Name: count, Length: 159, dtype: int64

In [None]:
# Create Polygons for ports
#
# Each area is a closed ring (the first vertex is repeated as the last one)
# of (lon, lat) pairs, ready to be wrapped in a shapely Polygon.

ijmuiden = ((4.504274, 52.493004),
            (4.910082, 52.449922),
            (4.89085588546282, 52.37956124860245),
            (4.394411, 52.413918),
            (4.504274, 52.493004))

rotterdam = ((3.892931, 52.034854),
             (4.443900, 51.956091),
             (4.423893, 51.803771),
             (3.745890, 51.899889),
             (3.892931, 52.034854))

zeeland = ((3.299345, 51.546999),
           (4.350941, 51.467881),
           (4.273901, 51.198332),
           (3.064373, 51.403045),
           (3.299345, 51.546999))

# The fourth vertex used to be (52.914838, 4.641531), i.e. lat/lon swapped,
# which put that corner far outside the area. It is now in (lon, lat) order
# like every other vertex.
den_helder = ((4.641531, 53.027905),
              (4.880262, 53.010702),
              (4.892696, 52.923835),
              (4.641531, 52.914838),
              (4.641531, 53.027905))

urk = ((5.506152, 52.730952),
       (5.690174, 52.709111),
       (5.710574, 52.598706),
       (5.535640, 52.626007),
       (5.506152, 52.730952))

eemshaven = ((6.585831, 53.652322),
             (7.044206, 53.753703),
             (7.059318, 53.266950),
             (6.578275, 53.367753),
             (6.585831, 53.652322))

harlingen = ((5.336631, 53.214196),
             (5.525522, 53.227767),
             (5.487744, 53.129655),
             (5.319001, 53.151561),
             (5.336631, 53.214196))

eez = ((4.389826, 53.336753),
       (2.980557, 51.565663),
       (3.571947, 51.381483),
       (4.402408, 52.167808),
       (4.874262, 52.864583),
       (5.704723, 53.310448),
       (6.969290, 53.297290),
       (6.560350, 53.721935),
       (4.175919, 53.548476),
       (4.389826, 53.336753))

antwerpen = ((4.102983, 51.395771),
             (4.349450, 51.392949),
             (4.479467, 51.224742),
             (4.195691, 51.230406),
             (4.102983, 51.395771))

zeebrugge = ((3.117470, 51.369696),
             (3.260663, 51.373926),
             (3.289121, 51.296882),
             (3.133731, 51.288972),
             (3.117470, 51.369696))

oostende = ((2.879291, 51.249601),
            (2.964360, 51.258848),
            (2.979583, 51.224091),
            (2.885559, 51.206983),
            (2.879291, 51.249601))

In [None]:

# GeoJSON-style mapping of the Antwerpen ring, plus the file the query result
# is written to.
antwerpen_polygon = Polygon(antwerpen)
geom = antwerpen_polygon.__geo_interface__
path = PATH / 'antwerpen.json'

In [None]:
# Query 'port_visits' events for flag 'RUS' inside the polygon held in `geom`.
result = gfw.get_events_by_flag_and_geometry(flag='RUS',
                                             start_date='2020-01-01',
                                             end_date='2024-01-01',
                                             geometry=geom,
                                             region=None,
                                             path_out=path,
                                             event_type='port_visits')

# The event-parsing cell treats a None return as "no data"; report 0 entries in
# that case (and for a missing 'entries' key) instead of raising on None.
len(result.get('entries') or []) if result is not None else 0

## Parse data

In [None]:
# Flatten stored event files into one row per event.
records = []

names = ['nl_pipes_loitering_4km']

for name in names:

    with open(PATH.joinpath('events', f'{name}.json'), 'r') as file:
        # Read the file line by line. Iterating over file.read() walks the
        # text one character at a time, which literal_eval cannot parse.
        # Assumes one dict literal per line -- TODO confirm against the writer
        # in gfw.
        # NOTE(review): literal_eval only understands Python literals; if the
        # files contain JSON true/false/null, json.loads is needed instead.
        for line in file:
            line = line.strip()
            if not line:
                continue
            payload = literal_eval(line)
            for entry in payload.get('entries') or []:
                # Any of these sub-dicts can be missing (e.g. loitering events
                # carry no 'port_visit'); fall back to empty dicts so the
                # corresponding fields come out as None.
                p = entry.get('position') or {}
                v = entry.get('vessel') or {}
                pv = entry.get('port_visit') or {}

                record = {'start': entry.get('start'),
                          'end': entry.get('end'),
                          'id': entry.get('id'),
                          'lat': p.get('lat'),
                          'lon': p.get('lon'),
                          'vessel_id': v.get('id'),
                          'vessel_name': v.get('name'),
                          'mmsi': v.get('ssvid'),
                          'type': v.get('type'),
                          'visit_id': pv.get('visit_id'),
                          'confidence': pv.get('confidence'),
                          'duration_hours': pv.get('durationHrs'),
                          'file': f'{name}.json'}

                # Flatten every '*Anchorage*' sub-dict into columns prefixed
                # with the first character of its key, lower-cased.
                for item, items in pv.items():
                    if 'Anchorage' in item and items:
                        for key, value in items.items():
                            record.update({f'{item[0]}_{key}'.lower(): value})

                records.append(record)

df = pd.DataFrame(records)
# An empty frame has no columns, so the subset below would raise KeyError.
if not df.empty:
    df = df.drop_duplicates(subset=['vessel_id', 'start', 'end'])
len(df)

In [None]:
# Persist the parsed event table.
# NOTE(review): `df` is built from whatever files are listed in `names`; check
# that this output file name still matches that input before running.
df.to_csv(PATH / 'port_visits_belgium.csv', index=False)

## Get activity near infrastructure

Let's create polygons (buffers) around pipelines, cables and other infrastructure and see if we can find Russian vessels loitering there.

In [None]:
# Location of the pipeline GeoPackage inside the data directory.
pipes = PATH / 'gis' / 'mps_mapping_pipeline.gpkg'

In [None]:
# List the layer names contained in the pipeline GeoPackage.
import fiona
fiona.listlayers(pipes)

In [None]:
# Load the 'mps_mapping_pipeline' layer from the pipeline GeoPackage.
pipeline_gpkg = PATH.joinpath('gis', 'mps_mapping_pipeline.gpkg')
pipelines = gpd.read_file(pipeline_gpkg, layer='mps_mapping_pipeline')

In [None]:
# Keep only offshore pipelines whose admin area is one of the six listed countries.
admin_areas = ['Belgium', 'Netherlands', 'Denmark', 'Norway', 'United Kingdom', 'Germany']
in_admin_area = pipelines.admin_area_name.isin(admin_areas)
is_offshore = pipelines.mps_est_shore_status == 'OFFSHORE'

pipelines = pipelines[in_admin_area & is_offshore].copy()
len(pipelines)

In [None]:
# Write the filtered pipelines as a flat CSV table.
pipes_csv = PATH.joinpath('gis', 'pipes_ns.csv')
pipelines.to_csv(pipes_csv, index=False)

In [None]:
# Write the filtered pipelines, geometry included, as GeoJSON.
pipes_geojson = PATH.joinpath('gis', 'pipelines_for_gfw.geojson')
pipelines.to_file(pipes_geojson, driver='GeoJSON')

In [None]:
# Buffer every Dutch pipeline by 2000 CRS units in EPSG:28992 (presumably
# metres -- confirm), leaving a GeoSeries of buffer polygons with a fresh
# 0..n-1 index.
nl = (
    pipelines[pipelines.admin_area_name == 'Netherlands']
    .copy()
    .to_crs(28992)
    .geometry
    .buffer(2000)
    .reset_index(drop=True)
)

In [None]:
# Reproject the buffers to EPSG:4326 before they are sent as query geometries.
nl = nl.to_crs(epsg=4326)

In [None]:
# Query loitering events for flag 'RUS' inside every pipeline buffer.
# NOTE(review): every request uses the same output path and the return value
# is discarded -- confirm that gfw appends to this file rather than
# overwriting it, otherwise only the last buffer's result survives.
loitering_out = PATH.joinpath('events', 'nl_pipes_loitering_4km.json')

for buffer_geom in nl:
    gfw.get_events_by_flag_and_geometry(
        flag='RUS',
        start_date='2015-01-01',
        end_date='2023-12-31',
        event_type='loitering',
        geometry=buffer_geom.__geo_interface__,
        path_out=loitering_out,
    )
    