In [1]:
# Make modules under ../src importable from this notebook.
# NOTE: '../src/' is a relative entry, so it is resolved against the kernel's
# working directory - presumably the notebook's own folder; verify when the
# kernel is started from somewhere else.
import sys
sys.path.append('../src/')

# Load IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell executes, so edits to local source files take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import json
from pathlib import Path
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon

In [3]:
# Root data directory: the `data` folder located in the parent of the current working directory.
PATH = Path.cwd().parent / 'data'

# Get vessels

Vessel records are sourced from [OpenSanctions](https://opensanctions.org) and Global Fishing Watch.

In [None]:
# Parse OpenSanctions records
#
# The export is read line by line (one JSON document per line). Only entities
# whose `schema` is 'Vessel' are kept, and their `properties` mapping becomes
# one row of `df`. `ru` is the subset of rows whose flag equals 'ru'.

records = []
# JSON is UTF-8 by specification; do not rely on the platform default encoding.
with open(PATH.joinpath('vessels', 'entities.ftm.json'), 'r', encoding='utf-8') as file:
    for line in file:
        if not line.strip():
            continue  # tolerate blank / trailing empty lines
        entity = json.loads(line)  # decode each line exactly once
        if entity.get('schema') == 'Vessel':
            records.append(entity.get('properties'))

df = pd.DataFrame(records)

# Columns that are collapsed from a list of values to their first value.
cols = ['sourceUrl', 'registrationNumber', 'callSign', 'flag', 'createdAt']


def _first_value(value):
    """Return the first element of a list, NaN for an empty list, anything else unchanged."""
    if isinstance(value, list):
        return value[0] if value else np.nan
    # Missing properties (NaN) and other non-list values pass through untouched;
    # a type check is used because identity comparison with np.nan is unreliable.
    return value


for col in cols:
    df[col] = df[col].apply(_first_value)

# Strip the literal 'IMO ' prefix; regex=False makes the literal match explicit.
df['registrationNumber'] = df['registrationNumber'].str.replace('IMO ', '', regex=False)

# NOTE(review): 'topics', 'name' and 'type' are not collapsed above, so they
# presumably still hold lists here - confirm this is intended for the export.
ru = df[df['flag'] == 'ru'][['sourceUrl', 'registrationNumber', 'topics', 'name', 'type', 'callSign', 'flag']].copy()
len(ru)

In [None]:
# Persist the `ru` vessel table as CSV, leaving the row index out of the file

sanctioned_csv = PATH.joinpath('vessels', 'sanctioned.csv')
ru.to_csv(sanctioned_csv, index=False)

## Get context layers

In [None]:
# Import telecoms
#
# Read every shapefile in gis/telcom, lower-case its column names, rename the
# known source columns to a shared schema, and write the combined layer to
# gis/infra/telecom.geojson.

# Lower-cased source column -> harmonised column name.
# NOTE(review): several source columns map onto the same target ('name',
# 'mps_uuid'); a shapefile carrying two of them would end up with duplicate
# column names - verify against the input files.
column_aliases = {'naam': 'name',
                  'eigenaar': 'owner',
                  'name_': 'name',
                  'pipe_name': 'name',
                  'uuid': 'mps_uuid',
                  'objectid': 'mps_uuid'}

dfs = []

# glob() yields paths in arbitrary order; sorting keeps the row order of the
# written file reproducible between runs and machines.
for file in sorted(PATH.joinpath('gis', 'telcom').glob('*.shp')):
    gdf = gpd.read_file(file)
    gdf.columns = gdf.columns.str.lower()
    gdf = gdf.rename(columns=column_aliases)
    dfs.append(gdf)

# Fail with an actionable message instead of pandas' "No objects to concatenate".
if not dfs:
    raise FileNotFoundError(f"No .shp files found in {PATH.joinpath('gis', 'telcom')}")

gdf = pd.concat(dfs)

# Make sure the target folder exists before writing.
PATH.joinpath('gis', 'infra').mkdir(parents=True, exist_ok=True)
gdf.to_file(PATH.joinpath('gis', 'infra', 'telecom.geojson'), driver='GeoJSON')

In [4]:
### Import context layers
#
# Read every GeoJSON layer in gis/infra, reduce it to a shared set of columns
# (adding missing ones as NaN), tag each row with the source file's stem, and
# sort the layer into a points / lines / polygons bucket based on the geometry
# class of its first feature.

gdfs_points = []
gdfs_lines = []
gdfs_polygons = []

cols = ['mps_uuid', 'name', 'owner', 'owner_group', 'operator', 'operator_group', 'geometry']

# Exact geometry class of a layer's first feature -> bucket collecting that layer.
buckets = (
    (Point, gdfs_points),
    (LineString, gdfs_lines),
    (Polygon, gdfs_polygons),
)

# Sorted so the row order of the concatenated layers is reproducible.
for file in sorted(PATH.joinpath('gis', 'infra').glob('*geojson')):
    gdf = gpd.read_file(file)
    if gdf.empty:
        # An empty layer has no first geometry to classify.
        print(f'Skipping {file.name}: layer contains no features')
        continue
    for col in cols:
        if col not in gdf.columns:
            gdf[col] = np.nan
    # .copy() so the assignment below targets an independent frame, not a slice.
    gdf = gdf[cols].copy()
    gdf['dataset'] = file.stem
    # Positional access: does not depend on the index containing the label 0.
    first_geom = gdf.geometry.iloc[0]
    for geom_class, bucket in buckets:
        if type(first_geom) is geom_class:
            bucket.append(gdf)
            break
    else:
        # Any other geometry class (e.g. multi-part geometries) is not bucketed;
        # report it so the omission is visible instead of silent.
        print(f'Skipping {file.name}: unsupported geometry type {type(first_geom).__name__}')


  as_dt = pd.to_datetime(df[k], errors="ignore")
  as_dt = pd.to_datetime(df[k], errors="ignore", utc=True)


In [5]:
# Stack the per-dataset frames of each geometry bucket into a single layer.
points, lines, polygons = (
    pd.concat(frames) for frames in (gdfs_points, gdfs_lines, gdfs_polygons)
)

# Write each combined layer to its own parquet file under gis/.
layer_outputs = (
    (points, 'points.parquet'),
    (lines, 'lines.parquet'),
    (polygons, 'polygons.parquet'),
)
for layer, filename in layer_outputs:
    layer.to_parquet(PATH.joinpath('gis', filename))