In [None]:
import cudf
import cuspatial as csp
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon
import os

In [None]:
# Years to scan; each year has its own "parquets" directory under US_MARINE.
yrs = [str(x) for x in (2019, 2020, 2021)]

# Collect the relative path of every entry found in each year's directory.
file_names = []
for yr in yrs:
    parquet_dir = f"../../US_MARINE/{yr}/parquets/"
    file_names += [parquet_dir + name for name in os.listdir(parquet_dir)]

# Split by vessel type with a substring match on the path. Sorting makes the
# processing order deterministic (os.listdir returns entries in arbitrary order).
cargo_file_names = sorted(x for x in file_names if "cargo" in x)
tanker_file_names = sorted(x for x in file_names if "tanker" in x)
# The original cell spelled this "tanker_field_names"; keep that name as an
# alias so any cell that still refers to it keeps working.
tanker_field_names = tanker_file_names

In [None]:
# Regions of interest as closed rings of (x, y) vertices; the first and last
# vertex of each ring are identical. The frame below is tagged with crs=4326.
los_angeles = Polygon([
    [-121.88232421875, 34.9895035675793],
    [-121.97021484374999, 31.316101383495624],
    [-117.5042724609375, 31.287939892641734],
    [-116.32873535156249, 33.224903086263964],
    [-119.02587890624999, 34.95349314197422],
    [-121.88232421875, 34.9895035675793],
])
boston = Polygon([
    [-70.86456298828124, 42.71473218539458],
    [-71.20513916015625, 42.48627657532139],
    [-71.21063232421875, 42.116561350389006],
    [-70.938720703125, 41.14970617453726],
    [-69.70275878906249, 41.15384235711447],
    [-69.0435791015625, 41.96357478222518],
    [-69.6697998046875, 42.79540065303723],
    [-70.86456298828124, 42.71473218539458],
])
gom = Polygon([
    [-99.140625, 31.071755902820133],
    [-98.85498046875, 25.383735254706867],
    [-91.07666015625, 24.487148563173424],
    [-86.33056640625, 26.43122806450644],
    [-86.30859375, 29.668962525992505],
    [-88.35205078124999, 31.59725256170666],
    [-95.49316406249999, 31.672083485607402],
    [-99.140625, 31.071755902820133],
])

# One row per region. The row order (LA, GOM, boston) is the order in which
# later cells flatten the polygons, so it is kept exactly as before.
df = gpd.GeoDataFrame(
    pd.DataFrame(
        {
            "Region": ["LA", "GOM", "boston"],
            "geometry": [los_angeles, gom, boston],
        }
    ),
    geometry="geometry",
    crs=4326,
)

# info() writes its summary to stdout and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
df.info()
df.plot()

In [None]:
# Flatten the region polygons into parallel coordinate lists:
#   poly_points_x / poly_points_y - ring vertices of all regions, concatenated
#   offsets                       - start position of each region's ring in those lists
#   idx                           - region names, in the same order as offsets
offsets = []
poly_points_x = []
poly_points_y = []
idx = []
for region in df.Region.unique():
    # Exterior ring of the first geometry carrying this region name, with the
    # closing vertex (a repeat of the first one) dropped. Evaluated once per
    # region instead of once per coordinate axis.
    region_geoms = df[df.Region == region].geometry
    ring = region_geoms.__geo_interface__['features'][0]['geometry']['coordinates'][0][:-1]
    # Record where this ring starts before its points are appended, so no
    # trailing end-offset has to be trimmed afterwards.
    offsets.append(len(poly_points_x))
    poly_points_x += [pt[0] for pt in ring]
    poly_points_y += [pt[1] for pt in ring]
    idx.append(region)
offsets, idx

In [None]:
# Preview: one consecutive integer per region (0, 1, 2, ...), labelled with
# the region name through the Series index.
cudf.Series(list(range(len(idx))), index=idx)

In [None]:
%%time
vessel_names_list = []
list_of_gom_dfs = []
list_of_boston_dfs = []
list_of_LA_dfs = []
for i, f in enumerate(cargo_file_names[:]):
    tmp = cudf.read_parquet(f)
    t = csp.point_in_polygon(tmp.LON, tmp.LAT, cudf.Series([x for x in range(len(idx))], index=idx), offsets ,poly_points_x,poly_points_y)
    vessels = tmp.VesselName.unique().to_arrow().tolist()
    vessel_names_list += vessels
    tmp_gom = tmp.loc[t[t.GOM == True].index][['BaseDateTime','MMSI','LAT','LON','VesselName','IMO','SOG']].copy()
    tmp_la = tmp.loc[t[t.LA == True].index][['BaseDateTime','MMSI','LAT','LON','VesselName','IMO','SOG']].copy()
    tmp_boston = tmp.loc[t[t.boston == True].index][['BaseDateTime','MMSI','LAT','LON','VesselName','IMO','SOG']].copy()
    list_of_gom_dfs.append(tmp_gom)
    list_of_LA_dfs.append(tmp_la)
    list_of_boston_dfs.append(tmp_boston)
    if i % 20 == 0:
        print(i, f)
cargo_vessel_names = set(vessel_names_list)

In [None]:
%time
gomdf = cudf.concat(list_of_gom_dfs, ignore_index=True)
gomdf

In [None]:
%%time
ladf = cudf.concat(list_of_LA_dfs, ignore_index=True)
ladf

In [None]:
%%time
bosdf = cudf.concat(list_of_boston_dfs, ignore_index=True)
bosdf

In [None]:
%%time
objects, traj_offsets = csp.derive_trajectories(gomdf.MMSI, gomdf.LON, gomdf.LAT, gomdf.BaseDateTime)
objects

In [None]:
# Display the second value returned by csp.derive_trajectories for the GOM rows.
# NOTE(review): presumably the start offset of each trajectory within
# `objects` - confirm against the cuSpatial documentation.
traj_offsets

In [None]:
# Spot check: show every GOM row recorded for one specific MMSI value.
mmsi_matches = gomdf["MMSI"] == 205087000.0
gomdf[mmsi_matches]