# AIS Dataset Exploration

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import shapely.geometry
from shapely import wkt
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from glob import glob

import ee
# Initialize the Earth Engine client before any ee.* object is built below.
# NOTE(review): presumably requires credentials to be set up beforehand — confirm.
ee.Initialize()

## Load in data

In [None]:
# Root directory; every path below is joined onto it.
data_dir = '/home/k3blu3/datasets/cerulean'

# Sub-directory names under data_dir that are globbed for input files.
ais_name, model_name, slick_name = '19_ais', '19_coincidence', '19_vectors'

# CSV file read directly from data_dir.
truth_file = 'slick_truth_year1.csv'

In [None]:
# Read the truth table from the CSV located under data_dir.
truth_path = os.path.join(data_dir, truth_file)
truth = pd.read_csv(truth_path)

In [None]:
# Give the 'PID' column the same name as the join key used by the file
# tables, so the truth table can later be merged on 'basename'.
truth = truth.rename({'PID': 'basename'}, axis='columns')

In [None]:
# Collect the input files of each product directory.
# glob() yields paths in arbitrary, filesystem-dependent order, so the lists
# are sorted: rows are later selected by position (df.iloc[...]) and should
# resolve to the same file on every run and on every machine.
ais_files = sorted(glob(os.path.join(data_dir, ais_name, '*.geojson')))
model_files = sorted(glob(os.path.join(data_dir, model_name, '*.csv')))
slick_files = sorted(glob(os.path.join(data_dir, slick_name, '*.geojson')))

In [None]:
# Wrap each list of file paths in its own DataFrame.
ais, model, slick = (
    pd.DataFrame(paths) for paths in (ais_files, model_files, slick_files)
)

In [None]:
# Label the path column (column 0) of every table as 'fname'.
ais, model, slick = (
    frame.rename(columns={0: 'fname'}) for frame in (ais, model, slick)
)

In [None]:
def _file_stem(path):
    """Return the file name of *path* without its directory or last extension."""
    file_name = os.path.basename(path)
    stem, _extension = os.path.splitext(file_name)
    return stem


# Add a 'basename' column to each table; it is the key used for merging.
for frame in (ais, model, slick):
    frame['basename'] = frame['fname'].apply(_file_stem)

In [None]:
# Keep only the first AIS row seen for each basename.
repeated_basename = ais.duplicated(subset=['basename'], keep='first')
ais = ais[~repeated_basename]

In [None]:
# Build one row per basename present in all three tables, holding the path of
# its AIS, model and slick file. The slick path column is renamed up front so
# it does not need a suffix after the second merge.
df = (
    ais.merge(model, on='basename', suffixes=('_ais', '_model'))
    .merge(slick.rename(columns={'fname': 'fname_slick'}), on='basename')
    .loc[:, ['basename', 'fname_ais', 'fname_model', 'fname_slick']]
)

In [None]:
# Display the merged file index as the cell output.
df

In [None]:
# Join the file index with the truth table on their shared 'basename' key
# (pandas' default inner join: only basenames present in both are kept).
lol = pd.merge(df, truth, on='basename')

In [None]:
# Display the joined table with missing values replaced by 'DARK'.
# NOTE(review): fillna() returns a new frame and the result is not assigned,
# so `lol` itself is left unchanged — confirm this is meant to be display-only.
lol.fillna('DARK')

## Look at some data

In [None]:
# Select a single entry of the file index by position and load its inputs.
row = df.iloc[19]
basename = row['basename']

# Vector inputs are read with geopandas, the model CSV with pandas.
gdf_ais = gpd.read_file(row['fname_ais'])
gdf_slick = gpd.read_file(row['fname_slick'])
df_model = pd.read_csv(row['fname_model'])

In [None]:
# Dissolve all slick geometries into one, take its centroid, and show the
# centroid's first coordinate tuple in reversed order.
# NOTE(review): presumably this turns (lon, lat) into (lat, lon) — confirm
# against the CRS of the slick file.
slick_centroid = gdf_slick.dissolve().centroid[0]
slick_centroid.coords[0][::-1]

In [None]:
def _track_geometry(pings):
    """Return a LineString through the rows of *pings*, or the lone geometry.

    A LineString is built from the geometries in row order when there is more
    than one row; a single-row group keeps its original geometry.
    """
    if len(pings) > 1:
        return shapely.geometry.LineString(pings.geometry.tolist())
    return pings.iloc[0].geometry


# One record per ssvid: the identifier plus the geometry of its AIS rows.
gdf_ais_lines = gpd.GeoDataFrame(
    [
        {'ssvid': vessel_id, 'geometry': _track_geometry(pings)}
        for vessel_id, pings in gdf_ais.groupby('ssvid')
    ],
    crs='EPSG:4326',
)

In [None]:
# Draw the AIS geometries (coloured by ssvid) with the buffered slick outline
# in black on a single high-resolution, axis-free figure.
fig, ax = plt.subplots(dpi=300)
ax.axis('off')
ax.set_title(basename, fontsize=6)
gdf_ais_lines.plot(
    ax=ax,
    column='ssvid',
    cmap='tab20c',
    linewidth=0.4,
    markersize=0.4,
)
gdf_slick.buffer(0.001).plot(ax=ax, color='black')

In [None]:
# Build the Earth Engine image handle for the selected entry: the asset id is
# the collection id followed by the entry's basename.
s1_id = 'COPERNICUS/S1_GRD'
asset_id = f"{s1_id}/{basename}"
img = ee.Image(asset_id)