# Shapefile to Parquet

In [None]:
import pandas as pd
import fiona

import datashader as ds
from datashader import transfer_functions as tf
from datashader.colors import inferno, Hot, viridis
from colorcet import fire, bmw

In [None]:
# Point shapefile that load() reads (machine-specific Windows path).
shp = r'D:\Data\AIS\Vessel Traffic Data March 2024\cts_srr_03_2024_pt\cts_srr_03_2024_pt\cts_srr_03_2024_pt.shp'

def load(shp):
    """Yield one ``(TIMESTAMP, TYPE, x, y)`` tuple per feature of a shapefile.

    ``shp`` is passed straight to ``fiona.open``. The dataset is opened when
    the first record is requested and closed when the generator is exhausted
    or discarded.

    Each feature is expected to carry ``TIMESTAMP`` and ``TYPE`` properties
    (a missing one raises ``KeyError``) and a point geometry; only the first
    two coordinate values are used, so 3D ``(x, y, z)`` points are accepted.
    """
    with fiona.open(shp) as src:
        for feature in src:
            # Attribute access for both parts of the record, matching the
            # ``feature.geometry`` style already used here.
            props = feature.properties
            # Slice rather than unpack directly: a point with a z value has
            # three coordinates and would otherwise raise ValueError.
            x, y = feature.geometry.coordinates[:2]
            yield props['TIMESTAMP'], props['TYPE'], x, y

# Print only the first record that load() yields, then stop iterating.
for row in load(shp):
    print(row)
    break

In [None]:
# Collect every record from load() into a DataFrame with short column names.
df = pd.DataFrame.from_records(load(shp), columns=['TS', 'TYPE', 'LON', 'LAT'])

In [None]:
# Preview the first rows of the loaded table.
df.head()

In [None]:
def short_type(t):
    """Reduce a detailed type label to its leading category.

    * ``None`` and labels starting with ``'unknown'`` become ``'Other'``.
    * Labels starting with ``'Towing'`` or ``'Local'`` become that bare word.
    * Any other non-empty label is cut at its first ``'-'`` and stripped,
      e.g. ``'Tanker - this'`` -> ``'Tanker'``.
    * An empty string is returned unchanged.
    """
    if t is None:
        return 'Other'

    # Prefixes that map to a fixed label instead of "text before the dash".
    fixed_labels = (
        ('unknown', 'Other'),
        ('Towing', 'Towing'),
        ('Local', 'Local'),
    )
    for prefix, label in fixed_labels:
        if t.startswith(prefix):
            return label

    if not t:
        return t

    head, _sep, _tail = t.partition('-')
    return head.strip()
    
# Overwrite TYPE with its shortened form, one value at a time.
df['TYPE'] = df['TYPE'].apply(short_type)
df.head()

In [None]:
# (rows, columns) of the table.
df.shape

In [None]:
# Persist df as a Parquet file (machine-specific output path).
df.to_parquet('D:/Data/AIS/March2024.parquet')

In [None]:
# Distinct TYPE values, obtained through a categorical copy of the column.
type_cats = df['TYPE'].astype('category')
cats = type_cats.cat.categories
print(len(cats))  # number of distinct types
cats

In [None]:
# One category name per line.
print('\n'.join(cats))

In [None]:
# Row count per TYPE, largest first. The bare reset_index() renumbers the
# sorted rows and keeps their previous labels in an extra 'index' column.
df_counts = (
    df.groupby('TYPE', as_index=False)
    .size()
    .sort_values(by='size', ascending=False)
    .reset_index()
)

# The ten most frequent types.
df_10 = df_counts.head(10)
df_10

In [None]:
# Keep only the rows whose TYPE is one of those listed in df_10.
top10 = df[df['TYPE'].isin(df_10['TYPE'])]
top10.shape

In [None]:
# Bounding box of all points: column-wise minimum and maximum of LON and LAT.
minx, miny = df[['LON', 'LAT']].min()
maxx, maxy = df[['LON', 'LAT']].max()
minx, miny, maxx, maxy

In [None]:
# Data bounding box under compass names: west/east come from LON,
# south/north from LAT.
west, south = df[['LON', 'LAT']].min()
east, north = df[['LON', 'LAT']].max()
west, south, east, north

In [None]:
# Fixed plot extent. The four literals are listed in south, west, north, east
# order, so they are bound to those names directly.
south, west, north, east = (
    -58.09099823933421902,
    143.1142655752374822,
    -29.08294826079943363,
    162.9997066670000549,
)

# Output width and height passed to the datashader canvas.
w, h = 291, 424

In [None]:
# Plot extent: x spans west..east, y spans south..north.
x_range = (west, east)
y_range = (south, north)

# Count the points that fall into each cell of a w x h canvas.
cvs = ds.Canvas(
    plot_width=w,
    plot_height=h,
    x_range=x_range,
    y_range=y_range,
)
agg = cvs.points(df, 'LON', 'LAT', ds.count())

# Shade the counts with the colorcet `fire` map and histogram equalisation,
# then spread the result with dynspread.
cmap = fire
img = tf.dynspread(
    tf.shade(agg, cmap=cmap, how='eq_hist'),
    threshold=0.3,
    max_px=4,
)

# Last expression of the cell, so the notebook displays the image.
img