In [None]:
%matplotlib inline

In [None]:
import urllib
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from geoviews.tile_sources import OSM
import plotly_express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sys

In [None]:
# The Mapbox token is a credential, so it is read from the environment instead of being
# committed in the notebook (the token that used to be hard-coded here is exposed in the
# file history and should be rotated). MAPBOX_ACCESS_TOKEN must be present in the process
# environment before this cell runs; a missing variable raises KeyError.
px.set_mapbox_access_token(os.environ['MAPBOX_ACCESS_TOKEN'])

In [None]:
# For opening local files
import pathlib

# Test connection
# Make sure you have pip  install  azure-storage-blob==2.1.0 installed
# Do not install 12.1. this is not compatible yet with adlfs
# See https://github.com/dask/adlfs/issues/15
import azure.storage.blob

# this module loads dataframes in parallel
# requires pip install dask[complete] and fastparquet  and python-snappy
import dask.dataframe as dd

# this is for environmental variables for secrets (needs python-dotenv)
# You can copy the  .env.example file and rename it to .env (one directory  up from the notebooks)
# 
%load_ext dotenv
# Load environment variables from the .env file 1 directory up
%dotenv -v

# This should print 2.1.0
azure.storage.blob.__version__

In [None]:
# Read the Azure Blob SAS token from the environment variable AZURE_BLOB_SAS_TOKEN
# (expected to come from the .env file); raises KeyError when the variable is not set.
sas_token = os.environ['AZURE_BLOB_SAS_TOKEN']

In [None]:
# List the blobs inside the container.
# `blob_service` was never created anywhere in this notebook, so the client is built here
# from the storage account name and the SAS token read from the environment.
# BlockBlobService is the client class of the azure-storage-blob 2.1.0 API pinned above.
blob_service = azure.storage.blob.BlockBlobService(account_name='rwsais', sas_token=sas_token)
print("\nList blobs in the container")
for blob in blob_service.list_blobs('chia-yun-results'):
    print("\t Blob name: " + blob.name)

In [None]:
# Read the parquet dataset from Azure Blob storage with dask and materialise it
# straight away, so `df` is a pandas DataFrame from here on.
df = dd.read_parquet(
    'abfs://chia-yun-results/waal_201610.parquet',
    storage_options=dict(account_name='rwsais', sas_token=sas_token),
).compute()

# Statement 1: ships have less cargo capacity

### The relationship between ship draught and water level

Import 2016 water level data and ship draught data

In [None]:
# Load the 2016 water level observations; the date and time columns are combined into
# one timestamp that becomes the index.
# The path uses a forward slash: the original 'Water_level\wl_...' depended on '\w' being
# an invalid (deprecated) escape sequence and only resolved on Windows.
# NOTE(review): nested-list `parse_dates` is deprecated in recent pandas releases —
# confirm the pandas version this notebook targets.
wl16 = pd.read_csv('Water_level/wl_sep_lobith_2016.csv',
                   parse_dates=[['OBSERVATION DATE', 'DETECTION TIME']],
                   index_col=[0],
                   dayfirst=True)
# Name the value column after its year and discard the leftover row-number column.
wl16 = (wl16.rename(columns={'Unnamed: 0':'no', 'NUMERIC VALUE':'waterlevel2016'})
            .drop(columns='no'))
#wl16 = wl16.loc['2016-12-01 00:00:00':'2016-12-31 23:55:00']
wl16

In [None]:
# Load the 2016 flow rate observations (semicolon-separated, decimal comma, latin-1),
# indexed by the combined date/time column.
# The path uses a forward slash: the original 'Water_level\lobith_...' depended on '\l'
# being an invalid (deprecated) escape sequence and only resolved on Windows.
fr16 = pd.read_csv('Water_level/lobith_flow_rate_2016.csv',
                   sep=';', decimal=',',
                   parse_dates=[['OBSERVATION DATE', 'DETECTION TIME']],
                   dayfirst=True, encoding='latin-1', index_col=[0])
# Remove columns that are entirely empty and name the value column after its year.
fr16 = fr16.dropna(axis=1, how='all').rename(columns={'NUMERIC VALUE':'flow_rate_2016'})
#fr16 = fr16.loc['2016-12-01 00:00:00':'2016-12-31 23:55:00']
fr16 = fr16[['flow_rate_2016']]
# Keep values below 100000 only — presumably this removes missing-value sentinels;
# TODO confirm against the data source.
fr16 = fr16[fr16['flow_rate_2016'] < 100000]
fr16

In [None]:
#df = df[df['draughtInland']<10]
# Show the largest value in the draughtMarine column.
df['draughtMarine'].max()

In [None]:
# Drop rows with no draught data and duplicates.
draught = df.dropna(subset=['draughtInland'])
# `df` is materialised with .compute() right after it is read, and a pandas DataFrame has
# no .compute(); only materialise when the frame is still a dask DataFrame.
if hasattr(draught, 'compute'):
    draught = draught.compute()
draught = draught.drop_duplicates(subset=['mmsi', 'width', 'length', 'draughtInland'])
# Index by the timestamplast column with its timezone information removed.
draught['t'] = pd.to_datetime(draught['timestamplast'], format='%Y-%m-%d %H:%M:%S').dt.tz_localize(None)
draught = draught.set_index('t')
draught = draught.draughtInland

# Group draught values into average by every 10 mins (minimum time gap of water level data)
draught = draught.groupby(pd.Grouper(freq='10min')).mean()

# Join water level and draught data
# NOTE: this rebinds `df` from the raw AIS table to the joined draught / water level table.
df = pd.concat([draught, wl16], axis=1)
df

In [None]:
# Plot the 10-minute mean draught (left axis) against the water level (right axis).
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# (option not used here: connectgaps=True)
# Add traces
fig.add_trace(
    go.Scatter(x=df.index, y=df.draughtInland, name="Draught",mode='markers'),
    secondary_y=False,
)

# The water level column is renamed from 'NUMERIC VALUE' to 'waterlevel2016' when the CSV
# is loaded, so it has to be addressed by its new name (the old name raises KeyError).
fig.add_trace(
    go.Scatter(x=df.index, y=df['waterlevel2016'], name="Water level"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Mean draught compare to water level observation"
)

# Set x-axis title
fig.update_xaxes(title_text="time")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Draught</b> (m)", secondary_y=False)
fig.update_yaxes(title_text="<b>Water level</b> (N.A.P. cm)", secondary_y=True)

fig.show()

Correlation of water level and draught in 2016

In [None]:
# Drop the row-number column only if it is still present: it is already removed from the
# water level table when that table is loaded, so an unconditional drop raises KeyError.
df = df.drop(columns='no', errors='ignore')
# Pairwise Pearson correlation between the remaining columns.
df.corr(method='pearson')

Import 2017 water level data and ship draught data

In [None]:
# Load the 2017 water level observations; the date and time columns are combined into
# one timestamp that becomes the index.
# The path uses a forward slash: the original 'Water_level\wl_...' depended on '\w' being
# an invalid (deprecated) escape sequence and only resolved on Windows.
# NOTE(review): nested-list `parse_dates` is deprecated in recent pandas releases —
# confirm the pandas version this notebook targets.
wl17 = pd.read_csv('Water_level/wl_sep_lobith_2017.csv',
                   parse_dates=[['OBSERVATION DATE', 'DETECTION TIME']],
                   index_col=[0],
                   dayfirst=True)
# Name the value column after its year and discard the leftover row-number column.
wl17 = (wl17.rename(columns={'Unnamed: 0':'no','NUMERIC VALUE':'waterlevel2017'})
            .drop(columns='no'))
#wl17 = wl17.loc['2017-12-01 00:00:00':'2017-12-31 23:55:00']
wl17

In [None]:
# Load the 2017 flow rate observations (semicolon-separated, decimal comma, latin-1),
# indexed by the combined date/time column.
# The path uses a forward slash: the original 'Water_level\lobith_...' depended on '\l'
# being an invalid (deprecated) escape sequence and only resolved on Windows.
fr17 = pd.read_csv('Water_level/lobith_flow_rate_2017.csv',
                   sep=';', decimal=',',
                   parse_dates=[['OBSERVATION DATE', 'DETECTION TIME']],
                   dayfirst=True, encoding='latin-1', index_col=[0])
# Remove columns that are entirely empty and name the value column after its year.
fr17 = fr17.dropna(axis=1, how='all').rename(columns={'NUMERIC VALUE':'flow_rate_2017'})
#fr17 = fr17.loc['2017-12-01 00:00:00':'2017-12-31 23:55:00']
fr17 = fr17[['flow_rate_2017']]
# Keep values below 100000 only — presumably this removes missing-value sentinels;
# TODO confirm against the data source.
fr17 = fr17[fr17['flow_rate_2017'] < 100000]
fr17

In [None]:
# Drop rows with no draught data or duplicates.
# NOTE(review): `df` must hold the raw 2017 AIS table here. The 2016 cells rebind `df` to
# the joined draught / water level table, so the 2017 data has to be loaded into `df`
# before this cell runs.
draught = df.dropna(subset=['draughtInland'])
# A pandas DataFrame has no .compute(); only materialise when the frame is still dask.
if hasattr(draught, 'compute'):
    draught = draught.compute()
# NOTE(review): the 2016 cell also lists 'draughtInland' in the duplicate key, this one
# does not — confirm which definition of "duplicate" is intended for both years.
draught = draught.drop_duplicates(subset=['mmsi', 'width', 'length'])
# Index by the timestamplast column with its timezone information removed.
draught['t'] = pd.to_datetime(draught['timestamplast'], format='%Y-%m-%d %H:%M:%S').dt.tz_localize(None)
draught = draught.set_index('t')
draught = draught.draughtInland

# Group draught values into average by every 10 mins (minimum time gap of water level data)
draught = draught.groupby(pd.Grouper(freq='10min')).mean()

# Join water level and draught data
df = pd.concat([draught, wl17], axis=1)

In [None]:
# Plot the 10-minute mean draught (left axis) against the water level (right axis).
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=df.index, y=df.draughtInland, name="Draught"),
    secondary_y=False,
)

# The water level column is renamed from 'NUMERIC VALUE' to 'waterlevel2017' when the CSV
# is loaded, so it has to be addressed by its new name (the old name raises KeyError).
fig.add_trace(
    go.Scatter(x=df.index, y=df['waterlevel2017'], name="Water level"),
    secondary_y=True,
)

# Add figure title
fig.update_layout(
    title_text="Mean draught compare to water level observation"
)

# Set x-axis title
fig.update_xaxes(title_text="time")

# Set y-axes titles
# The water level unit is labelled cm to agree with the 2016 plot and the combined
# water level plot of the same measurement series — NOTE(review): confirm the unit.
fig.update_yaxes(title_text="<b>Draught</b> (meters)", secondary_y=False)
fig.update_yaxes(title_text="<b>Water level</b> (N.A.P. cm)", secondary_y=True)

fig.show()

Correlation of water level and draught in 2017

In [None]:
# Drop the row-number column only if it is still present: it is already removed from the
# water level table when that table is loaded, so an unconditional drop raises KeyError.
df = df.drop(columns='no', errors='ignore')
# Pairwise Pearson correlation between the remaining columns.
df.corr(method='pearson')

Plot water level together

In [None]:
# First remove year from datetime: the index becomes 'MM-DD HH:MM' strings.
# NOTE: not idempotent — once the index holds strings it has no .strftime, so re-running
# this cell fails until wl16 is loaded again.
wl16.index = wl16.index.strftime('%m-%d %H:%M')
wl16

In [None]:
# Same year-stripping for 2017: the index becomes 'MM-DD HH:MM' strings.
# NOTE: not idempotent — re-running fails until wl17 is loaded again.
wl17.index = wl17.index.strftime('%m-%d %H:%M')
wl17

In [None]:
# Inner-join the two years on their 'MM-DD HH:MM' index labels, so only the
# labels present in both tables are kept.
wl = wl16.merge(wl17, left_index=True, right_index=True)
wl

In [None]:
# Overlay the 2016 and 2017 water level series on one shared axis.
fig = go.Figure()
for year in ('2016', '2017'):
    fig.add_trace(go.Scatter(x=wl.index, y=wl['waterlevel' + year], name=year))
fig.update_xaxes(tickmode='auto', nticks=24)
fig.update_layout(
    title='Water level at Lobith during 2016 and 2017',
    xaxis_title='Observation time',
    yaxis_title='Water level (N.A.P. cm)',
    width=1000,
    height=500,
)
#fig.write_image('lobith_water_level_1617.png')
#fig.write_html('lobith_water_level_1617.html')
fig.show()

Plot flow rate

In [None]:
# First remove year from datetime: the index becomes 'MM-DD HH:MM' strings.
# NOTE: not idempotent — re-running fails until fr16 is loaded again.
fr16.index = fr16.index.strftime('%m-%d %H:%M')

In [None]:
# Same year-stripping for 2017 (not idempotent — re-running fails until fr17 is reloaded).
fr17.index = fr17.index.strftime('%m-%d %H:%M')

In [None]:
# Concat two dataframe
# (column-wise; rows are aligned on the 'MM-DD HH:MM' index labels and the union of the
# labels is sorted)
fr = pd.concat([fr16, fr17], axis=1, sort=True)
fr

In [None]:
# Overlay the 2016 and 2017 flow rate series on one shared axis.
fig = go.Figure()
for year in ('2016', '2017'):
    fig.add_trace(go.Scatter(x=fr.index, y=fr['flow_rate_' + year], name=year))
fig.update_xaxes(tickmode='auto', nticks=24)
fig.update_layout(
    title='Flow rate at Lobith in 2016 and 2017',
    xaxis_title='Observation time',
    yaxis_title='Flow rate (m3/s)',
    width=1000,
    height=500,
)
#fig.write_html('lobith_flow_rate_1617.html')
fig.show()

Plot draught

In [None]:
def draught_cat(df):
    """Return the draught class label for one row.

    `df` is a single record (anything indexable by 'draughtInland', e.g. the row
    passed by DataFrame.apply(axis=1)). The classes are half-open intervals:
    'd<6', '6<=d<8', '8<=d<10', '10<=d<13' and 'd>=13'. A value that satisfies
    none of the comparisons (such as NaN) yields None.
    """
    d = df['draughtInland']
    if d < 6:
        return 'd<6'
    bands = ((6, 8, '6<=d<8'), (8, 10, '8<=d<10'), (10, 13, '10<=d<13'))
    for lower, upper, label in bands:
        if lower <= d < upper:
            return label
    return 'd>=13' if d >= 13 else None

In [None]:
# A pandas DataFrame has no .compute(); only materialise when `df` is still a dask frame.
# NOTE(review): this cell needs the point-level AIS table (with 't', 'new_id', 'traj_id')
# in `df` — confirm it has been loaded before running.
if hasattr(df, 'compute'):
    df = df.compute()
# Rows with a draught value, one per distinct combination of the key columns. The chain
# returns a new frame, so the columns added below never write into a view of `df`.
draught = (
    df.dropna(subset=['draughtInland'])
      .drop_duplicates(subset=['new_id','traj_id','draughtInland','mmsi','vesseltype','width','length'])
)
# Draught class label per row and the calendar date of each fix.
draught['class'] = draught.apply(draught_cat,axis=1)
draught['date'] = pd.to_datetime(draught['t']).dt.date
#draught.head(10)

In [None]:
# Keep every row whose class is not 'd<6'.
draught = draught[draught['class'] != 'd<6']

In [None]:
# Histogram of rows per date, coloured by draught class. The 'd<6' class is filtered out
# before this cell, so category_orders lists only the four remaining classes.
fig = px.histogram(draught, x='date', color='class', nbins=20,
                   labels={'date':'Date', 'class':'Draught category'},
                   range_y=[0,27],
                   category_orders={'class':['6<=d<8','8<=d<10','10<=d<13','d>=13']},
                   template='simple_white',
                   title='Number of ships that had draught >= 6 in 2016-12'
                   )

fig.show()
#fig.write_html('draught_class_hist_201612.html')

In [None]:
fig = go.Figure()
# Create and style traces
fig.add_trace(go.Scatter(x=draught.t, y=draught.draughtInland, name='2016'))
# NOTE(review): `d17` is not defined anywhere in the visible notebook; the 2017 draught
# series has to be built and bound to `d17` before this cell can run.
fig.add_trace(go.Scatter(x=d17.index, y=d17.draughtInland, name='2017'))

# The upper bound was mistyped as '12:31 23:50'; it must follow the same 'MM-DD HH:MM'
# pattern as the lower bound.
fig.update_xaxes(range= ['12-01 00:00','12-31 23:50'],tickmode='auto',nticks=24)
fig.update_layout(title='Mean draught of every 10 minutes in 2016 and 2017',
                   xaxis_title='Time',
                   yaxis_title='Draught (meters)',
                   width=1000, height=500)
#fig.write_html("Mean draught 1617.html")
fig.show()

# Statement 2: the waterway is busier than usual 

Assign id number to each trajectory by using pandas functions

In [None]:
# Indexing datetime
#df['t'] = pd.to_datetime(df['timestamplast'], format='%Y-%m-%d %H:%M:%S').dt.tz_localize(None)
#df = df.reset_index(drop=False)
#df

In [None]:
"""
Method No.1
Time gap 10 minutes
"""
# Per vessel (new_id), in time order, flag every fix that comes more than 600 s after the
# previous fix of the same vessel. The running count of those flags changes value at each
# gap; sort_index() puts the counter back in index order so it lines up with `df`
# (assumes `df` has a unique index — TODO confirm).
s = df.sort_values(['new_id','t']).groupby('new_id').t.diff() \
      .gt(pd.Timedelta(seconds=600)).cumsum().sort_index()
# Every distinct (new_id, gap counter) pair is one trajectory; ngroup() numbers the groups
# from 0, so 1 is added to make the ids start at 1.
df['traj_id'] = df.groupby(['new_id', s], sort=False).ngroup().add(1)

Divide travel direction

In [None]:
# A pandas DataFrame has no .compute(); only materialise when `df` is still a dask frame.
if hasattr(df, 'compute'):
    df = df.compute()
# First and last row of every trajectory, each ordered by traj_id and re-indexed from 0 so
# that the two tables line up row by row; columns get a start_ / end_ prefix.
g = df.groupby('traj_id')
start = g.head(1).sort_values(by='traj_id').reset_index(drop=True).add_prefix('start_')
end = g.tail(1).sort_values(by='traj_id').reset_index(drop=True).add_prefix('end_')
# NOTE: this rebinds `df` to one row per trajectory (start_* and end_* columns).
df = pd.concat([start, end], axis=1)

In [None]:
def direction(df):
    """Classify one trajectory by comparing its start and end longitude.

    `df` is a single record indexable by 'start_longitude' and 'end_longitude'.
    Returns 'down' when the start longitude is greater than the end longitude,
    'up' when it is smaller, 'unknown' when both are equal, and None when none
    of the comparisons holds (for example when a value is NaN).
    """
    start_lon, end_lon = df['start_longitude'], df['end_longitude']
    if start_lon == end_lon:
        return 'unknown'
    if start_lon < end_lon:
        return 'up'
    return 'down' if start_lon > end_lon else None

In [None]:
# Label every trajectory with its travel direction, split the table accordingly and
# collect the trajectory ids of each direction as plain lists.
df['dir'] = df.apply(direction, axis=1)
up, down = df[df['dir'].eq('up')], df[df['dir'].eq('down')]
up_traj, down_traj = (part['start_traj_id'].tolist() for part in (up, down))

In [None]:
# Reload df
# NOTE(review): nothing is reloaded here. At this point `df` is the per-trajectory table
# with start_/end_ prefixed columns, which has no 'traj_id' column and, being a pandas
# frame, no .compute(). The point-level table must be loaded into `df` before this cell —
# TODO confirm the intended source.
df = df.compute()

# Rows that belong to the upstream / downstream trajectory ids collected earlier.
up = df[df['traj_id'].isin(up_traj)]
down = df[df['traj_id'].isin(down_traj)]

In [None]:
# Preview the first rows of the downstream subset.
down.head()

### Number of trips per vessel

In [None]:
# Number of distinct trajectory ids in `df`.
df['traj_id'].nunique()

Trips going upstream

In [None]:
# Load upstream dataset
# NOTE(review): no dataset is loaded in this cell; as written it repeats the count over
# the whole of `df`. Either load the upstream data into `df` first or count on the `up`
# subset — TODO confirm which was intended.
df['traj_id'].nunique()

In [None]:
# Distinct trajectory ids with a fix between 2017-12-15 00:00:00 and 2017-12-19 23:59:59
# (both bounds inclusive). NOTE(review): evaluated on `df`, not on the `up` subset.
df[(df['t'] >= '2017-12-15 00:00:00') & (df['t'] <= '2017-12-19 23:59:59')]['traj_id'].nunique()

Trips going downstream

In [None]:
# Load downstream dataset
# NOTE(review): no dataset is loaded in this cell; as written it repeats the count over
# the whole of `df`. Either load the downstream data into `df` first or count on the
# `down` subset — TODO confirm which was intended.
df['traj_id'].nunique()

In [None]:
# Distinct trajectory ids with a fix between 2017-12-15 00:00:00 and 2017-12-19 23:59:59
# (both bounds inclusive). NOTE(review): evaluated on `df`, not on the `down` subset.
df[(df['t'] >= '2017-12-15 00:00:00') & (df['t'] <= '2017-12-19 23:59:59')]['traj_id'].nunique()

### Calculate the average time for a vessel to travel across the study area

Select trips started from the bound of study area

In [None]:
# First fix of every trajectory, kept only when its longitude is greater than 6.14.
start = df.groupby('traj_id').head(1).loc[lambda fix: fix['longitude'] > 6.14]
# Last fix of every trajectory, kept only when its longitude is smaller than 5.75.
end = df.groupby('traj_id').tail(1).loc[lambda fix: fix['longitude'] < 5.75]

Match west side points with east side points

In [None]:
# Pair each qualifying start fix with the end fix of the same trajectory; columns
# present in both tables get a _start / _end suffix.
df = start.merge(end, on='traj_id', suffixes=('_start', '_end'))
df.head()

Calculate duration

In [None]:
# Trips whose start time lies inside the chosen window (both bounds inclusive),
# followed by their mean travel time (end time minus start time).
down = df[df['t_start'].between('2017-12-15 00:00:00', '2017-12-19 23:59:59')]
down['t_end'].sub(down['t_start']).mean()

# 2016-10-11 to 15 average travel time: 2:04:49.8
# 2016-10-01 to 31 average travel time: 2:03:48.6

# 2016-12-01 to 05 average travel time: 2:00:00.3
# 2016-12-15 to 19 average travel time: 2:03:27.1
# 2016-12-01 to 31 average travel time: 2:01:51.9

# 2017-10-11 to 15 average travel time: 1:56:23.4
# 2017-10-01 to 31 average travel time: 1:59:20.1

# 2017-12-01 to 05 average travel time: 1:49:33.8
# 2017-12-15 to 19 average travel time: 1:44:43.4
# 2017-12-01 to 31 average travel time: 1:48:23.3

In [None]:
# No. of trips (down)
# Distinct trajectory ids in the windowed `down` selection; the figures below are results
# recorded from earlier runs with other windows/datasets.
down['traj_id'].nunique()

# 2016-10-11 to 15: 652
# 2016-10-01 to 31: 3683

# 2016-12-01 to 05: 525
# 2016-12-15 to 19: 614
# 2016-12-01 to 31: 2219

# 2017-10-11 to 15: 462
# 2017-10-01 to 31: 2938

# 2017-12-01 to 05: 442
# 2017-12-15 to 19: 413
# 2017-12-01 to 31: 2492

In [None]:
# Trips whose start time lies inside the chosen window (both bounds inclusive),
# followed by their mean travel time (end time minus start time).
up = df[df['t_start'].between('2017-12-15 00:00:00', '2017-12-19 23:59:59')]
up['t_end'].sub(up['t_start']).mean()

# 2016-10-11 to 15 average travel time: 3:17:58.0
# 2016-10-01 to 31 average travel time: 3:20:46.8

# 2016-12-01 to 05 average travel time: 3:15:36.6
# 2016-12-15 to 19 average travel time: 3:20:14.8
# 2016-12-01 to 31 average travel time: 3:17:42.1

# 2017-10-11 to 15 average travel time: 3:18:31.4
# 2017-10-01 to 31 average travel time: 3:18:26.1

# 2017-12-01 to 05 average travel time: 3:25:29.2
# 2017-12-15 to 19 average travel time: 3:37:10.7
# 2017-12-01 to 31 average travel time: 3:28:42.3

In [None]:
# No. of trips (up)
# Count on the windowed `up` selection, exactly as the matching "down" cell counts on
# `down`; counting on `df` ignored the time window and always returned the total.
up['traj_id'].nunique()

# 2016-10-11 to 15: 537
# 2016-10-01 to 31: 2959

# 2016-12-01 to 05: 401
# 2016-12-15 to 19: 429
# 2016-12-01 to 31: 1690

# 2017-10-11 to 15: 389
# 2017-10-01 to 31: 2323

# 2017-12-01 to 05: 348
# 2017-12-15 to 19: 307
# 2017-12-01 to 31: 1834

In [None]:
# Mean travel time (end time minus start time) over all matched trips in `df`.
(df['t_end'] - df['t_start']).mean()

### Count unique ship numbers in every hour

In [None]:
# new_id numbers within the Waal 2016-12
# Distinct new_id values per hour of 't'. The column is selected before aggregating so
# nunique() is not computed for every other column only to be thrown away.
mmsi16 = df.groupby(pd.Grouper(key='t',freq='1h'))[['new_id']].nunique()
mmsi16

In [None]:
# new_id numbers within the Waal 2017-12
# Distinct new_id values per hour of 't'. The column is selected before aggregating so
# nunique() is not computed for every other column only to be thrown away.
# NOTE(review): this reads the same `df` as the 2016 cell; the 2017 data has to be loaded
# into `df` first, otherwise both tables are identical.
mmsi17 = df.groupby(pd.Grouper(key='t',freq='1h'))[['new_id']].nunique()
mmsi17

In [None]:
# Strip the year from the index ('MM-DD HH:MM' strings) and name the count column after
# its year. Not idempotent: re-running fails once the index holds strings.
mmsi16.index = mmsi16.index.strftime('%m-%d %H:%M')
mmsi16.rename(columns={'new_id': 'ships2016'},inplace=True)

In [None]:
# Strip the year from the index ('MM-DD HH:MM' strings) and name the count column after
# its year. Not idempotent: re-running fails once the index holds strings.
mmsi17.index = mmsi17.index.strftime('%m-%d %H:%M')
mmsi17.rename(columns={'new_id': 'ships2017'},inplace=True)

In [None]:
# Concat 2016 and 2017 table
# (column-wise, rows aligned on the 'MM-DD HH:MM' index labels)
nship = pd.concat([mmsi16, mmsi17], axis=1, sort=False)
nship

In [None]:
# Hourly number of distinct ships, one line per year.
fig = go.Figure()
for column, year in (('ships2016', '2016'), ('ships2017', '2017')):
    fig.add_trace(go.Scatter(x=nship.index, y=nship[column], name=year))

fig.update_layout(
    title_text='Number of unique ships in every hour<br>2016 and 2017 October',
    xaxis_title_text='Time',
    xaxis_nticks=16,
    yaxis_title_text='Number of ships',
    width=1000,
    height=500,
)

fig.show()
#fig.write_html("unique_ships_per_hour_oct.html")

### Calculate the trip number per unique vessel (not included in thesis)

In [None]:
# Number of distinct trajectory ids recorded for every mmsi.
trip_no = df.groupby('mmsi').nunique()[['traj_id']]
trip_no

After looking at one specific ship in detail, we found that not every AIS transmitter sends its signals at the expected time interval.

In [None]:
# Vessels with more than 40 distinct trajectory ids.
trip_no[trip_no['traj_id']>40]

In [None]:
# Plot 
# Violin plot (with box and all individual points) of the trip count per vessel.
fig = px.violin(trip_no, y='traj_id',box=True,points='all')
fig.update_yaxes(title_text="Number of trips")
fig.show()
#fig.write_html('trips_per_ship2016.html')

In [None]:
# Plot 
# Same violin plot as the previous cell, built from the same `trip_no`; it is created but
# not displayed because fig.show() is commented out.
fig = px.violin(trip_no, y='traj_id',box=True,points='all')
fig.update_yaxes(title_text="Number of trips")
#fig.show()
#fig.write_html('trips_per_ship2017.html')

# Statement 3: ships reduce speed to minimize the dynamic draft 

### Calculate the average speed of up and down stream

Down stream (west)

In [None]:
# Mean of the sog column. The trailing .compute() requires `df` to be a dask DataFrame
# here. NOTE(review): the downstream dataset has to be loaded into `df` first — the cell
# itself does not select a direction.
df['sog'].mean().compute()

Up stream (east)

In [None]:
# Mean of the sog column. The trailing .compute() requires `df` to be a dask DataFrame
# here. NOTE(review): the upstream dataset has to be loaded into `df` first — the cell
# itself does not select a direction.
df['sog'].mean().compute()

### Identify and visualize vessels' speed change

Use a heatmap to plot a pixel-based view of the speed over ground (SOG).<br>
Select a small observation area to take a closer look at the temporal pattern of speed changes.

In [None]:
# Observation areas, keyed by name. Every polygon is a closed ring of
# [longitude, latitude] vertices (first vertex repeated at the end).
STUDY_AREAS = {
    'Outside Vluchthaven Tolkamer': Polygon([[6.087455749511719, 51.858577350209735],
                                             [6.082735061645508, 51.86101571797086],
                                             [6.073079109191894, 51.85783521205157],
                                             [6.077327728271484, 51.85380624845141],
                                             [6.087455749511719, 51.858577350209735]]),
    'Outside Erlecom': Polygon([[5.958538055419922, 51.851287963007],
                                [5.974330902099609, 51.851287963007],
                                [5.974330902099609, 51.857994242687404],
                                [5.958538055419922, 51.857994242687404],
                                [5.958538055419922, 51.851287963007]]),
    'Groenlanden area': Polygon([[5.918025970458984, 51.870211415877094],
                                 [5.930900573730469, 51.870211415877094],
                                 [5.930900573730469, 51.87749773748344],
                                 [5.918025970458984, 51.87749773748344],
                                 [5.918025970458984, 51.870211415877094]]),
    'Waalbrug area': Polygon([[5.873394012451172, 51.855370165317645],
                              [5.881590843200683, 51.852931491586446],
                              [5.8864402770996085, 51.85749064374565],
                              [5.878243446350098, 51.859823054599026],
                              [5.873394012451172, 51.855370165317645]]),
    'Under De Oversteek bridge': Polygon([[5.841464996337891,51.86128074998202],
                                          [5.837602615356445,51.857437633002846],
                                          [5.844855308532715,51.854336395851554],
                                          [5.848631858825684,51.857093061652186],
                                          [5.841464996337891,51.86128074998202]]),
    'Ewijkse Plaat area': Polygon([[5.759153366088867,51.88417362103146],
                                   [5.756063461303711,51.879352249020855],
                                   [5.763959884643555,51.87786864591045],
                                   [5.766363143920898,51.882690177025324],
                                   [5.759153366088867,51.88417362103146]]),
}

# The cell used to assign `polygon` six times in a row, so only the last area ever took
# effect. Selecting by name keeps that result (Ewijkse Plaat) while making it possible to
# switch areas without reordering code.
polygon = STUDY_AREAS['Ewijkse Plaat area']

In [None]:
# Turn dask type to dataframe type.
# A pandas DataFrame has no .compute(), so only materialise when `df` is still dask.
if hasattr(df, 'compute'):
    df = df.compute()

# GeoDataFrame: one point geometry per row built from the longitude/latitude columns,
# declared as EPSG:4326 coordinates.
gdf = GeoDataFrame(df, crs = 'EPSG:4326', geometry = gpd.points_from_xy(df.longitude, df.latitude))
gdf.head()

In [None]:
# Define boundary polygon
# Ewijkse Plaat area
polygon = Polygon([[5.759153366088867,51.88417362103146],
                   [5.756063461303711,51.879352249020855],
                   [5.763959884643555,51.87786864591045],
                   [5.766363143920898,51.882690177025324],
                   [5.759153366088867,51.88417362103146]])

# Clip the points inside polygon
#gdf.reset_index(inplace=True)
df = gpd.clip(gdf,polygon)
# Index the clipped points by their 't' column.
df.set_index('t', inplace=True)

# Remove abnormal value if needed
# (keeps rows with sog <= 35; NOTE(review): the Nijmegen cell later uses a strict < 35)
df = df[df['sog'] <= 35]

In [None]:
# Mean sog of the clipped, filtered points.
df['sog'].mean()

Heatmaps for week day pattern

In [None]:
# Mean sog per 30-minute bin of the time index, rounded to one decimal, together
# with the time of day of each bin.
df = df[['sog']].groupby(pd.Grouper(freq='30min')).mean()
df = df.assign(sog=df['sog'].round(1), time=df.index.time)
df.head()

In [None]:
# Heatmap of mean sog: x is the time of day and y the weekday number (df.index.weekday)
# of each 30-minute bin; the colour range is fixed to 0-15.
# NOTE(review): the title names the Groenlanden area and upstream traffic, while the clip
# cell before this uses the Ewijkse Plaat polygon — confirm which area/direction `df` holds.
fig = go.Figure(data=go.Heatmap(
        z=df.sog,
        x=df.time,
        y=df.index.weekday,
        colorscale='rdylgn',
        zmax=15, zmin=0))

#fig.layout.template = 'simple_white'
fig.update_layout(
    title='Upstream mean speed by half hour of every weekday 2016-12<br>Groenlanden area',
    xaxis_nticks=24, plot_bgcolor='white',
    xaxis_showgrid=False, yaxis_showgrid=False)
fig.show()

Heatmaps for low water levels pattern

In [None]:
# Mean sog per 30-minute bin (two decimals), then move the time index back into a
# 't' column and split it into a calendar date and a time of day.
df = df[['sog']].groupby(pd.Grouper(freq='30min')).mean()
df['sog'] = df['sog'].round(2)
df = df.reset_index()
df['date'] = df['t'].dt.date
df['time'] = df['t'].dt.time

df.head()

In [None]:
# Pixel view: one cell per (date, half hour) coloured by mean sog, colour range 0-15.
heat = go.Heatmap(x=df['time'], y=df['date'], z=df['sog'],
                  colorscale='rdylgn', zmin=0, zmax=15)
fig = go.Figure(data=heat)

fig.update_layout(
    title='Downstream mean speed by half hour in 2016-12<br>Ewijkse Plaat area',
    xaxis_nticks=24,
    yaxis_nticks=15,
    plot_bgcolor='white',
    #xaxis_showgrid=False, yaxis_showgrid=False, 
    width=600,
    height=500,
)
fig.show()
#fig.write_html('SOG_everyday_heatmap/SOG down Ewijkse 201612.html')
#fig.write_html('SOG_everyday_heatmap/SOG down Ewijkse 201612.html')

# Statement 4: smaller vessels shift operation area to the Rhine 

### Count ships in each size group

In [None]:
# Point geometry per row from the longitude/latitude columns, declared as EPSG:4326.
# NOTE(review): `df` must be the point-level AIS table again here (the heatmap cells before
# this leave `df` as an aggregated sog table without longitude/latitude).
gdf = GeoDataFrame(df, crs = 'EPSG:4326', geometry = gpd.points_from_xy(df.longitude, df.latitude))
gdf.info() # GeoDataFrame

In [None]:
# The Rhine
# (boundary read from the local file rhine.geojson)
polygon = gpd.read_file('rhine.geojson')

# Clip the points inside polygon
df = gpd.clip(gdf,polygon)
# Renumber the rows from 0, discarding the old index.
df.reset_index(drop=True, inplace=True)
df.head()

In [None]:
# Remove time from timestamp
#df['timestamplast'] = pd.to_datetime(df['timestamplast']).dt.date

# One row per (new_id, traj_id, mmsi, length, width) combination, keeping the first
# occurrence; rows without a length or width are removed and the rows renumbered from 0.
df = (
    df.drop_duplicates(subset=['new_id','traj_id','mmsi','length','width'], keep='first')
      .dropna(subset=['length','width'])
      .reset_index(drop=True)
)
#df.info()

In [None]:
# Tag every row with a fixed direction label and with the year of its timestamplast.
# NOTE: the direction is a hand-set constant, to be edited per dataset.
df['dir'] = 'down'
df['year'] = pd.to_datetime(df['timestamplast']).dt.year
#df.head()

In [None]:
# NOTE(review): df16u, df16d, df17u and df17d are only ever assigned in the commented-out
# lines below; each has to be captured by hand (un-comment one line per dataset run)
# before the concat can work.
#df16u = df
#df16d = df
#df17u = df
#df17d = df
# Stack the four year/direction tables and renumber the rows from 0.
df = pd.concat([df16u,df16d,df17u,df17d])
df.reset_index(drop=True,inplace=True)
df

In [None]:
# Density contours of ship length against width, one panel per direction (columns) and
# year (rows).
fig = px.density_contour(df, x='length', y='width', 
                         facet_col='dir',
                         facet_row='year',
                         labels={'dir':'Direction'},
                         template='none',
                         title='The size of ships around the Rhine<br>December')
#fig.update_xaxes(tickmode='array', tickvals=[0,100,200,300,400,500])
#fig.update_yaxes(tickmode='array', tickvals=[0,10,20,30,40,50])
fig.show()
#fig.write_html('size_rhine_dec.html')

In [None]:
# Density heatmap of ship length against width on log-scaled axes, one panel per
# direction (columns) and year (rows).
# NOTE(review): this title says October while the neighbouring size plots say December —
# confirm which month `df` holds.
fig = px.density_heatmap(df, x='length', y='width', 
                         log_x=True, log_y=True,
                         facet_col='dir',
                         facet_row='year',
                         labels={'dir':'Direction'},
                         template='none',
                         color_continuous_scale='greys',
                         width=1200, height=800,
                         title='The size of ships around the Rhine at October')
fig.show()

In [None]:
# Save the current figure as a standalone HTML file in the working directory.
fig.write_html('size_heatmap_rhine_oct_grey.html')

In [None]:
#df["draughtInland"] = df["draughtInland"].astype(float)
# Scatter of ship length against width, one panel per direction (columns) and year (rows),
# with the axes limited to 0-400 (length) and 0-50 (width).
fig = px.scatter(df, x='length', y='width',
                 facet_col='dir',
                 facet_row='year',
                 labels={'dir':'Direction'},
                 template='none',
                 title='The size of ships around the Rhine<br>December')
fig.update_xaxes(range=[0, 400])
fig.update_yaxes(range=[0, 50])
fig.show()

In [None]:
# Save the current figure as a standalone HTML file in the working directory.
fig.write_html('size_scatter_rhine_dec.html')

# Animation (not included in thesis)

In [None]:
# The Waal bend at Nijmegen
# Axis-aligned rectangle given as a closed ring of [longitude, latitude] vertices.
# NOTE: the same polygon is defined again in the clipping cell further down.
polygon = Polygon([[5.838460922241211, 51.847178878957045],
                   [5.889616012573242, 51.847178878957045],
                   [5.889616012573242, 51.86451401480079],
                   [5.838460922241211, 51.86451401480079],
                   [5.838460922241211, 51.847178878957045]])

In [None]:
# Keep the rows whose timestamplast falls on 2016-12-16 (bounds inclusive).
df = df[(df['timestamplast']>='2016-12-16 00:00:00+00:00') & 
        (df['timestamplast']<='2016-12-16 23:59:59+00:00')]
# Turn dask type to dataframe type.
# A pandas DataFrame has no .compute(), so only materialise when `df` is still dask.
if hasattr(df, 'compute'):
    df = df.compute()
# Parsed timestamp with the timezone information removed. assign() returns a new frame, so
# nothing is written into a filtered view of the original table.
df = df.assign(t=pd.to_datetime(df['timestamplast'], format='%Y-%m-%d %H:%M:%S').dt.tz_localize(None))

# GeoDataFrame: one point geometry per row from longitude/latitude, declared as EPSG:4326.
gdf = GeoDataFrame(df, crs = 'EPSG:4326', geometry = gpd.points_from_xy(df.longitude, df.latitude))
gdf.info()

In [None]:
# Define boundary polygon
# The Waal bend at Nijmegen
polygon = Polygon([[5.838460922241211, 51.847178878957045],
                   [5.889616012573242, 51.847178878957045],
                   [5.889616012573242, 51.86451401480079],
                   [5.838460922241211, 51.86451401480079],
                   [5.838460922241211, 51.847178878957045]])

# Clip the points inside polygon
gdf = gdf.reset_index(drop=True)
df = gpd.clip(gdf,polygon).set_index('t')

# Remove abnormal value if needed
df = df[df['sog'] < 35]
# reset_index(drop=True) does not create an 'index' column, so an unconditional drop
# raised KeyError; remove the column only when an earlier step left one behind.
df = df.drop(columns='index', errors='ignore')
df

In [None]:
# 't' is already made the index when the points are clipped, and a second set_index('t')
# raises KeyError because the column no longer exists; only set it when still needed.
if df.index.name != 't':
    df = df.set_index('t')

In [None]:
# Rows whose index timestamp lies between 15:40 and 15:55 on 2016-12-16 (inclusive).
tiny = df[(df.index>= '2016-12-16T15:40:00')&(df.index<='2016-12-16T15:55:00')]
tiny

In [None]:
# Plotly Express scatter points map
# Points of the `tiny` window on a Mapbox base map, coloured by sog.
fig = px.scatter_mapbox(tiny, lat='latitude', lon='longitude', zoom=13, color='sog',
                        color_continuous_scale='plasma',
                        title='The river bend at Nijmegen 2016-12-16',
                        height=1000,width=1200)

fig.update_layout(mapbox_style="dark")
fig.show()
#fig.write_html('nijmegen_20161216_cover.html')

In [None]:
# Replace timestamplast by a string truncated to whole minutes (seconds written as 00);
# the animation cells use it as the frame key. Requires a datetime-typed column —
# NOTE(review): an earlier cell compares this column against strings; confirm its dtype.
df['timestamplast'] = df['timestamplast'].dt.strftime('%Y-%m-%d %H:%M:00')

In [None]:
# Label-slice on the time index: rows from 15:15 through 16:45 on 2016-12-16.
tiny = df.loc['2016-12-16 15:15:00':'2016-12-16 16:45:00']

In [None]:
# Plotly Express scatter points map
# Animated over the whole of `df`, one frame per distinct timestamplast value.
fig = px.scatter_mapbox(df, lat='latitude', lon='longitude', zoom=13, 
                        title='The river bend at Nijmegen 2016-12-16',
                        animation_frame='timestamplast')
fig.show()
#fig.write_html('nijmegen_animation_20161216.html')

In [None]:
# Plotly Express scatter points map
# Animated over the `tiny` window, one frame per distinct timestamplast value, coloured by
# sog with the colour range fixed to 0-20.
fig = px.scatter_mapbox(tiny, lat='latitude', lon='longitude', color='sog',
                        color_continuous_scale='plasma', zoom=13, 
                        range_color=[0,20],
                        title='The river bend at Nijmegen 2016-12-16 16:00',
                        animation_frame='timestamplast')
fig.show()


In [None]:
# Save the current figure as a standalone HTML file in the working directory.
fig.write_html('nijmegen_animation_sog_20161216-1515.html')