In [None]:
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
import movingpandas as mpd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import scipy.ndimage as ndi

In [None]:
# Read the receiver station table and turn its Lng/Lat columns into point geometries (EPSG:4326)
receivers_gdf = pd.read_csv('../data/VPS-Station-Locations.csv')
receivers_gdf = gpd.GeoDataFrame(
    receivers_gdf,
    geometry=gpd.points_from_xy(receivers_gdf.Lng, receivers_gdf.Lat),
    crs='EPSG:4326',
)
receivers_gdf.head()

In [None]:
# Directory that the figures produced by this notebook are saved into (used as the prefix in plt.savefig calls)
output = '../graphs'

In [None]:
# Input file with the shark positions
# (alternative dataset: '../data/SharkArray-2020-05-21-thru-05-28.csv')
filename = '../data/SharkArray-01-animals.csv'

# Read the positions, parse DATETIME into a timestamp column 't', and build point
# geometries from LON/LAT in EPSG:4326
shark_gdf = pd.read_csv(filename)
shark_gdf = gpd.GeoDataFrame(
    shark_gdf.assign(t=pd.to_datetime(shark_gdf['DATETIME'])),
    geometry=gpd.points_from_xy(shark_gdf.LON, shark_gdf.LAT),
    crs='EPSG:4326',
)
# Index by timestamp and strip any timezone information from the index
shark_gdf = shark_gdf.set_index('t').tz_localize(None)
shark_gdf.head()

In [None]:
# Report the table dimensions, the column names and the distinct transmitter IDs
n_rows, n_cols = shark_gdf.shape
print('The dataset contains', n_rows, 'rows and', n_cols, 'columns.')
print('The column names are:', shark_gdf.columns.tolist())
print('The unique transmitter names are:', shark_gdf['TRANSMITTER'].unique())

In [None]:
# Group the positions into one trajectory per shark, keyed by the TRANSMITTER column
traj_collection = mpd.TrajectoryCollection(shark_gdf, traj_id_col='TRANSMITTER')
print(traj_collection)

In [None]:
# Count the positions in each trajectory (the ID is cut at the first underscore)
data = [[traj.id.split('_')[0], len(traj.df)] for traj in traj_collection]
num_points = pd.DataFrame(data, columns=['TRANSMITTER', 'NUM_POSITIONS'])
print(num_points)

In [None]:
# Give every trajectory a TIMEDELTA column: the time elapsed between the previous
# position and the current one. The first row has no predecessor and gets a zero timedelta.
for traj in traj_collection.trajectories:
    timestamps = traj.df.index
    traj.df['TIMEDELTA'] = [timedelta()] + list(timestamps[1:] - timestamps[:-1])

In [None]:
# Plot time maps of positions: for each position, the gap since the previous position (x)
# against the gap until the next position (y). The top panel overlays every shark and
# each shark also gets a panel of its own.
cmap = plt.get_cmap('jet')
N = len(traj_collection.trajectories)
fig, axs = plt.subplots(N + 1, 1, figsize=(5, 5 * (N + 1)))
for i, traj in enumerate(traj_collection.trajectories):
    color = cmap(float(i) / N)
    label = traj.id.split('_')[0]
    # Gaps in seconds, skipping the first row (its TIMEDELTA is only a zero placeholder).
    # Converting the whole column at once avoids integer `[]` lookups on a
    # datetime-indexed Series, which pandas has deprecated.
    seconds = pd.to_timedelta(traj.df['TIMEDELTA']).dt.total_seconds().to_numpy()[1:]
    xcoords = seconds[:-1] / 60
    ycoords = seconds[1:] / 60
    # Same points on the combined panel and on this shark's own panel
    for ax in (axs[0], axs[i + 1]):
        ax.plot(xcoords, ycoords, marker='.', ls='', markerfacecolor=color, markeredgecolor=color, label=label)
    axs[i + 1].set_title('Num points: ' + str(len(seconds)))
# Axis values are minutes; label a few round durations on the log axes
ticks = [1, 3, 10, 60, 600, 1440, 14400]
tick_labels = ['1 min', '3 min', '10 min', '1 hr', '10 hr', '1 d', '10 d']
for ax in axs:
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.legend(bbox_to_anchor=(1.04, 1))
    ax.set_xlabel('Time since last position')
    ax.set_ylabel('Time before next position')
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xticklabels(tick_labels)
    ax.set_yticklabels(tick_labels)
axs[0].set_title('Shark Array 01 Time Map')
plt.savefig(output + '/scatter_timemaps_full_dataset', bbox_inches="tight")
plt.show()

In [None]:
def plot_time_map_heat(ax, seconds, xcoords, ycoords, bins, width, title, color_scale='log'):
    """Draw a time-map heat map on ``ax``.

    Every ``(xcoords[k], ycoords[k])`` pair is converted to log10(minutes), counted on a
    ``bins`` x ``bins`` grid, passed through ``scipy.ndimage.gaussian_filter`` and shown
    with ``imshow`` together with a colorbar. Both axes run from 1 minute up to the
    largest value in ``seconds``.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    seconds : sequence of time differences in seconds; only its maximum is used,
        to fix the upper limit of both axes.
    xcoords, ycoords : equally long sequences of time differences in seconds
        (x: time since last position, y: time before next position).
    bins : number of grid cells along each axis.
    width : sigma handed to ``scipy.ndimage.gaussian_filter``.
    title : axes title.
    color_scale : ``'log'`` colors by log10(count + 1); any other value colors by the raw count.

    Raises
    ------
    ValueError
        If ``seconds`` is empty or its maximum does not exceed 60 seconds, because
        the axes would then have no extent.

    Notes
    -----
    Pairs with a coordinate below one minute (or above the maximum of ``seconds``)
    lie outside the plotted range and are left out of the counts. Previously such
    pairs produced negative bin indices that wrapped around to the opposite side
    of the grid, or raised for zero gaps.
    """
    def transform_data(secs):
        # seconds -> log10(minutes)
        return np.log10(secs / 60)

    seconds = np.asarray(seconds, dtype=float)
    xcoords = np.asarray(xcoords, dtype=float)
    ycoords = np.asarray(ycoords, dtype=float)

    if seconds.size == 0:
        raise ValueError('seconds must contain at least one time difference')
    max_seconds = np.max(seconds)
    if not max_seconds > 60:
        raise ValueError('the largest time difference must exceed one minute')
    max_diff = transform_data(max_seconds)

    # Keep only the pairs that fall inside the plotted range [1 min, max]
    in_range = (
        (xcoords >= 60) & (xcoords <= max_seconds)
        & (ycoords >= 60) & (ycoords <= max_seconds)
    )
    x_heat = (bins - 1) * transform_data(xcoords[in_range]) / max_diff
    y_heat = (bins - 1) * transform_data(ycoords[in_range]) / max_diff

    # Set up heatmap: np.add.at accumulates correctly when several pairs share a cell
    H = np.zeros((bins, bins))
    np.add.at(H, (x_heat.astype(int), y_heat.astype(int)), 1)
    H = ndi.gaussian_filter(H, width)

    if color_scale == 'log':
        # Log scale output
        def transform_output(x):
            return np.log10(x + 1)

        def transform_output_inv(x):
            return 10 ** x - 1
    else:
        # Linear output
        def transform_output(x):
            return x

        def transform_output_inv(x):
            return x

    # Transpose so that the first grid index (x) runs along the horizontal image axis
    H = transform_output(H.T)

    # Plot
    cmap = plt.get_cmap('nipy_spectral_r')
    mappable = ax.imshow(H, origin='lower', extent=(0, max_diff, 0, max_diff), cmap=cmap)
    ax.set_xlim(0, max_diff)
    ax.set_ylim(0, max_diff)
    ax.set_xlabel('Time since last position')
    ax.set_ylabel('Time before next position')
    ax.set_title(title)
    # Tick positions are given in minutes and converted to the log10(minutes) axis units
    ticks = transform_data(np.array([1, 2, 3, 10, 60, 600, 1440, 14400]) * 60)
    tick_labels = ['1 min', '', '3 min', '10 min', '1 hr', '10 hr', '1 d', '10 d']
    ax.set_xticks(ticks)
    ax.set_xticklabels(tick_labels)
    ax.set_yticks(ticks)
    ax.set_yticklabels(tick_labels)
    ax.set_facecolor(cmap(0))

    divider = make_axes_locatable(ax)
    cax = divider.append_axes('right', size='5%', pad=0.05)
    cbar = plt.colorbar(mappable, cax=cax)
    cbar.set_label('Num positions')

    # Colorbar ticks are evenly spaced in the displayed scale and labeled with the
    # corresponding untransformed counts
    cbar_ticks = np.linspace(0, np.max(H), 10)
    cbar_tick_labels = ['%.2f' % x for x in transform_output_inv(cbar_ticks)]

    cbar.set_ticks(cbar_ticks)
    cbar.set_ticklabels(cbar_tick_labels)

In [None]:
# Extract data for all trajectories.
# seconds: every gap between consecutive positions; xcoords/ycoords: for each position,
# the gap before it and the gap after it. Pairs are formed within one trajectory only,
# so no pair mixes the gaps of two different sharks and no pair is added twice.
seconds = []
xcoords = []
ycoords = []
for traj in traj_collection.trajectories:
    # Skip the first row: its TIMEDELTA is the zero placeholder, not a real gap
    traj_seconds = pd.to_timedelta(traj.df['TIMEDELTA']).dt.total_seconds().tolist()[1:]
    seconds += traj_seconds
    xcoords += traj_seconds[:-1]
    ycoords += traj_seconds[1:]

In [None]:
# granular: 50 x 50 grid, filter width 0
bins, width = 50, 0

fig, ax = plt.subplots(figsize=(5, 5))
plot_time_map_heat(
    ax, seconds, xcoords, ycoords,
    bins=bins, width=width, title='Shark Array 01 Time Map', color_scale='log',
)
fig.savefig(output + '/heatmap_time_map_full_dataset', bbox_inches="tight")
plt.show()

In [None]:
# smooth: 256 x 256 grid, filter width 8
bins, width = 256, 8

fig, ax = plt.subplots(figsize=(5, 5))
plot_time_map_heat(
    ax, seconds, xcoords, ycoords,
    bins=bins, width=width, title='Shark Array 01 Time Map', color_scale='log',
)
fig.savefig(output + '/heatmap_time_map_full_dataset_smooth', bbox_inches="tight")
plt.show()

In [None]:
# Plot time map heat maps for all trajectories separately, one panel per shark
n = len(traj_collection.trajectories)
# squeeze=False keeps axs an array even when there is only one trajectory
fig, axs = plt.subplots(n, 1, figsize=(5, 5 * n), squeeze=False)

bins = 50
width = 0

for traj, ax in zip(traj_collection.trajectories, axs.ravel()):
    # Gaps in seconds without the first row's zero placeholder; converting the column
    # in one go avoids deprecated integer `[]` lookups on a datetime-indexed Series
    seconds = pd.to_timedelta(traj.df['TIMEDELTA']).dt.total_seconds().to_numpy()[1:]
    xcoords = seconds[:-1]
    ycoords = seconds[1:]
    title = 'Shark {}: num positions = {}'.format(traj.id, traj.size())
    plot_time_map_heat(ax, seconds, xcoords, ycoords, bins, width, title, color_scale='log')
plt.savefig(output + '/heatmap_time_map_by_shark', bbox_inches="tight")
plt.show()

In [None]:
# Plot time map heat maps side by side for three selected sharks
trajs = [traj_collection.get_trajectory(traj_id) for traj_id in ['2020-20', '2020-13', '2020-04']]
n = len(trajs)

fig, axs = plt.subplots(1, n, figsize=(5 * n, 5))

bins = 50
width = 0

for traj, ax in zip(trajs, axs):
    # Gaps in seconds without the first row's zero placeholder; converting the column
    # in one go avoids deprecated integer `[]` lookups on a datetime-indexed Series
    seconds = pd.to_timedelta(traj.df['TIMEDELTA']).dt.total_seconds().to_numpy()[1:]
    xcoords = seconds[:-1]
    ycoords = seconds[1:]
    title = 'Shark {}: num positions = {}'.format(traj.id, traj.size())
    plot_time_map_heat(ax, seconds, xcoords, ycoords, bins, width, title, color_scale='log')

plt.tight_layout()
plt.savefig(output + '/heatmap_time_map_3_sharks', bbox_inches="tight")
plt.show()

In [None]:
# Create histogram of position data by day
SECONDS_PER_DAY = 3600 * 24
start_time = shark_gdf.index.min()
end_time = shark_gdf.index.max()
delta = end_time - start_time
# One bin per whole day covered; at least one bin so data spanning less than a day still plots
bins = max(1, int(delta.total_seconds() // SECONDS_PER_DAY))
# Days elapsed since the first position, computed for the whole index at once
data = np.asarray((shark_gdf.index - start_time).total_seconds()) / SECONDS_PER_DAY
plt.hist(data, bins=bins)
plt.gca().set_xlabel('Days since first position')
plt.gca().set_ylabel('Num positions')
plt.gca().set_title('Position data histogram')
plt.show()

In [None]:
# Daily position histograms: a stacked histogram of all sharks on top, followed by
# one histogram per shark drawn with the same y-range.
SECONDS_PER_DAY = 3600 * 24
start_time = shark_gdf.index.min()
end_time = shark_gdf.index.max()
delta = end_time - start_time
# One bin per whole day covered; at least one bin so data spanning less than a day still plots
num_bins = max(1, int(delta.total_seconds() // SECONDS_PER_DAY))

cmap = plt.get_cmap('jet')
N = len(traj_collection.trajectories)
fig, axs = plt.subplots(N + 1, 1, figsize=(5, 5 * (N + 1)))

total_data = []
colors = []
labels = []

for (i, traj), ax in zip(enumerate(traj_collection.trajectories), axs[1:]):
    color = cmap(float(i) / N)
    # Days elapsed since the first position of the whole dataset
    data = np.asarray((traj.df.index - start_time).total_seconds()) / SECONDS_PER_DAY
    label = traj.id.split('_')[0]

    total_data.append(data)
    colors.append(color)
    labels.append(label)

    ax.hist(data, bins=num_bins, color=color, label=label)
    ax.legend(bbox_to_anchor=(1.04, 1))
    ax.set_title('Num positions: ' + str(traj.size()))

axs[0].hist(total_data, bins=num_bins, color=colors, label=labels, stacked=True, histtype='barstacked')
axs[0].legend(bbox_to_anchor=(1.04, 1))

# Set the same scale for each plot
for ax in axs[1:]:
    ax.set_ylim(axs[0].get_ylim())

axs[0].set_title('Position data histograms')
plt.savefig(output + '/position_data_histograms_full_dataset', bbox_inches="tight")
plt.show()

In [None]:
# Histogram of the gaps between consecutive positions, in minutes, for all sharks.
# The gaps are rebuilt here from the trajectories so the cell does not depend on
# whichever cell last assigned `seconds` (which may be a plain list, where `/ 60` fails).
gap_minutes = np.concatenate([
    # drop each trajectory's first row: its TIMEDELTA is the zero placeholder
    pd.to_timedelta(traj.df['TIMEDELTA']).dt.total_seconds().to_numpy()[1:]
    for traj in traj_collection.trajectories
]) / 60
plt.hist(gap_minutes, bins=list(range(0, 60)))
plt.gca().set_title('Time difference histogram')
plt.gca().set_xlabel('minutes')
plt.show()

In [None]:
# Add velocities and headings to each trajectory.
# Later cells read traj.df['speed'] and traj.df['direction'], which these calls are
# expected to create.
for traj in traj_collection.trajectories:
    traj.add_speed()
    traj.add_direction()

In [None]:
# Compute turning angles
def bound_angle_diff(theta_diff):
    """Wrap an angle difference given in degrees into the interval [-180, 180)."""
    shifted = theta_diff - 180
    wrapped = shifted % 360
    return wrapped - 180

for traj in traj_collection.trajectories:
    # Work on a plain array: integer `[]` lookups on a datetime-indexed Series rely on
    # positional fallback that pandas has deprecated
    headings = traj.df['direction'].to_numpy()
    # Interior rows: next row's heading minus this row's heading, wrapped by bound_angle_diff
    turns = bound_angle_diff(headings[2:] - headings[1:-1])
    # NOTE(review): the first entry is the raw first heading rather than a heading
    # difference, and the last entry is 0; both are kept as before - confirm this is intended
    traj.df['turning_angle'] = np.concatenate(([headings[0]], turns, [0]))

In [None]:
# Preview the first rows of the first trajectory's table
traj_collection.trajectories[0].df.head()

In [None]:
# Visual check of the turning angle: show the piece of the first trajectory that runs
# from the position before index i to the position after it
i = 1
first_traj = traj_collection.trajectories[0]
first_traj.get_linestring_between(first_traj.df.index[i - 1], first_traj.df.index[i + 1])

In [None]:
# Split every trajectory wherever consecutive observations are more than 60 minutes apart;
# results are keyed by the trajectory ID cut at the first underscore
obs_gap_trajs_dict = {
    traj.id.split('_')[0]: mpd.ObservationGapSplitter(traj).split(gap=timedelta(minutes=60))
    for traj in traj_collection
}

In [None]:
# Print the sizes of the gap-split trajectories per transmitter and collect one summary row each
obs_gap_lengths_dict = {}
data = []
for name, obs_gap_trajs in obs_gap_trajs_dict.items():
    pieces = obs_gap_trajs.trajectories
    lengths = np.array([piece.size() for piece in pieces])
    obs_gap_lengths_dict[name] = lengths
    print(name)
    print(lengths)
    print('max:', np.max(lengths), 'min:', np.min(lengths), 'avg:', '%.2f' % np.average(lengths), 'sum:', np.sum(lengths), 'len:', len(lengths))
    # The largest piece supplies the start time, end time and duration (minutes) of the row
    idx = np.argmax(lengths)
    max_traj = pieces[idx]
    start_time = max_traj.get_start_time()
    end_time = max_traj.get_end_time()
    total_minutes = (end_time - start_time).total_seconds() / 60
    data.append([name, np.max(lengths), start_time, end_time, total_minutes, np.average(lengths), np.sum(lengths), len(lengths)])
obs_gap_lengths_df = pd.DataFrame(
    data,
    columns=['TRANSMITTER', 'MAX_LEN', 'MAX_START_TIME', 'MAX_END_TIME', 'TOTAL_MINUTES', 'AVG', 'SUM', 'NUM'],
)

In [None]:
# Display the per-transmitter summary table (the commented line shows a narrower column selection)
# obs_gap_lengths_df[['TRANSMITTER', 'MAX_LEN', 'NUM']]
obs_gap_lengths_df

In [None]:
# Build a table with one row per gap-split trajectory (transmitter, start, end, duration,
# number of positions), order it by number of positions (largest first) and write it to CSV
data = []
for name, obs_gap_trajs in obs_gap_trajs_dict.items():
    for traj in obs_gap_trajs:
        start_time, end_time = traj.get_start_time(), traj.get_end_time()
        data.append([name, start_time, end_time, end_time - start_time, traj.size()])
obs_gap_lengths_df = pd.DataFrame(
    data,
    columns=['TRANSMITTER', 'START_TIME', 'END_TIME', 'TIMEDELTA', 'NUM_POSITIONS'],
).sort_values('NUM_POSITIONS', ascending=False)
obs_gap_lengths_df.to_csv('../data/3-min-run-trajectories.csv', index=False)

In [None]:
# Speed histograms: stacked histogram of all sharks on top, then one panel per shark
num_bins = 100
cmap = plt.get_cmap('jet')
N = len(traj_collection.trajectories)
fig, axs = plt.subplots(N + 1, 1, figsize=(5, 5 * (N + 1)))

total_data, colors, labels = [], [], []

for i, traj in enumerate(traj_collection.trajectories):
    ax = axs[i + 1]
    color = cmap(float(i) / N)
    data = traj.df['speed'].to_numpy()
    label = traj.id.split('_')[0]

    # Remember this shark's series for the combined panel
    total_data.append(data)
    colors.append(color)
    labels.append(label)

    ax.hist(data, bins=num_bins, color=color, label=label)
    ax.legend(bbox_to_anchor=(1.04, 1))
    ax.set_title('Num data points: ' + str(traj.size()))

combined_ax = axs[0]
combined_ax.hist(total_data, bins=num_bins, color=colors, label=labels, stacked=True, histtype='barstacked')
combined_ax.legend(bbox_to_anchor=(1.04, 1))
combined_ax.set_title('Speed histograms')

fig.savefig(output + '/speed_histograms_full_dataset', bbox_inches="tight")
plt.show()

In [None]:
# Turning angle histograms (degrees converted to radians): stacked histogram of all
# sharks on top, then one panel per shark; every x-axis is limited to [-pi, pi]
num_bins = 100
cmap = plt.get_cmap('jet')
N = len(traj_collection.trajectories)
fig, axs = plt.subplots(N + 1, 1, figsize=(5, 5 * (N + 1)))

total_data, colors, labels = [], [], []

for i, traj in enumerate(traj_collection.trajectories):
    ax = axs[i + 1]
    color = cmap(float(i) / N)
    data = np.array(traj.df['turning_angle']) * np.pi / 180
    label = traj.id.split('_')[0]

    # Remember this shark's series for the combined panel
    total_data.append(data)
    colors.append(color)
    labels.append(label)

    ax.hist(data, bins=num_bins, color=color, label=label)
    ax.legend(bbox_to_anchor=(1.04, 1))
    ax.set_title('Num data points: ' + str(traj.size()))
    ax.set_xlim((-np.pi, np.pi))

combined_ax = axs[0]
combined_ax.hist(total_data, bins=num_bins, color=colors, label=labels, stacked=True, histtype='barstacked')
combined_ax.legend(bbox_to_anchor=(1.04, 1))
combined_ax.set_xlim((-np.pi, np.pi))
combined_ax.set_title('Turning angle histograms')

fig.savefig(output + '/turning_angle_histograms_full_dataset', bbox_inches="tight")
plt.show()

In [None]:
# Depth histograms: stacked histogram of all sharks on top, then one panel per shark.
# Rows without a DEPTH value are left out; each title shows used / total positions.
num_bins = 100
cmap = plt.get_cmap('jet')
N = len(traj_collection.trajectories)
fig, axs = plt.subplots(N + 1, 1, figsize=(5, 5 * (N + 1)))

total_data, colors, labels = [], [], []

for i, traj in enumerate(traj_collection.trajectories):
    ax = axs[i + 1]
    color = cmap(float(i) / N)
    data = traj.df['DEPTH'].dropna().to_numpy()
    label = traj.id.split('_')[0]

    # Remember this shark's series for the combined panel
    total_data.append(data)
    colors.append(color)
    labels.append(label)

    ax.hist(data, bins=num_bins, color=color, label=label)
    ax.legend(bbox_to_anchor=(1.04, 1))
    ax.set_title('Num data points: {} / {}'.format(str(len(data)), traj.size()))

combined_ax = axs[0]
combined_ax.hist(total_data, bins=num_bins, color=colors, label=labels, stacked=True, histtype='barstacked')
combined_ax.legend(bbox_to_anchor=(1.04, 1))

# Give every per-shark panel the x-range of the combined panel
shared_xlim = combined_ax.get_xlim()
for ax in axs[1:]:
    ax.set_xlim(shared_xlim)

combined_ax.set_title('Depth histograms')
fig.savefig(output + '/depth_histograms_full_dataset', bbox_inches="tight")
plt.show()

In [None]:
# Report the earliest and latest timestamps in the shark position data
start_time, end_time = shark_gdf.index.min(), shark_gdf.index.max()
print(start_time, end_time)