In [None]:
import numpy as np
import os
import glob
import cv2
from scipy import signal
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import matplotlib as mpl
from scipy.spatial.distance import euclidean
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KernelDensity

import sys
sys.path.append('.../bats-code')
import bat_functions as bf

In [None]:
# Folder intended for figures from this notebook; created if it does not exist.
# NOTE(review): `save_folder` is reassigned in the next cell to the
# wing-validation data folder, so later bf.save_fig(save_folder, ...) calls
# do not write into this 'wing-error' directory when the cells run in order.
plot_folder = '.../bats-data/plots'
save_folder = os.path.join(plot_folder, 'wing-error')
os.makedirs(save_folder, exist_ok=True)

In [None]:
# Root of the wing-validation data and its image / csv sub-folders.
# NOTE(review): this replaces the plot `save_folder` set in the previous cell;
# both the figures and the combined csv written later go to this folder.
save_folder = '.../bats-data/wing-validation'
images_folder = os.path.join(save_folder, 'validation-images')
info_folder = os.path.join(save_folder, 'validation-csvs')

In [None]:
# Collect every csv directly inside `info_folder`, in sorted path order.
csv_pattern = os.path.join(info_folder, '*.csv')
info_files = sorted(glob.glob(csv_pattern))
print(f'found {len(info_files)} .csvs')

In [None]:
# One DataFrame per validation csv, in the same order as `info_files`.
info_dfs = list(map(pd.read_csv, info_files))

In [None]:
# Stack all validation tables into one frame with a fresh 0..n-1 index
# (later cells address rows with info_df.loc[i, ...] for i in range(n)).
info_df = pd.concat(info_dfs, ignore_index=True)

In [None]:
def get_wingspan(x1, y1, x2, y2):
    """Return the Euclidean distance between points (x1, y1) and (x2, y2).

    Args:
        x1, y1: coordinates of the first point.
        x2, y2: coordinates of the second point.

    Returns:
        The straight-line distance between the two points, in the same
        units as the inputs.
    """
    first_tip = (x1, y1)
    second_tip = (x2, y2)
    return euclidean(first_tip, second_tip)

def has_groundtruth(x1, x2):
    """Return False when either x coordinate is negative, True otherwise.

    Only the two x coordinates are inspected; a negative value is treated
    as "no ground truth available".
    """
    either_negative = x1 < 0 or x2 < 0
    return not either_negative
    

In [None]:
# Constants passed to the bf height / multiplier helpers in the cells below.
shift = 0 # loss on each side from not padding during detection (48)
FRAME_WIDTH = 2704 - (2 * shift)  # 2704 minus `shift` on each side; presumably pixels -- TODO confirm
WINGSPAN = .8 # meters, max extent while flying 
HCONST = 1454.9 # pixels

In [None]:
# When True, cells guarded by `if save:` write their figure via bf.save_fig.
save = True

In [None]:
# For every darkness bin, sample wingspan correction scales from that bin's
# KDE, apply them to a range of measured wingspans, and draw one violin plot
# of the resulting height estimates per bin.
wing_validation_file = '.../bats-data/wing-validation/combined_wing_validation_info.csv'
wing_correction_info = bf.get_wing_correction_distributions(
    wing_validation_file, num_darkness_bins=4, kde_bw_scale=.25, should_plot=False
)
wing_correction_kdes, darkness_bins = wing_correction_info

lower_percentile = 0.05
upper_percentile = 0.95

# Defined up front so the names exist even if there are no KDEs to loop over.
measured_wings = []
est_heights = []
wing_samples = 100000


for kde_bin, kde in enumerate(wing_correction_kdes):
    # Start from empty lists for each darkness bin. Without this reset the
    # figure titled "darkbin k" would also contain every sample drawn for
    # bins 0..k-1, because the lists kept growing across iterations.
    measured_wings = []
    est_heights = []

    for measured_wing in range(10, 80, 5):
        correction_scales = kde.sample(wing_samples)[:, 0]

        biased_wing = bf.correct_wingspan(np.ones(wing_samples)*measured_wing,
                                          correction_scales)
        # (disabled) No wingspans smaller than 2 pixels:
        # biased_wing = np.maximum(biased_wing, 2)
        heights = bf.calculate_height(
            biased_wing, HCONST, WINGSPAN)
        heights = bf.get_middle_percentiles(heights, lower_percentile, upper_percentile)
        est_heights.extend(heights)
        # One x value per retained height so both lists stay aligned.
        measured_wings.extend([measured_wing for _ in heights])

    plt.figure(figsize=(10, 10))
    sns.violinplot(x=measured_wings, y=est_heights, color='#AAAAAA')
    plt.xlabel('Wingspan (pixels)')
    plt.ylabel('Estimated Height (meters)')
    # Passed to bf.save_fig; the spelling is left untouched so the string
    # handed to save_fig is the same as before.
    title = (f"wing span vs height with error "
             f"darkbin {kde_bin} "
             f"lowwerpercentile {lower_percentile} "
             f"upperpercentile {upper_percentile} "
             f"numsamples {wing_samples}"
            )
    if save:
        bf.save_fig(save_folder, title)

In [None]:
# Print summary statistics of the sampled correction scale for each
# darkness-bin KDE.
for kde_bin, kde in enumerate(wing_correction_kdes):
    print(f"kde {kde_bin} --") 

    correction_scales = kde.sample(wing_samples)[:, 0]

    # Quartiles in one call; the mean and median are computed separately.
    lower_correction, upper_correction = np.percentile(correction_scales, [25, 75])
    median_correction = np.median(correction_scales)
    mean_correction = np.mean(correction_scales)

    summary = (
        ("mean", mean_correction),
        ("median", median_correction),
        ("lower", lower_correction),
        ("upper", upper_correction),
    )
    for stat_name, stat_value in summary:
        print(f"{stat_name} correction {stat_value}")

In [None]:
# For three example wingspans, print quartiles of the corrected height and
# bat-multiplier samples next to the uncorrected ("raw") values, using the
# correction KDE at index `kde_bin`.
wing_validation_file = '.../bats-data/wing-validation/combined_wing_validation_info.csv'
wing_correction_info = bf.get_wing_correction_distributions(
    wing_validation_file,
    num_darkness_bins=4,
    kde_bw_scale=.25,
    should_plot=False,
)
wing_correction_kdes, darkness_bins = wing_correction_info

lower_percentile = 0.05
upper_percentile = 0.95

measured_wings = []
est_heights = []
wing_samples = 100000

camera_distance = 400

kde_bin = 0

for measured_wing in [7, 24, 48]:
    # The one-element slice selects only the KDE at position `kde_bin`.
    for kde in wing_correction_kdes[kde_bin:kde_bin+1]:
        print(f"-- wing {measured_wing}, kde {kde_bin} --") 

        correction_scales = kde.sample(wing_samples)[:, 0]
        repeated_wing = np.ones(wing_samples)*measured_wing
        biased_wing = bf.correct_wingspan(repeated_wing, correction_scales)

        heights = bf.calculate_height(biased_wing, HCONST, WINGSPAN)
        multipliers = bf.combined_bat_multiplier(
            FRAME_WIDTH, WINGSPAN, biased_wing, camera_distance)

        lower_height, upper_height = np.percentile(heights, [25, 75])
        median_height = np.median(heights)
        print(f"median height {median_height}")
        print(f"lower height {lower_height}")
        print(f"upper height {upper_height}")

        # Same helpers applied to the uncorrected wingspan for comparison.
        raw_heights = bf.calculate_height(measured_wing, HCONST, WINGSPAN)
        raw_multiplier = bf.combined_bat_multiplier(
            FRAME_WIDTH, WINGSPAN, measured_wing, camera_distance)

        print(f"raw height {raw_heights}")

        lower_multiplier, upper_multiplier = np.percentile(multipliers, [25, 75])
        median_multiplier = np.median(multipliers)

        print('***')
        print(f"median multiplier {median_multiplier}")
        print(f"lower multiplier {lower_multiplier}")
        print(f"upper multiplier {upper_multiplier}")
        print(f"raw multiplier {raw_multiplier}")
        

In [None]:
# Violin plot of the bat multiplier obtained for a range of measured
# wingspans, sampling correction scales from the KDE at index 3.
wing_validation_file = '.../bats-data/wing-validation/combined_wing_validation_info.csv'
wing_correction_info = bf.get_wing_correction_distributions(
    wing_validation_file, num_darkness_bins=4, kde_bw_scale=.25, should_plot=False
)
wing_correction_kdes, darkness_bins = wing_correction_info

lower_percentile = 0.0
upper_percentile = 1.0

camera_distance = 400

measured_wings = []
all_multipliers = []

wing_samples = 100000
kde = wing_correction_kdes[3]
for measured_wing in range(10, 80, 5):
    correction_scales = kde.sample(wing_samples)[:, 0]

    biased_wing = bf.correct_wingspan(np.ones(wing_samples)*measured_wing,
                                      correction_scales)
    # (disabled) No wingspans smaller than 2 pixels:
    # biased_wing = np.maximum(biased_wing, 2)
    multipliers = bf.combined_bat_multiplier(FRAME_WIDTH, WINGSPAN,
                                             biased_wing,
                                             camera_distance)

    multipliers = bf.get_middle_percentiles(multipliers,
                                            lower_percentile,
                                            upper_percentile)
    all_multipliers.extend(multipliers)
    # One x value per retained multiplier so both lists stay aligned.
    measured_wings.extend([measured_wing for _ in multipliers])

plt.figure(figsize=(20, 6))
sns.violinplot(x=measured_wings, y=all_multipliers, color='#AAAAAA', bw=.1)
plt.xlabel('Wingspan (pixels)')
plt.ylabel('Estimated bat multiplier')
# Build the title from the percentile bounds actually used. The previous
# hard-coded "middle 90 percent" did not match the 0.0 / 1.0 bounds above,
# so the saved figure was mislabelled.
title = (f"Wing span vs multiplier camera distance {camera_distance} "
         f"lowerpercentile {lower_percentile} "
         f"upperpercentile {upper_percentile}")
if save:
    bf.save_fig(save_folder, title)


In [None]:
# Violin plot of the bat multiplier for two example wingspans (10 and 50),
# sampling correction scales from the KDE at index 3.
#
# Fixes: the file path was assigned to the misspelled name
# `ing_validation_file`, so the call below only worked through a
# `wing_validation_file` left over from an earlier cell. The path now matches
# the one used by every other cell, which is the value that was effectively
# used before.
wing_validation_file = '.../bats-data/wing-validation/combined_wing_validation_info.csv'
wing_correction_info = bf.get_wing_correction_distributions(
    wing_validation_file, num_darkness_bins=4, kde_bw_scale=.25, should_plot=False
)
wing_correction_kdes, darkness_bins = wing_correction_info

lower_percentile = 0.0
upper_percentile = 1.0

# Set here so the cell does not depend on a value left behind by another
# cell; 400 is the value the other cells assign.
camera_distance = 400

measured_wings = []
all_multipliers = []
wing_samples = 10000
kde = wing_correction_kdes[3]
for measured_wing in [10, 50]:
    correction_scales = kde.sample(wing_samples)[:, 0]

    biased_wing = bf.correct_wingspan(np.ones(wing_samples)*measured_wing,
                                      correction_scales)
    # (disabled) No wingspans smaller than 2 pixels:
    # biased_wing = np.maximum(biased_wing, 2)
    multipliers = bf.combined_bat_multiplier(FRAME_WIDTH, WINGSPAN,
                                             biased_wing,
                                             camera_distance)

    multipliers = bf.get_middle_percentiles(multipliers,
                                            lower_percentile,
                                            upper_percentile)
    all_multipliers.extend(multipliers)
    # One x value per retained multiplier so both lists stay aligned.
    measured_wings.extend([measured_wing for _ in multipliers])

plt.figure(figsize=(10, 10))
sns.violinplot(x=measured_wings, y=all_multipliers, color='#AAAAAA')
plt.xlabel('Wingspan (pixels)')
plt.ylabel('bat multipliers')
# This cell plots multipliers; the earlier "height" wording was a copy-paste
# leftover from the height cells.
title = "Wing span vs multiplier with error"

In [None]:
# Sanity check: the x and y lists given to the violin plot must be equally
# long. `est_multipliers` was never defined anywhere in the notebook; the
# list filled by the multiplier cells is `all_multipliers`.
len(measured_wings), len(all_multipliers)

In [None]:
# Violin plot of height estimates for a single measured wingspan (50),
# sampling correction scales from the KDE at index 3.
wing_validation_file = '.../bats-data/wing-validation/combined_wing_validation_info.csv'
wing_correction_info = bf.get_wing_correction_distributions(
    wing_validation_file,
    num_darkness_bins=4,
    kde_bw_scale=.25,
    should_plot=False,
)
wing_correction_kdes, darkness_bins = wing_correction_info

lower_percentile = 0.0
upper_percentile = 1.0

measured_wings = []
est_heights = []
wing_samples = 100000
kde = wing_correction_kdes[3]
for measured_wing in [50]:
    correction_scales = kde.sample(wing_samples)[:, 0]

    repeated_wing = np.ones(wing_samples)*measured_wing
    biased_wing = bf.correct_wingspan(repeated_wing, correction_scales)
    heights = bf.calculate_height(biased_wing, HCONST, WINGSPAN)
    heights = bf.get_middle_percentiles(heights, lower_percentile, upper_percentile)

    est_heights.extend(heights)
    # Repeat the wingspan once per retained height to keep x and y aligned.
    measured_wings.extend([measured_wing] * len(heights))

plt.figure(figsize=(10, 10))
sns.violinplot(x=measured_wings, y=est_heights, color='#AAAAAA')
plt.xlabel('Wingspan (pixels)')
plt.ylabel('Estimated Height (meters)')
title = "Wing span vs height with error"

In [None]:
# Load every saved observation dict into all_observations[date][camera],
# then drop the (date, camera) entries flagged for exclusion.
root_folder = ".../kasanka-bats/processed/deep-learning"
observations_root = os.path.join(root_folder, "observations")
all_observations = {}
day_folders = sorted(glob.glob(os.path.join(observations_root, '*')))
for day_folder in day_folders:
    obs_files = sorted(glob.glob(os.path.join(day_folder, '*.npy')))
    date = os.path.basename(day_folder)
    all_observations[date] = {}
    for obs_file in obs_files:
        # The camera name is the text after the last '-' in the file name.
        # Split the base name only: splitting the whole path would pick up
        # directory components (the root folder itself contains '-') for any
        # file whose own name has no '-'.
        file_stem = os.path.splitext(os.path.basename(obs_file))[0]
        camera = file_stem.split('-')[-1]
        # NOTE: allow_pickle=True unpickles arbitrary objects; only load
        # files from a trusted source.
        obs = np.load(obs_file, allow_pickle=True)
        # .item() to get dict from inside the array that was wrapped around
        # it when using np.save()
        all_observations[date][camera] = obs.item()

# Remove observations to exclude (because camera ran out of batteries etc.)
exclude = True
# Manually exclude cameras that had issues
all_observations['17Nov']['MusoleParking']['exclude'] = True
all_observations['18Nov']['MusolePath']['exclude'] = True
all_observations['20Nov']['MusolePath']['exclude'] = True
if exclude:
    good_obs = {}
    for date, day_obs in all_observations.items():
        good_obs[date] = {}
        for camera, obs in day_obs.items():
            # Skip entries carrying a truthy 'exclude' flag; entries without
            # the key are kept.
            if obs.get('exclude', False):
                continue
            good_obs[date][camera] = obs
    all_observations = good_obs

In [None]:
# Display the 'mean_wing' entry stored for camera 'BBC' on '16Nov'.
all_observations['16Nov']['BBC']['mean_wing']

In [None]:
# Gather every track's mean wingspan, and split the values by whether the
# track is shorter than `track_length_thresh`.
all_wing = []
short_wing = []
long_wing = []
track_length_thresh = 5
for date, day_obs in all_observations.items():
    for camera, obs in day_obs.items():
        for wing, length in zip(obs['mean_wing'], obs['track_length']):
            # Tracks below the threshold are "short"; all others are "long".
            destination = short_wing if length < track_length_thresh else long_wing
            destination.append(wing)
        all_wing.extend(obs['mean_wing'])

In [None]:
# Fraction and count of wingspans above 150, plus the largest value seen.
all_wing = np.array(all_wing)
num_large = len(all_wing[all_wing > 150])
num_total = len(all_wing)
print(num_large / num_total)
print(np.max(all_wing))
print(num_large, num_total)

In [None]:
# Shape of the sub-array of wingspans exactly equal to 11 (i.e. their count).
all_wing[all_wing==11].shape

In [None]:
# Summary statistics of all track wingspans.
# NOTE(review): this import sits mid-notebook; the other imports are in the
# first cell.
import scipy.stats
# The mode is computed on the values cast to int. The print receives two
# arguments (note the comma after the 'mode' string), so a space is inserted
# before ' median'.
print(f'mean: {np.mean(all_wing)}, '
      f'mode: {scipy.stats.mode(all_wing.astype(int))}',
      f' median: {np.median(all_wing)}')

# 2.5th and 97.5th percentiles, i.e. the bounds of the central 95%.
print(np.percentile(all_wing, 2.5), np.percentile(all_wing, 97.5))

In [None]:
# Stacked histogram of track wingspans below `wing_thresh`, with long and
# short tracks shown separately.
# NOTE(review): `colors` is not used by the plot below, which passes explicit
# RGB tuples instead.
colors = ["#393939", "#909090"]

long_wing = np.array(long_wing)
short_wing = np.array(short_wing)

wing_thresh = 80

# Legend text follows `track_length_thresh` (the value used to split the
# tracks) so it cannot drift from the actual split; with the threshold at 5
# the labels are identical to the previous hard-coded ones.
track_labels = [f'long tracks (>= {track_length_thresh} frames)',
                f'short tracks (< {track_length_thresh} frames)']

_ = plt.hist([long_wing[long_wing<wing_thresh], short_wing[short_wing<wing_thresh]], bins=200,
             stacked=True, label=track_labels, color=[(0, 0, 0), (.75, .75, .75)])
plt.xlabel('Estimated wingspan (pixels)')
plt.ylabel('Number of tracks')
plt.legend()

plot_title = "Track wingspan distribution with short tracks shown"
# Honour the notebook-wide `save` flag like the other plotting cells do.
if save:
    bf.save_fig(save_folder, plot_title, fig=None)


In [None]:
# Violin plot of height estimates for a single measured wingspan (48),
# sampling correction scales from the KDE at index 3. `heights` from this
# cell is summarised in the next one.
wing_validation_file = '.../bats-data/wing-validation/combined_wing_validation_info.csv'
wing_correction_info = bf.get_wing_correction_distributions(
    wing_validation_file,
    num_darkness_bins=4,
    kde_bw_scale=.25,
    should_plot=False,
)
wing_correction_kdes, darkness_bins = wing_correction_info

lower_percentile = 0.0
upper_percentile = 1.0

measured_wings = []
est_heights = []
wing_samples = 100000
kde = wing_correction_kdes[3]
for measured_wing in [48]:
    correction_scales = kde.sample(wing_samples)[:, 0]

    repeated_wing = np.ones(wing_samples)*measured_wing
    biased_wing = bf.correct_wingspan(repeated_wing, correction_scales)
    heights = bf.calculate_height(biased_wing, HCONST, WINGSPAN)
    heights = bf.get_middle_percentiles(heights, lower_percentile, upper_percentile)

    est_heights.extend(heights)
    # Repeat the wingspan once per retained height to keep x and y aligned.
    measured_wings.extend([measured_wing] * len(heights))

plt.figure(figsize=(10, 10))
sns.violinplot(x=measured_wings, y=est_heights, color='#AAAAAA')
plt.xlabel('Wingspan (pixels)')
plt.ylabel('Estimated Height (meters)')
title = "Wing span vs height with error"




In [None]:
# Median, mean, 25th and 75th percentile of `heights` as left behind by the
# most recently run cell that assigned it.
np.median(heights), np.mean(heights), np.percentile(heights, 25), np.percentile(heights, 75)

In [None]:
# (rows, columns) of the combined validation table.
info_df.shape

In [None]:
# Add a 'darkness' value to every row of info_df: the mean of channel index 2
# of the frame that the row refers to.
frame_root_folder = '.../Elements/bats'
last_camera = None
last_date = None
frames = None

num_rows = info_df.shape[0]

for i in range(num_rows):
    if i % 300 == 0:
        print(f'{i}!')
    date = info_df.loc[i, 'date']
    camera = info_df.loc[i, 'camera']

    # Only re-list the frame files when the (date, camera) pair changes.
    same_recording = (date == last_date) and (camera == last_camera)
    if not same_recording:
        # Despite the name, this holds a glob pattern for the jpg frames.
        images_folder = os.path.join(frame_root_folder,
                                     date, camera, '*/*.jpg')
        frames = sorted(glob.glob(images_folder))
        last_camera = camera
        last_date = date

    frame_index = info_df.loc[i, 'frame']
    frame = plt.imread(frames[frame_index])
    darkness = np.mean(frame[..., 2])
    info_df.loc[i, 'darkness'] = darkness

In [None]:
def _row_wingspan(row):
    """Ground-truth wingspan of one row: distance between its two wingtips."""
    return get_wingspan(row['wingtip1_x'], row['wingtip1_y'],
                        row['wingtip2_x'], row['wingtip2_y'])


def _row_has_groundtruth(row):
    """True unless one of the row's wingtip x coordinates is negative."""
    return has_groundtruth(row['wingtip1_x'], row['wingtip2_x'])


# Row-wise ground-truth wingspan and the flag saying whether it is usable.
info_df['wingspan_gt'] = info_df.apply(_row_wingspan, axis=1)
info_df['has_gt'] = info_df.apply(_row_has_groundtruth, axis=1)

In [None]:
# Signed error of the estimate against ground truth, in pixels and as a
# fraction of the estimated wingspan.
wing_estimate = info_df['wing_estimate']
estimate_minus_gt = wing_estimate - info_df['wingspan_gt']
info_df['error_pixels'] = estimate_minus_gt
info_df['error_norm'] = estimate_minus_gt / wing_estimate

In [None]:
# Write the combined validation table, including the darkness, ground-truth
# and error columns added in the cells above, into `save_folder`.
# NOTE(review): this path equals the `wing_validation_file` read by earlier
# cells, so on a fresh run those cells need this file to exist already.
save_name = os.path.join(save_folder, 
                         "combined_wing_validation_info.csv")
# NOTE(review): index_label=False only omits the index column's header name;
# the index values are presumably still written -- confirm this is intended
# rather than index=False.
info_df.to_csv(save_name, index_label=False)

In [None]:
# Histograms of the pixel error and the normalised error, restricted to rows
# with ground truth; each one is drawn in its own figure.
gt_rows = info_df['has_gt']
info_df.loc[gt_rows, 'error_pixels'].hist(bins=100)
plt.figure()
info_df.loc[gt_rows, 'error_norm'].hist(bins=100)

In [None]:
# Mean pixel error and mean normalised error over rows with ground truth.
gt_errors = info_df.loc[info_df['has_gt'], ['error_pixels', 'error_norm']]
print('pixels mean', gt_errors['error_pixels'].mean())
print('pixels norm mean', gt_errors['error_norm'].mean())

In [None]:
# Linear fit of pixel error against ground-truth wingspan (ground-truth rows
# only). Selecting with a one-element column list yields the (n, 1) feature
# matrix directly.
gt_rows = info_df['has_gt']
X = info_df.loc[gt_rows, ['wingspan_gt']].values
y = info_df.loc[gt_rows, 'error_pixels']
reg = LinearRegression()
reg.fit(X, y)

In [None]:
# Report the fitted slope/intercept and the R^2 of the regression.
# The coefficients were previously a bare expression that was not the last
# statement of the cell, so the notebook never displayed them; print them
# explicitly, as the darkness-regression cell does.
print(reg.coef_, reg.intercept_)
print(f'R2: {reg.score(X, y)}')

In [None]:
# Scatter the normalised error and then the pixel error against the estimated
# wingspan, for ground-truth rows whose normalised error is above -1.
plot_rows = info_df['has_gt'] & (info_df['error_norm']>-1)
wing_estimates = info_df.loc[plot_rows, 'wing_estimate']

plt.figure(figsize=(10, 10))
plt.scatter(wing_estimates, info_df.loc[plot_rows, 'error_norm'], alpha=.3)

plt.figure(figsize=(10, 10))
plt.scatter(wing_estimates, info_df.loc[plot_rows, 'error_pixels'], alpha=.3)

In [None]:
# Linear fit of normalised error against darkness (ground-truth rows only).
# This rebinds `X`, `y` and `reg` from the wingspan regression.
gt_rows = info_df['has_gt']
X = info_df.loc[gt_rows, ['darkness']].values
y = info_df.loc[gt_rows, 'error_norm']
reg = LinearRegression()
reg.fit(X, y)

In [None]:
# Normalised error against darkness for ground-truth rows whose normalised
# error is above -10.
plot_rows = info_df['has_gt'] & (info_df['error_norm'] > -10)

plt.figure(figsize=(10, 10))
plt.scatter(info_df.loc[plot_rows, 'darkness'],
            info_df.loc[plot_rows, 'error_norm'],
            alpha=.3)

In [None]:

# Normalised errors of ground-truth rows with darkness > 0, shaped (n, 1) as
# a single-feature sample matrix for the KDE cells, plus a 100-bin histogram.
kde_rows = info_df['has_gt'] & (info_df['darkness'] > 0)
error_normed = info_df.loc[kde_rows, 'error_norm'].values.reshape(-1, 1)
error_normed_hist = np.histogram(error_normed, bins=100)


In [None]:
# Fit a Gaussian KDE (bandwidth 0.03) to the normalised errors and show the
# estimator's parameters.
kde = KernelDensity(kernel='gaussian', bandwidth=0.03)
kde.fit(error_normed)
kde.get_params()

In [None]:
# Compare the raw histogram of normalised errors with the fitted KDE's
# density, each in its own figure.
kde = KernelDensity(kernel='gaussian', bandwidth=0.03)
kde.fit(error_normed)
sorted_error = np.sort(error_normed, axis=0)
# score_samples returns log-density; it is exponentiated before plotting.
log_dens = kde.score_samples(sorted_error)
density = np.exp(log_dens)

color = 'tab:red'  # assigned but not used by the plots below

fig, ax1 = plt.subplots()
ax1.hist(sorted_error, bins=200)

fig, ax2 = plt.subplots()
ax2.plot(sorted_error, density, c='cyan')

In [None]:
# Draw 10000 samples from the current `kde` and histogram them.
values = kde.sample(10000)
plt.hist(values, bins=100)

In [None]:
# Relative error (pixel error / estimate) against the estimated wingspan,
# drawn for rows with darkness < 50 and then for rows with darkness > 0, both
# on the current axes. The title is the mean relative error of the second
# selection.
relative_error = info_df['error_pixels'] / info_df['wing_estimate']

cond = (info_df['has_gt']) & (info_df['darkness'] < 50)
plt.scatter(info_df.loc[cond, 'wing_estimate'], relative_error[cond], alpha=.1)
plt.ylim(-1)

cond = (info_df['has_gt']) & (info_df['darkness'] > 0)
plt.scatter(info_df.loc[cond, 'wing_estimate'], relative_error[cond], alpha=.1)
plt.ylim(-1)

plt.title(relative_error[cond].mean())

In [None]:
# Linear fit of pixel error against darkness (ground-truth rows only).
gt_rows = info_df['has_gt']
X = info_df.loc[gt_rows, ['darkness']].values
y = info_df.loc[gt_rows, 'error_pixels']
darkness_reg = LinearRegression()
darkness_reg.fit(X, y)

In [None]:
# Slope, intercept and R^2 of the darkness -> pixel-error regression, scored
# on the same X / y it was fitted on.
print(darkness_reg.coef_, darkness_reg.intercept_)
print(f'R2: {darkness_reg.score(X, y)}')

In [None]:
# Normalised error against darkness for all ground-truth rows, with the
# lower y-limit set to -1.
gt_rows = info_df['has_gt']

plt.figure(figsize=(10, 10))
plt.scatter(info_df.loc[gt_rows, 'darkness'],
            info_df.loc[gt_rows, 'error_norm'],
            alpha=.1)
plt.ylim(-1)

In [None]:
# Upper edges of `dark_bins` equal-width darkness bins covering
# 0 .. max darkness among ground-truth rows.
bins = 50
dark_bins = 5

error_array = np.zeros((bins, dark_bins))

max_darkness = np.max(info_df.loc[info_df['has_gt'], 'darkness'])
# dark_bins + 1 evenly spaced edges; the leading 0 edge is dropped so only
# the upper edge of each bin remains.
dark_bins_values = np.linspace(0, max_darkness,
                               num=dark_bins+1,
                               endpoint=True)[1:]

In [None]:
# 2-D histogram of (darkness, error_norm) for ground-truth rows whose
# normalised error is above -1: 4 darkness bins by 30 error bins.
hist_rows = info_df['has_gt'] & (info_df['error_norm'] > -1)
hist_out = np.histogram2d(info_df.loc[hist_rows, 'darkness'],
                          info_df.loc[hist_rows, 'error_norm'],
                          bins=[4, 30], density=False)

counts, xedges, yedges = hist_out
# Transpose so rows follow the error bins and columns follow the darkness bins.
H = counts.T

In [None]:
# Fit and plot one error KDE per darkness bin of the 2-D histogram.
#
# The loop previously ignored `r` and refitted the same KDE on all of
# `error_normed`, producing an identical pair of figures for every bin.
# Each iteration now uses only the rows whose darkness lies in bin `r`,
# selected with the same row filter that built the histogram.
hist_rows = info_df['has_gt'] & (info_df['error_norm'] > -1)
num_darkness_bins = H.shape[1]

for r in range(num_darkness_bins):
    bin_low, bin_high = xedges[r], xedges[r + 1]
    above_low = info_df['darkness'] >= bin_low
    # Bins are half-open except the last, which also contains its right edge.
    if r == num_darkness_bins - 1:
        below_high = info_df['darkness'] <= bin_high
    else:
        below_high = info_df['darkness'] < bin_high
    in_bin = hist_rows & above_low & below_high

    bin_error = info_df.loc[in_bin, 'error_norm'].values[:, np.newaxis]
    if bin_error.shape[0] == 0:
        # A KDE cannot be fitted without samples.
        print(f'darkness bin {r}: no samples, skipped')
        continue

    kde = KernelDensity(kernel='gaussian', bandwidth=0.03).fit(bin_error)
    sorted_error = np.sort(bin_error, axis=0)
    # score_samples returns log-density; exponentiate before plotting.
    log_dens = kde.score_samples(sorted_error)

    fig, ax1 = plt.subplots()
    ax1.hist(sorted_error, bins=200)
    ax1.set_title(f'darkness bin {r}: error_norm histogram')

    fig, ax2 = plt.subplots()
    ax2.plot(sorted_error, np.exp(log_dens), c='cyan')
    ax2.set_title(f'darkness bin {r}: KDE density')

In [None]:
# Plot the 2-D histogram, normalise each darkness column of H in place so it
# sums to 1, then plot it again.
# NOTE(review): H is modified in place, so re-running this cell shows the
# already-normalised values in the first figure as well. `X` is rebound here
# (it previously held the regression features).
fig = plt.figure(figsize=(14, 6))
ax = fig.add_subplot(132, title='pcolormesh: actual edges',
        aspect='equal')
# y edges are multiplied by 100 for display (presumably to show percent).
X, Y = np.meshgrid(xedges, yedges*100)
ax.pcolormesh(X, Y, H)

# Print each column's total, then divide the column by it.
# NOTE(review): a column with a zero total would produce NaNs here.
for r in range(H.shape[1]):
    print(np.sum(H[:, r]))
    H[:, r] /= np.sum(H[:, r])
    
#     print(np.sum(H[r]))

# Same plot again, now showing the column-normalised histogram.
fig = plt.figure(figsize=(14, 6))
ax = fig.add_subplot(132, title='pcolormesh: actual edges',
        aspect='equal')
X, Y = np.meshgrid(xedges, yedges*100)
ax.pcolormesh(X, Y, H)

In [None]:
# Show H as an image (one pixel per histogram cell, ignoring bin edges).
plt.imshow(H)

In [None]:
# Draw the current H on its bin edges (y edges multiplied by 100).
# This repeats the last plot of the normalisation cell above.
fig = plt.figure(figsize=(14, 6))
ax = fig.add_subplot(132, title='pcolormesh: actual edges',
        aspect='equal')
X, Y = np.meshgrid(xedges, yedges*100)
ax.pcolormesh(X, Y, H)

In [None]:
# Larger image view of the darkness / error histogram with labelled axes.
# `hist`, `y_edge` and `x_edge` were never defined in this notebook, so the
# cell raised NameError on a fresh kernel. Use the histogram `H` computed
# above: after the transpose its rows are error_norm bins and its columns
# are darkness bins.
plt.figure(figsize=(10,10))
plt.imshow(H)
plt.ylabel('error_norm bin')
plt.xlabel('darkness bin')

In [None]:
# Collect the pixel errors of ground-truth rows for each darkness bin
# (bin_min_darkness, bin_max_darkness].
#
# Fixes: the lower edge was never advanced, so every "bin" started at 0 and
# the bins were cumulative; and each bin's result was overwritten by the
# next. The per-bin Series are now kept in `pixel_errors_by_bin`, ordered
# like `dark_bins_values`.
pixel_errors_by_bin = []
bin_min_darkness = 0
for bin_max_darkness in dark_bins_values:
    valid_rows = ((info_df['darkness'] > bin_min_darkness)
                  & (info_df['darkness'] <= bin_max_darkness))
    valid_rows = (info_df['has_gt'] & valid_rows)

    pixel_error = info_df.loc[valid_rows, 'error_pixels']
    pixel_errors_by_bin.append(pixel_error)

    # The next bin starts where this one ended.
    bin_min_darkness = bin_max_darkness
    

In [None]:
# Pixel error against darkness for all ground-truth rows.
# The two selections were previously bare expressions, so the figure stayed
# empty; pass them to plt.scatter as the other darkness plots do.
plt.figure(figsize=(10, 10))
plt.scatter(info_df.loc[info_df['has_gt'], 'darkness'],
            info_df.loc[info_df['has_gt'], 'error_pixels'],
            alpha=.1)
