In [None]:
import numpy as np
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KernelDensity

In [None]:
# Folder that receives every figure written by this notebook; it is created
# here so the later save calls find it in place.
plots_save_folder = '.../bats-data/plots/wing-validation'
os.makedirs(plots_save_folder, exist_ok=True)

# Wing validation table, read once and reused by the cells below.
val_csv_file = '.../bats-data/wing-validation/combined_wing_validation_info.csv'
wing_df = pd.read_csv(val_csv_file)

In [None]:
def save_fig(save_folder, plot_title, fig=None):
    """Save a figure into ``save_folder`` as a 600 dpi PNG named after its title.

    The file name is ``plot_title`` with spaces replaced by dashes, plus
    the ``.png`` extension.

    Args:
        save_folder: Directory the PNG is written into.
        plot_title: Title the file name is derived from.
        fig: Object exposing ``savefig`` (e.g. a matplotlib figure). When it
            is ``None`` the current pyplot figure is saved via ``plt.savefig``.

    Returns:
        None.
    """
    plot_name = plot_title.replace(' ', '-')
    file = os.path.join(save_folder, plot_name + '.png')
    # Test against None explicitly so that a figure object is never skipped
    # just because it happens to evaluate as falsy.
    target = plt if fig is None else fig
    target.savefig(file, bbox_inches='tight', dpi=600)
    

In [None]:
# Wing estimate versus normalised error for tracks that have ground truth.
# The previous mask AND-ed the `has_gt` column with the `error_norm` column
# itself rather than with a condition on it, which is not a valid boolean row
# filter. An explicit not-null test on `error_norm` is used instead.
# NOTE(review): confirm that "error_norm is present" is the intended
# condition (later cells filter with `error_norm > threshold`).
has_valid_error = wing_df['has_gt'] & wing_df['error_norm'].notna()
plt.scatter(wing_df.loc[has_valid_error, 'wing_estimate'],
            wing_df.loc[has_valid_error, 'error_norm'],
            alpha=.3)

In [None]:
# Normalised error against frame darkness, restricted to the rows flagged
# `has_gt`; the row selection is done once and both columns are read from it.
gt_rows = wing_df.loc[wing_df['has_gt'], ['darkness', 'error_norm']]
plt.scatter(gt_rows['darkness'], gt_rows['error_norm'], alpha=.1)
# plt.ylim(-1)

In [None]:
# Display the rows of `wing_df` selected by the `rows_in_bin` mask.
# NOTE(review): `rows_in_bin` is only assigned by cells further down this
# notebook, so on a fresh kernel run top to bottom this lookup raises
# NameError. Move this cell below the one that builds the mask, or drop it.
wing_df.loc[rows_in_bin]

In [None]:
# The save flag is called `should_save` (the name the later cells use) so
# that it does not rebind the `save_fig` helper function to a bool, which
# would make every later `save_fig(...)` call fail.
should_save = False

max_darkness = wing_df.loc[wing_df['has_gt'], 'darkness'].max()
min_darkness = wing_df.loc[wing_df['has_gt'], 'darkness'].min()
print(f"Max darkness {max_darkness}, min darkness {min_darkness}")

num_bins = 4
dark_bins = np.linspace(0, max_darkness, num_bins+1)

print(dark_bins)

all_errors = []
all_bin_labels = []


# One scatter plot of error against darkness per (date, camera) pair.
for date in ['16Nov', '17Nov']:
    rows_in_bin = (wing_df['has_gt']
                   & (wing_df['date'] == date)
                  )
    date_df = wing_df.loc[rows_in_bin]

    for camera in date_df['camera'].unique():
        fig, ax1 = plt.subplots()
        camera_rows = date_df['camera'] == camera
        errors = date_df.loc[camera_rows, 'error_norm'].values
        darkness = date_df.loc[camera_rows, 'darkness'].values
        # Draw on the axes created above and label the figure so each plot
        # can be told apart when the output is skimmed.
        ax1.scatter(darkness, errors)
        ax1.set_title(f"{date} {camera}")
        ax1.set_xlabel('Average pixel value of frame (blue channel)')
        ax1.set_ylabel('Wing error correction factor')
#         error_std = errors.std()
#         print(np.mean(errors), error_std)
#         bw_scale = .5

#         kde = KernelDensity(
#             kernel='gaussian', bandwidth=error_std*bw_scale).fit(errors[..., np.newaxis])
#         sorted_error = np.sort(errors, axis=0)
#         log_dens = kde.score_samples(sorted_error[..., np.newaxis])



#         color = 'tab:red'
#         ax1.hist(sorted_error, bins=50, density=True)
#     #     ax2=ax1.twinx()
#         ax1.plot(sorted_error, np.exp(log_dens), c='cyan')
#         ax1.set_title(f"num tracks {len(errors)}, mean {np.mean(errors)}, variance {np.var(errors)}")

#         all_errors.extend(errors)
#         all_bin_labels.extend([bin_num for _ in errors])
# plt.figure()
# sns.violinplot(all_bin_labels, all_errors, bw=bw_scale)
# plt.axhline(0.0, c=(.25, .25, .25), linestyle='--')

# xtick_labels = [f'{int(dark_bins[i])+1}:{int(dark_bins[i+1])}' for i in range(num_bins)]
# plt.xticks(ticks=np.arange(num_bins), labels=xtick_labels)
# plt.xlabel('Average pixel value of frame (blue channel) ranges')
# plt.ylabel('Wing error correction factor')
            
# plot_title = "Wing validation bin distributions"
# # plt.title(plot_title)
# if should_save:
#     save_fig(plots_save_folder, plot_title, fig=None)

In [None]:
# Count of rows with error_norm below -1 divided by the count of rows with
# error_norm above -100.
(wing_df['error_norm'] < -1).sum() / (wing_df['error_norm'] > -100).sum()

In [None]:
should_save = True

max_darkness = wing_df.loc[wing_df['has_gt'], 'darkness'].max()
min_darkness = wing_df.loc[wing_df['has_gt'], 'darkness'].min()
print(f"Max darkness {max_darkness}, min darkness {min_darkness}")

# Equal-width darkness bins from 0 up to the darkest ground-truth frame.
num_bins = 4
dark_bins = np.linspace(0, max_darkness, num_bins+1)

print(dark_bins)

# Multiplier turning a bin index into the integer label used for the violins.
location = 18.5

all_errors = []
all_bin_labels = []

# Rows with error_norm at or below this value are left out of every bin.
min_thresh = -100

# Bandwidth factor: scales each bin's error standard deviation for the KDE
# and is also handed to the violin plot. Constant, so set once before the loop.
bw_scale = .5

for bin_num in range(num_bins):
    fig, ax1 = plt.subplots()
    rows_in_bin = (wing_df['has_gt']
                   & (wing_df['darkness'] > dark_bins[bin_num])
                   & (wing_df['darkness'] <= dark_bins[bin_num+1])
                   & (wing_df['error_norm'] > min_thresh)
                  )
    errors = wing_df.loc[rows_in_bin, 'error_norm'].values
    error_std = errors.std()

    # Gaussian KDE over this bin's errors, evaluated at the sorted samples so
    # the density can be drawn as a line over the histogram.
    kde = KernelDensity(
        kernel='gaussian', bandwidth=error_std*bw_scale).fit(errors[..., np.newaxis])
    sorted_error = np.sort(errors, axis=0)
    log_dens = kde.score_samples(sorted_error[..., np.newaxis])

    ax1.hist(sorted_error, bins=50, density=True)
#     ax2=ax1.twinx()
    ax1.plot(sorted_error, np.exp(log_dens), c='cyan')
    ax1.set_title(f"num tracks {len(errors)},"
                  f" mean {np.mean(errors)},"
                  f" variance {np.var(errors)},"
                  f"std {np.std(errors)}")

    print(f"bin {bin_num} --") 

    lower_correction = np.percentile(errors, 25)
    upper_correction = np.percentile(errors, 75)
    median_correction = np.median(errors)
    mean_correction = np.mean(errors)
    print(f"mean correction {mean_correction}")
    print(f"median correction {median_correction}")
    print(f"lower correction {lower_correction}")
    print(f"upper correction {upper_correction}")

    all_errors.extend(errors)
    all_bin_labels.extend([int(bin_num*location) for _ in errors])

# Greyscale palette for the violins.
# (A blue alternative was: "#000025", "#00004B", "#000071", "#000096".)
colors = ["#121212", "#393939", "#5F5F5F", "#858585"]
# Set your custom color palette
sns.set_palette(sns.color_palette(colors))

plt.figure()
# x and y are passed by keyword: recent seaborn releases no longer accept
# them as positional arguments.
ax = sns.violinplot(x=all_bin_labels, y=all_errors, bw=bw_scale)

# Restyle the artists seaborn created: every second collection is painted
# black, the others get an edge matching their face, and all lines go white.
for c in ax.collections[1::2]:
    c.set_facecolor('black')
    c.set_edgecolor('black')
for c in ax.collections[::2]:
    c.set_edgecolor('face')

for l in ax.lines:
    l.set_color('white')

plt.axhline(0.0, c=(.0, .0, .0), linestyle=':')

xtick_labels = [f'{int(dark_bins[i])+1}:{int(dark_bins[i+1])}' for i in range(num_bins)]
xtick_labels.append('')
plt.xticks(ticks=np.arange(num_bins+1), labels=xtick_labels)
plt.xlabel('Average pixel value of frame (blue channel) ranges')
plt.ylabel('Wing error correction factor')

plot_title = f"Wing validation bin distributions min cutoff {min_thresh}"
# plt.title(plot_title)
if should_save:
    save_fig(plots_save_folder, plot_title, fig=None)

In [None]:
# fig, ax = plt.subplots(1,1)
# Toy data for trying out violin styling: y counts 0..99 and x puts the
# first 50 samples in group 2 and the remaining 50 in group 1.
y = np.arange(100)
x = np.where(y < 50, 2.0, 1.0)
axis = sns.violinplot(x=x, y=y)
# Paint every second collection black and turn all line artists white.
for coll in axis.collections[1::2]:
    coll.set_facecolor('black')
    coll.set_edgecolor('black')
for line in axis.lines:
    line.set_color('white')
# lines = [ line for axis in figure.axes for line in axis.get_lines()+axis.collections]

### Check day differences in distributions

In [None]:
should_save = False

max_darkness = wing_df.loc[wing_df['has_gt'], 'darkness'].max()
min_darkness = wing_df.loc[wing_df['has_gt'], 'darkness'].min()
print(f"Max darkness {max_darkness}, min darkness {min_darkness}")

# Equal-width darkness bins from 0 up to the darkest ground-truth frame.
num_bins = 4
dark_bins = np.linspace(0, max_darkness, num_bins+1)

print(dark_bins)

# Multiplier turning a bin index into an integer bin label.
location = 18.5

all_errors = []
all_bin_labels = []

# Bandwidth factor applied to each selection's error standard deviation.
bw_scale = .5

# One figure per darkness bin, with both dates overlaid on the same axes.
for bin_num in range(num_bins):
    fig, ax1 = plt.subplots()
    # One summary line per date, so the second date no longer overwrites the
    # first date's statistics in the title.
    title_lines = []
    drew_any = False
    for date in ['16Nov', '17Nov']:
        rows_in_bin = (wing_df['has_gt']
                       & (wing_df['darkness'] > dark_bins[bin_num])
                       & (wing_df['darkness'] <= dark_bins[bin_num+1])
                       & (wing_df['error_norm'] > -1)
                       & (wing_df['date'] == date)
                      )
        errors = wing_df.loc[rows_in_bin, 'error_norm'].values

        all_errors.extend(errors)
        all_bin_labels.extend([int(bin_num*location) for _ in errors])

        # KernelDensity needs at least one sample and a strictly positive
        # bandwidth; skip the density plot when this date has nothing usable
        # in the bin instead of raising.
        error_std = errors.std() if errors.size else 0.0
        if errors.size == 0 or not error_std > 0:
            title_lines.append(f"{date}: num tracks {len(errors)}")
            continue

        kde = KernelDensity(
            kernel='gaussian', bandwidth=error_std*bw_scale).fit(errors[..., np.newaxis])
        sorted_error = np.sort(errors, axis=0)
        log_dens = kde.score_samples(sorted_error[..., np.newaxis])

        # Semi-transparent, labelled histograms keep the two dates
        # distinguishable when they overlap.
        ax1.hist(sorted_error, bins=50, density=True, alpha=.5, label=date)
    #     ax2=ax1.twinx()
        ax1.plot(sorted_error, np.exp(log_dens), c='cyan')
        drew_any = True
        title_lines.append(f"{date}: num tracks {len(errors)},"
                           f" mean {np.mean(errors)},"
                           f" variance {np.var(errors)},"
                           f" std {np.std(errors)}")

    ax1.set_title("\n".join(title_lines))
    if drew_any:
        ax1.legend()

In [None]:
def collect_crossing_observations(crossing_tracks, darkness_means):
    """Summarise the tracks whose ``'crossed'`` value is non-zero.

    Args:
        crossing_tracks: Iterable of mapping-like tracks providing the keys
            ``'crossed'``, ``'mean_wing'`` and ``'track'``.
        darkness_means: Sequence indexed by the absolute value of
            ``'crossed'`` to look up the darkness for that track.

    Returns:
        dict of numpy arrays with one entry per kept track:
        ``frames`` (the signed ``'crossed'`` value), ``mean_wing``,
        ``ids`` (position of the track in ``crossing_tracks``),
        ``direction`` (+1 when ``'crossed'`` is positive, -1 when negative),
        ``darkness`` and ``track_length`` (``len(track['track'])``).
        Tracks whose ``'crossed'`` is neither positive nor negative are skipped.
    """
    frames = []
    wings = []
    ids = []
    directions = []
    darkness = []
    lengths = []
    for track_ind, track in enumerate(crossing_tracks):
        crossed = track['crossed']
        if crossed > 0:
            direction = 1
        elif crossed < 0:
            direction = -1
        else:
            continue
        frames.append(crossed)
        wings.append(track['mean_wing'])
        ids.append(track_ind)
        directions.append(direction)
        # The sign of 'crossed' only encodes direction; its magnitude is the
        # index into the darkness array.
        darkness.append(darkness_means[abs(crossed)])
        lengths.append(len(track['track']))

    return {
        'frames': np.array(frames),
        'mean_wing': np.array(wings),
        'ids': np.array(ids),
        'direction': np.array(directions),
        'darkness': np.array(darkness),
        'track_length': np.array(lengths),
    }


# observations[date][camera] -> summary dict for that camera on that day.
day_folders = sorted(glob.glob('.../kasanka-bats/processed/deep-learning/*Nov'))
observations = {}
for day_folder in day_folders:
    print(day_folder)

    date = os.path.basename(day_folder)
    track_files = sorted(glob.glob(os.path.join(day_folder, '*/crossing_tracks.npy')))
    observations[date] = {}

    for track_file in track_files:
        # The camera name is the folder that directly contains the track file.
        camera = os.path.basename(os.path.dirname(track_file))
        # NOTE: allow_pickle=True unpickles arbitrary objects; only load
        # track files that come from a trusted source.
        crossing_tracks_list = np.load(track_file, allow_pickle=True)
        darkness_means = np.load(os.path.join(day_folder, camera, 'blue-means.npy'))

        obs = {'date': date,
               'camera': camera,
               }
        obs.update(collect_crossing_observations(crossing_tracks_list, darkness_means))
        observations[date][camera] = obs

In [None]:
# Write each camera's observation dict to
# <observation_root>/<date>/<date>-observation-<camera>.npy.
observation_root = '.../kasanka-bats/processed/deep-learning/observations'
for date, day_obs in observations.items():
    day_folder = os.path.join(observation_root, date)
    os.makedirs(day_folder, exist_ok=True)
    for cam_name, obs in day_obs.items():
        obs_file = os.path.join(day_folder, f'{date}-observation-{cam_name}.npy')
        np.save(obs_file, obs)

In [None]:
# Pool the darkness values of every camera and day, and additionally split
# them into tracks shorter than the threshold and all remaining tracks.
all_darkness = []
short_darkness = []
long_darkness = []
track_length_thresh = 5
for day_obs in observations.values():
    for obs in day_obs.values():
        is_short = obs['track_length'] < track_length_thresh
        short_darkness.extend(obs['darkness'][is_short])
        long_darkness.extend(obs['darkness'][~is_short])
        all_darkness.extend(obs['darkness'])

In [None]:
# Display the darkness bin edges computed with np.linspace in the binning
# cells earlier in this notebook.
dark_bins

In [None]:
# Figure 1: darkness histogram over every track.
_ = plt.hist(all_darkness, bins=200)

colors = ["#393939", "#909090"]

plt.xlabel('Average pixel value of frame (blue channel)')
plt.ylabel('Number of tracks')

plot_title = "Track darkness distribution"

# Figure 2: stacked long/short track histogram below the darkness threshold,
# with dashed vertical guide lines.
plt.figure()

long_darkness = np.array(long_darkness)
short_darkness = np.array(short_darkness)

darkness_threshold = 170

for guide_position in (37.68123528, 75.36247056, 113.04370584, 151):
    plt.axvline(guide_position, c='gray', linestyle='--')

long_below = long_darkness[long_darkness < darkness_threshold]
short_below = short_darkness[short_darkness < darkness_threshold]
_ = plt.hist([long_below, short_below],
             bins=200,
             stacked=True,
             label=['long tracks (>= 5 frames)', 'short tracks (< 5 frames)'],
             color=[(0, 0, 0), (.75, .75, .75)])
plt.xlim(0, darkness_threshold)
tick_positions = np.arange(0, 170, 20)
plt.xticks(tick_positions, tick_positions)
plt.xlabel('Average pixel value of frame (blue channel)')
plt.ylabel('Number of tracks')
plt.legend()

plot_title = "Track darkness distribution with noise shown and bin lines"
save_fig(plots_save_folder, plot_title, fig=None)

# Figure 3: darkness histogram of the long tracks only (not saved).
plt.figure()

_ = plt.hist(long_darkness, bins=200)

plt.xlabel('Average pixel value of frame (blue channel)')
plt.ylabel('Number of tracks')

plot_title = "Track darkness distribution"


# plt.title(plot_title)

# save_fig(plots_save_folder, plot_title, fig=None)

In [None]:
# Flatten the per-day, per-camera observations into two parallel lists:
# one darkness value and one track length per track.
camera_observations = [obs
                       for day_obs in observations.values()
                       for obs in day_obs.values()]
darkness = [value for obs in camera_observations for value in obs['darkness']]
track_length = [value for obs in camera_observations for value in obs['track_length']]

In [None]:
# Order the tracks by length (ties ordered by darkness), keeping the two
# arrays aligned.
sorted_track_length = np.array(sorted(track_length))
sorted_darkness = np.array(
    [dark for _length, dark in sorted(zip(track_length, darkness))])

# Keep only tracks whose darkness lies strictly between 0 and 170.
in_darkness_range = (sorted_darkness < 170) & (sorted_darkness > 0)
sorted_track_length = sorted_track_length[in_darkness_range]
sorted_darkness = sorted_darkness[in_darkness_range]

# Drop the longest tracks: only the first `percent` fraction of the
# length-sorted tracks is kept as the "core" set.
percent = .999
num_core = int(len(sorted_track_length)*percent)

core_darkness = sorted_darkness[:num_core]
core_length = sorted_track_length[:num_core]

_ = plt.hist(core_length, bins=100)


In [None]:
# Small worked example of per-column normalisation: each column of H is
# divided by its own sum, so every column of H_norm adds up to 1.
H = np.array([[1, 2, 3], [2, 3, 4]])
print(H.shape)
column_totals = H.sum(axis=0, keepdims=True)
H_norm = H / column_totals
print(H_norm.shape)
print(H_norm)
print(H_norm.sum(axis=0))

In [None]:
# 2D histogram of track counts: 15 darkness bins by 50 track-length bins.
H, xedges, yedges = np.histogram2d(
    core_darkness, core_length, bins=(15, 50), density=False)
H = H.T  # Let each row list bins with common y range.

# Each darkness column divided by its own total (computed but not drawn here).
H_norm = H / H.sum(axis=0, keepdims=True)

X, Y = np.meshgrid(xedges, yedges)

fig, ax = plt.subplots(1, 1)
im = ax.pcolormesh(X, Y, H)
ax.set_xlabel('Average pixel value of frame (blue channel)')
ax.set_ylabel('Track length')
fig.colorbar(im)

plot_title = "track length and darkness 2dhistogram"


# plt.title(plot_title)

save_fig(plots_save_folder, plot_title, fig=fig)

In [None]:
# Order the tracks by length (ties ordered by darkness), keeping the two
# arrays aligned.
sorted_track_length = np.array(sorted(track_length))
sorted_darkness = np.array(
    [dark for _length, dark in sorted(zip(track_length, darkness))])

# Keep only tracks whose darkness lies strictly between 25 and 170.
in_darkness_range = (sorted_darkness < 170) & (sorted_darkness > 25)
sorted_track_length = sorted_track_length[in_darkness_range]
sorted_darkness = sorted_darkness[in_darkness_range]

# Drop the longest tracks: only the first `percent` fraction of the
# length-sorted tracks is kept as the "core" set.
percent = .999
num_core = int(len(sorted_track_length)*percent)

core_darkness = sorted_darkness[:num_core]
core_length = sorted_track_length[:num_core]

In [None]:

# 2D histogram of track counts: 15 darkness bins by 50 track-length bins.
H, xedges, yedges = np.histogram2d(
    core_darkness, core_length, bins=(15, 50), density=False)
H = H.T  # Let each row list bins with common y range.

# Each darkness column divided by its own total.
H_norm = H / H.sum(axis=0, keepdims=True)

X, Y = np.meshgrid(xedges, yedges)

# Draw and save the same heat map twice: raw counts first, then the
# column-normalised version.
for heat_values, plot_title in [
        (H,
         "track length and darkness 2dhistogram above 25 darkness"),
        (H_norm,
         "track length and darkness 2dhistogram above 25 darkness normalized in each darkness column"),
]:
    fig, ax = plt.subplots(1, 1)
    im = ax.pcolormesh(X, Y, heat_values)
    ax.set_xlabel('Average pixel value of frame (blue channel)')
    ax.set_ylabel('Track length')
    fig.colorbar(im)

    # plt.title(plot_title)

    save_fig(plots_save_folder, plot_title, fig=fig)