In [None]:
import json
import os
import pandas as pd
import numpy as np
from datetime import datetime, timezone, timedelta, time
from research.utils.data_access_utils import RDSAccessUtils
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point
import matplotlib.pyplot as plt
from matplotlib.dates import AutoDateFormatter, AutoDateLocator

# Read the warehouse credentials from the JSON file named by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The `with` block closes
# the file handle as soon as the credentials are parsed (the bare
# `json.load(open(...))` form left it open until garbage collection).
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
%%javascript
// Replace OutputArea.prototype._should_scroll with a function that ignores
// `lines` and always returns false.
// NOTE(review): presumably this stops long cell outputs from being collapsed
// into a scroll box (inferred from the method name) and relies on the global
// `IPython` object of the classic Notebook front end -- confirm before reuse.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# In-memory cache of warehouse results, keyed by the exact SQL text.
# Re-running this cell empties the cache.
queryCache = dict()

In [None]:
# Pens / capture windows this notebook has been run against, keyed by pen id.
# These used to be three successive `pen = {...}` assignments of which only the
# last one took effect; keeping them in a table makes the active choice explicit.
PEN_CONFIGS = {
    108: {
        'pen_id': 108,
        'start_date': '2020-05-09 00:00',
        'end_date': '2020-05-12 00:00'
    },
    56: {
        'pen_id': 56,
        'start_date': '2020-08-12 00:00',
        'end_date': '2020-08-17 00:00'
    },
    88: {
        'pen_id': 88,
        'start_date': '2020-02-12 00:00',
        'end_date': '2020-02-19 00:00'
    },
}

# Pen analysed by the rest of the notebook (same one the last assignment selected).
SELECTED_PEN_ID = 88
pen = PEN_CONFIGS[SELECTED_PEN_ID]

# Only biomass computations with akpd_score strictly above this value are queried.
akpd_filter = 0.99

In [None]:
# Values substituted, in order, into the %-placeholders of the SQL template.
# They all come from the notebook's own configuration cell.
query_args = (
    pen['pen_id'], pen['start_date'], pen['end_date'], akpd_filter,
    pen['start_date'], pen['end_date'], pen['pen_id'], pen['pen_id'],
)

query = """
    SELECT * FROM (
      (SELECT * FROM prod.crop_annotation cas
      INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
      WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
      AND cas.annotation_state_id = 3
      AND cas.pen_id=%i) a
    RIGHT JOIN 
      (SELECT left_crop_url as lcu, estimated_weight_g, akpd_score, estimated_k_factor FROM prod.biomass_computations
      WHERE prod.biomass_computations.captured_at >= '%s'
      AND prod.biomass_computations.captured_at <= '%s'
      AND prod.biomass_computations.akpd_score > %0.4f) bc 
    ON 
      (a.left_crop_url=bc.lcu)
    ) x
    WHERE x.captured_at >= '%s'
    AND x.captured_at <= '%s'
    AND x.pen_id = %i
    AND x.group_id = '%i';
""" % query_args

# Only hit the warehouse on a cache miss; always work on a private copy so the
# cached frame is never modified by the steps below.
if query not in queryCache:
    queryCache[query] = rds_access_utils.extract_from_database(query)
df = queryCache[query].copy()

# Parse timestamps on `df`, then derive `captures` as the time-ordered frame
# with a fresh 0..n-1 index.
df['captured_at'] = pd.to_datetime(df['captured_at'])
captures = df.sort_values(['captured_at']).reset_index(drop=True)

print(len(captures))

In [None]:
# captures = pd.read_csv('bolaks_pen_id_88_2020-02-10_2020-03-10.csv')
# `.ix` was deprecated in pandas 0.20 and removed in 1.0. On an integer index it
# selected by label, so `.loc` is the equivalent accessor.
captures.loc[0]

In [None]:
# df.head(20)

def get_eye_wkps(row1, row2):
    """Return the 'EYE' entries of pixel2world's output for two capture rows.

    Each row must expose `annotation` (with 'leftCrop' and 'rightCrop' keys)
    and `camera_metadata`. The result is a 2-tuple ordered (row1, row2).
    """
    eye_points = []
    for capture in (row1, row2):
        annotation = capture.annotation
        world_keypoints = pixel2world(
            annotation['leftCrop'], annotation['rightCrop'], capture.camera_metadata
        )
        eye_points.append(world_keypoints['EYE'])

    first_eye, second_eye = eye_points
    return first_eye, second_eye

# Keypoint names compared, in this order, when measuring how far two captures
# are apart.
body_parts = [
    'TAIL_NOTCH',
    'ADIPOSE_FIN',
    'UPPER_LIP',
    'ANAL_FIN',
    'PELVIC_FIN',
    'EYE',
    'PECTORAL_FIN',
    'DORSAL_FIN',
]

def get_median_distance(row1, row2):
    """Median per-keypoint distance between two captures.

    Both rows are converted with pixel2world; for every name in `body_parts`
    the Euclidean norm of the difference between the two keypoints is taken,
    and the median of those norms is returned.

    Returns None as soon as a keypoint in either capture has a negative
    second coordinate (index 1).
    """
    annotation_a, annotation_b = row1.annotation, row2.annotation
    keypoints_a = pixel2world(annotation_a['leftCrop'], annotation_a['rightCrop'], row1.camera_metadata)
    keypoints_b = pixel2world(annotation_b['leftCrop'], annotation_b['rightCrop'], row2.camera_metadata)

    per_part = []
    for part in body_parts:
        point_a = keypoints_a[part]
        point_b = keypoints_b[part]

        # A negative coordinate at index 1 invalidates the whole comparison.
        if point_a[1] < 0 or point_b[1] < 0:
            return None

        per_part.append(np.linalg.norm(point_a - point_b))

    return np.median(per_part)

def get_median_distance_adj(row1, row2):
    """Median per-keypoint distance for rows whose fields are stored as text.

    Same computation as `get_median_distance`, but `annotation` and
    `camera_metadata` are single-quoted JSON-like strings that are parsed
    first.

    Returns None as soon as a keypoint in either capture has a negative
    second coordinate (index 1); otherwise the median of the per-keypoint
    Euclidean distances.
    """
    def _parse(raw):
        # The stored text uses single quotes; swap them for double quotes so
        # that json.loads accepts it.
        return json.loads(raw.replace("\'", "\""))

    # Parse every field once (each annotation used to be parsed twice).
    ann1, cm1 = _parse(row1.annotation), _parse(row1.camera_metadata)
    ann2, cm2 = _parse(row2.annotation), _parse(row2.camera_metadata)

    wkps1 = pixel2world(ann1['leftCrop'], ann1['rightCrop'], cm1)
    wkps2 = pixel2world(ann2['leftCrop'], ann2['rightCrop'], cm2)

    distances = []

    for body_part in body_parts:
        if wkps1[body_part][1] < 0 or wkps2[body_part][1] < 0:
            return None

        # Previously missing: without this append the function always returned
        # the median of an empty list (NaN).
        distances.append(np.linalg.norm(wkps1[body_part] - wkps2[body_part]))

    return np.median(distances)

def get_eye_distance(row1, row2):
    """Distance between the 'EYE' keypoints of two captures.

    Both rows are converted with pixel2world. Returns None when either eye
    has a negative second coordinate (index 1); otherwise the Euclidean norm
    of the difference between the two eye keypoints.
    """
    annotation_a, annotation_b = row1.annotation, row2.annotation
    eye_a = pixel2world(annotation_a['leftCrop'], annotation_a['rightCrop'], row1.camera_metadata)['EYE']
    eye_b = pixel2world(annotation_b['leftCrop'], annotation_b['rightCrop'], row2.camera_metadata)['EYE']

    has_negative_coordinate = eye_a[1] < 0 or eye_b[1] < 0
    return None if has_negative_coordinate else np.linalg.norm(eye_a - eye_b)

In [None]:
# Make sure timestamps are parsed and the frame is time-ordered.
captures['captured_at'] = pd.to_datetime(captures['captured_at'])
captures = captures.sort_values(['captured_at'])

# First and last calendar day that contain at least one capture.
startDate = captures.captured_at.min().date()
endDate = captures.captured_at.max().date()
delta = endDate - startDate

# One UTC-midnight datetime per calendar day in [startDate, endDate].
dates = [
    datetime.combine(startDate + timedelta(days=offset), time.min).replace(tzinfo=timezone.utc)
    for offset in range(delta.days + 1)
]

In [None]:

# pairs.p1_captured_at = pd.to_datetime(pairs['p1_captured_at'])
# pairs.p2_captured_at = pd.to_datetime(pairs['p2_captured_at'])

#captures.ix[0]['annotation']


In [None]:
# Time elapsed between each capture and the one before it.
gaps = captures['captured_at'].diff()

# Fraction of consecutive captures closer together than 0.5 s, 1.5 s and 2.5 s.
tuple(np.mean(gaps < timedelta(seconds=limit)) for limit in (0.5, 1.5, 2.5))

In [None]:
# Print, for every day, how many captures fall strictly between that day's
# midnight and the next one.
one_day = timedelta(days=1)
for date in dates:
    captured = captures['captured_at']
    daily_captures = (captured > date) & (captured < (date + one_day))

    print(date, daily_captures.sum())

In [None]:
# --- Accumulators read by the summary / export cells further down -----------
found_pairs = []        # one 25-field row per newly created pair (same order as the CSV export header)
linkages = []           # [indices, weights, k-factors, speeds, mean weight, mean k-factor, mean speed, member count]
singles = []            # [index, weight_g, k_factor] of captures not (yet) matched to another capture
weights_all = []        # estimated weight of every capture visited
weights_pairs = []      # weights of both members of every accepted match
k_factor_all = []       # estimated k-factor of every capture visited
k_factor_pairs = []     # k-factors of both members of every accepted match
existing_indices = []   # capture indices that already belong to a linkage

# --- Matching parameters -----------------------------------------------------
num_sec = 10                # only captures less than this many seconds apart are compared
pixel_threshold = 300       # max eye-vs-tail displacement mismatch, in pixels, per axis
weight_g_threshold = 500    # max allowed difference in estimated weight between two matched captures
k_factor_treshold = 0.3     # max allowed difference in estimated k-factor (name spelling kept as-is)

for date in dates:
    mask = (captures['captured_at'] > date) & (captures['captured_at'] < (date + timedelta(days = 1)))

    old_datas = []   # captures of this day that are still inside the num_sec window
    count = 0        # accepted matches for this day
    bad_count = 0    # candidates rejected for distance/time or weight/k-factor reasons

    for index, row in captures[mask].iterrows():
        current_captured_at = row.captured_at

        # Forget earlier captures that have dropped out of the time window.
        old_datas = [ old_data for old_data in old_datas if np.abs(current_captured_at - old_data['captured_at']) < timedelta(seconds = num_sec) ]

        # Reference keypoints of the current capture (left crop only).
        # Note that the "tail" reference is the ANAL_FIN keypoint.
        left_crops = row['annotation']['leftCrop']
        left_tail = [crop for crop in left_crops if crop['keypointType'] == 'ANAL_FIN'][0]
        left_eye = [crop for crop in left_crops if crop['keypointType'] == 'EYE'][0]

        k_factor = row['estimated_k_factor']
        weight_g = row['estimated_weight_g']

        weights_all.append(weight_g)
        k_factor_all.append(k_factor)
        # Every capture starts out as a single; it is removed again once matched.
        singles.append([index, weight_g, k_factor])

        for old_data in old_datas:
            old_eye = old_data['eye']
            old_tail = old_data['tail']
            old_captured_at = old_data['captured_at']
            old_index = old_data['index']
            old_k_factor = old_data['k_factor']
            old_weight_g = old_data['weight_g']

            # `.ix` was removed in pandas 1.0; on this integer index it was
            # label-based, so `.loc` is the equivalent lookup.
            distance = get_median_distance(captures.loc[old_index], captures.loc[index])
            time_seconds = (current_captured_at - old_captured_at).total_seconds()

            weight_g_diff = np.abs(weight_g - old_weight_g)
            k_factor_diff = np.abs(k_factor - old_k_factor)

            if distance is None or time_seconds < .1:
                bad_count = bad_count + 1
                continue

            speed = distance / time_seconds

            # Eye and tail must have moved by a similar amount along each axis.
            shift_condition = np.abs((old_eye['xFrame'] - left_eye['xFrame']) - (old_tail['xFrame'] - left_tail['xFrame'])) < pixel_threshold
            vertical_condition = np.abs((old_eye['yFrame'] - left_eye['yFrame']) - (old_tail['yFrame'] - left_tail['yFrame'])) < pixel_threshold

            # The earlier capture must face the same way as the current one and
            # lie behind it along the direction the eye points.
            if left_tail['xFrame'] < left_eye['xFrame']:
                horizontal_condition = old_tail['xFrame'] < old_eye['xFrame'] and old_eye['xFrame'] < left_eye['xFrame'] and old_tail['xFrame'] < left_tail['xFrame']
            else:
                horizontal_condition = old_tail['xFrame'] > old_eye['xFrame'] and old_eye['xFrame'] > left_eye['xFrame'] and old_tail['xFrame'] > left_tail['xFrame']

            if not (horizontal_condition and shift_condition and vertical_condition):
                continue

            if weight_g_diff > weight_g_threshold or k_factor_diff > k_factor_treshold:
                bad_count = bad_count + 1
                continue

            count = count + 1

            weights_pairs.append(weight_g)
            weights_pairs.append(old_weight_g)
            # Recorded for both directions now; the former right-to-left branch
            # skipped these two appends, so k_factor_pairs missed those matches.
            k_factor_pairs.append(k_factor)
            k_factor_pairs.append(old_k_factor)
            singles = [ s for s in singles if s[0] not in (index, old_index) ]

            if speed > 2:
                print(old_index, index, speed, distance, time_seconds)

            if not (index in existing_indices or old_index in existing_indices):
                # Neither capture is linked yet: start a new two-member linkage.
                found_pairs.append([
                    old_index, index, old_captured_at, current_captured_at,
                    speed, distance, time_seconds,
                    weight_g, k_factor, old_weight_g, old_k_factor,
                    weight_g_diff, k_factor_diff,
                    np.abs(old_eye['xFrame'] - left_eye['xFrame']),
                    np.abs(old_eye['yFrame'] - left_eye['yFrame']),
                    np.abs(old_tail['xFrame'] - left_tail['xFrame']),
                    np.abs(old_tail['yFrame'] - left_tail['yFrame']),
                    old_eye['xFrame'], old_eye['yFrame'],
                    old_tail['xFrame'], old_tail['yFrame'],
                    left_eye['xFrame'], left_eye['yFrame'],
                    left_tail['xFrame'], left_tail['yFrame'],
                ])
                linkages.append([[old_index, index], [weight_g, old_weight_g], [k_factor, old_k_factor], [ speed ], (weight_g + old_weight_g) / 2, (k_factor + old_k_factor) / 2, speed, 2])
                existing_indices.append(old_index)
                existing_indices.append(index)
            else: # in the future, instead of throwing away. Average over new speed
                # At least one capture is already linked: extend the first
                # linkage containing either index and refresh its aggregates.
                foundLinkage = [ linkage for linkage in linkages if (old_index in linkage[0] or index in linkage[0]) ][0]
                if old_index not in foundLinkage[0]:
                    foundLinkage[0].append(old_index)
                    foundLinkage[1].append(old_weight_g)
                    foundLinkage[2].append(old_k_factor)
                    existing_indices.append(old_index)
                if index not in foundLinkage[0]:
                    foundLinkage[0].append(index)
                    foundLinkage[1].append(weight_g)
                    foundLinkage[2].append(k_factor)
                    existing_indices.append(index)
                foundLinkage[3].append(speed)
                foundLinkage[4] = np.mean(foundLinkage[1])
                foundLinkage[5] = np.mean(foundLinkage[2])
                foundLinkage[6] = np.mean(foundLinkage[3])
                foundLinkage[7] = len(foundLinkage[0])

        # Make the current capture available as a candidate for later ones.
        old_datas.append({
            'eye': left_eye,
            'tail': left_tail,
            'captured_at': current_captured_at,
            'index': index,
            'k_factor': k_factor,
            'weight_g': weight_g
        })

    print('Count', date, bad_count, count)


In [None]:
# (linkages per capture, captures, paired weights, linkages, unmatched singles).
# Counts come straight from `linkages` / `singles`: `newLinkages` and
# `newSingles` have one entry per linkage / single but are only created in a
# later cell, so using them here raised NameError on a fresh top-to-bottom run.
len(linkages) / len(weights_all), len(weights_all), len(weights_pairs), len(linkages), len(singles)

In [None]:
# Inspect the first linkage. Layout, as built by the pairing loop:
# [capture indices, weights, k-factors, speeds,
#  mean weight, mean k-factor, mean speed, number of captures]
linkages[0]

In [None]:
# Aggregate columns of every linkage: [mean weight, mean k-factor, mean speed, member count].
newLinkages = np.array([ entry[4:] for entry in linkages ])
# Weight and k-factor of the captures that were never matched.
newSingles = np.array([ single[1] for single in singles ])
KFSingles = np.array([ single[2] for single in singles ])

#plt.hist(newLinkages[:,2])

linkage_weights = newLinkages[:,0]
linkage_k_factors = newLinkages[:,1]

# De-duplicated population: one value per linkage plus every unmatched single.
overallDedup = np.concatenate((linkage_weights, newSingles))
KFDedup = np.concatenate((linkage_k_factors, KFSingles))

# Means of: all captures, paired captures, linkages, singles, de-duplicated set
# (first line weights, second line k-factors).
for series_group in (
    (weights_all, weights_pairs, linkage_weights, newSingles, overallDedup),
    (k_factor_all, k_factor_pairs, linkage_k_factors, KFSingles, KFDedup),
):
    print(*(np.mean(values) for values in series_group))

# Percentiles 0..99 of the raw and the de-duplicated weights.
percentile_levels = np.arange(0, 100, 1)
all_pct = [ np.percentile(weights_all, level) for level in percentile_levels ]
#new_pct.append(np.percentile(newLinkages[:,0], i))
new_pct = [ np.percentile(overallDedup, level) for level in percentile_levels ]

# plt.scatter(all_pct, new_pct)
# xpoints = ypoints = plt.xlim()
# plt.plot(xpoints, ypoints, linestyle='--', color='k', lw=3, scalex=False, scaley=False)
# plt.xlabel('All weights')
# plt.ylabel('De-duplicated weights')
# plt.title('All weights vs deduplicated weights')

# fig, axes = plt.subplots(1, 1, figsize=(10, 10))
# axes.hist(weights_all, bins = 50, density = True, alpha = 0.5, label = 'All')
# axes.hist(overallDedup, bins = 50, density = True, alpha = 0.5, label = 'Singles + Deduplicated Duplicates')
# axes.legend()

# Overlay the k-factor distribution before and after de-duplication.
fig, axes = plt.subplots(1, 1, figsize=(10, 10))
for values, label in (
    (k_factor_all, 'All'),
    (KFDedup, 'Singles + Deduplicated Duplicates'),
):
    axes.hist(values, bins = 50, density = True, alpha = 0.5, label = label)
axes.legend()

# Ratios de-duplicated / all: mean weight, mean k-factor, weight standard deviation.
np.mean(overallDedup) / np.mean(weights_all), np.mean(KFDedup) / np.mean(k_factor_all), np.std(overallDedup) / np.std(weights_all)

In [None]:
# Compare the k-factor distribution of unmatched captures with that of the
# linkage averages.
fig, axes = plt.subplots(1, 1, figsize=(10, 10))
for values, label in (
    (KFSingles, 'Singles'),
    (newLinkages[:,1], 'Deduplicated Duplicates'),
):
    axes.hist(values, bins = 20, density = True, alpha = 0.5, label = label)
axes.legend()

In [None]:
import csv

# Column names, in the same order as the fields of each `found_pairs` row.
csv_header = [
    'p1', 'p2', 'p1_captured_at', 'p2_captured_at',
    'speed', 'distance', 'time',
    'weight_g', 'k_factor', 'old_weight_g', 'old_k_factor',
    'weight_g_diff', 'k_factor_diff',
    'eye_to_eye_xFrame', 'eye_to_eye_yFrame',
    'tail_to_tail_xFrame', 'tail_to_tail_yFrame',
    'old_eye_xFrame', 'old_eye_yFrame',
    'old_tail_xFrame', 'old_tail_yFrame',
    'left_eye_xFrame', 'left_eye_yFrame',
    'left_tail_xFrame', 'left_tail_yFrame',
]

# Dump the header followed by every detected pair to a CSV in the working directory.
with open('duplicate_detections.csv', 'w') as outcsv:
    writer = csv.writer(outcsv, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL, lineterminator='\n')
    writer.writerow(csv_header)
    writer.writerows(found_pairs)

In [None]:
# Load the pairs back from the CSV written by the export cell, so the analysis
# below works on a DataFrame with named columns.
duplicate_detections = pd.read_csv('duplicate_detections.csv')



In [None]:
# Display the detected pairs (the commented line below restricts the view to
# pairs with speed above 1.5).
duplicate_detections
#duplicate_detections[duplicate_detections['speed'] > 1.5]



In [None]:
# One point per detected pair: time gap between its two captures vs. speed.
plt.scatter(duplicate_detections.time, duplicate_detections.speed)
plt.xlabel('Time between frames')
# Axis label typo fixed ('Swimmng' -> 'Swimming').
plt.ylabel('Swimming speed')
plt.title('Time between frames vs Swimming speed')

In [None]:
avgs = []

for i in np.arange(10):
    # Bin i holds the pairs whose time gap lies in (i + 0.5, i + 1.5) seconds,
    # i.e. it is centred on i + 1 seconds.
    mask = (duplicate_detections.time > i + 0.5) & (duplicate_detections.time < (i + 1.5))
    avg_speed = np.mean(duplicate_detections.speed[mask])
    # Distance ~ mean speed x bin-centre time. The factor used to be (i - 1),
    # which made the first bin negative and the second bin zero.
    avg_distance = avg_speed * (i + 1)
    print(i, np.sum(mask), avg_distance)
    avgs.append(avg_distance)

#plt.scatter(duplicate_detections.time, duplicate_detections.k_factor)
# x positions are the bin centres (1..10 seconds).
plt.plot(np.arange(10) + 1, avgs)
plt.xlabel('Time between frames')
plt.ylabel('Distance')
plt.title('Time between frames vs distance')

In [None]:
# NOTE(review): this cell repeats the preceding "distance by time gap" cell.
avgs = []

for i in np.arange(10):
    # Bin i holds the pairs whose time gap lies in (i + 0.5, i + 1.5) seconds,
    # i.e. it is centred on i + 1 seconds.
    mask = (duplicate_detections.time > i + 0.5) & (duplicate_detections.time < (i + 1.5))
    avg_speed = np.mean(duplicate_detections.speed[mask])
    # Distance ~ mean speed x bin-centre time. The factor used to be (i - 1),
    # which made the first bin negative and the second bin zero.
    avg_distance = avg_speed * (i + 1)
    print(i, np.sum(mask), avg_distance)
    avgs.append(avg_distance)

#plt.scatter(duplicate_detections.time, duplicate_detections.k_factor)
# x positions are the bin centres (1..10 seconds).
plt.plot(np.arange(10) + 1, avgs)
plt.xlabel('Time between frames')
plt.ylabel('Distance')
plt.title('Time between frames vs distance')

In [None]:
# Mean k-factor of the pairs in each one-second-wide time-gap bin.
avgs = []
time_gap = duplicate_detections.time

for i in np.arange(10):
    lower_edge, upper_edge = i + 0.5, i + 1.5
    mask = (time_gap > lower_edge) & (time_gap < upper_edge)
    avg_k_factor = np.mean(duplicate_detections.k_factor[mask])
    print(i, np.sum(mask), avg_k_factor)
    avgs.append(avg_k_factor)

#plt.scatter(duplicate_detections.time, duplicate_detections.k_factor)
# Plot each bin at its centre (1..10 seconds).
bin_centres = np.arange(10) + 1
plt.plot(bin_centres, avgs)
plt.xlabel('Time between frames')
plt.ylabel('K-factor')
plt.title('Time between frames vs K-factor')

In [None]:
# Mean estimated weight of the pairs in each one-second-wide time-gap bin.
avgs = []

for i in np.arange(10):
    # Bin i: time gap in (i + 0.5, i + 1.5) seconds.
    mask = (duplicate_detections.time > i + 0.5) & (duplicate_detections.time < (i + 1.5))
    avg_weight_g = np.mean(duplicate_detections.weight_g[mask])
    print(i, np.sum(mask), avg_weight_g)
    avgs.append(avg_weight_g)

#plt.scatter(duplicate_detections.time, duplicate_detections.k_factor)
plt.plot(np.arange(10) + 1, avgs)
plt.xlabel('Time between frames')
# The y-axis shows the weight_g column; the label used to read 'K-factor'.
plt.ylabel('Weight (g)')
plt.title('Time between frames vs Weight')

In [None]:
# Average of the weight_g column over all detected pairs.
duplicate_detections['weight_g'].mean()

In [None]:
# Mean speed of the pairs in each 1000 g wide weight bin.
avgs = []

for i in np.arange(10):
    # Bin i: weight_g strictly between i * 1000 and (i + 1) * 1000.
    mask = (duplicate_detections.weight_g > (i * 1000)) & (duplicate_detections.weight_g < ((i + 1) * 1000))
    avg_speed = np.mean(duplicate_detections.speed[mask])
    print(i, np.sum(mask), avg_speed)
    avgs.append(avg_speed)

#plt.scatter(duplicate_detections.time, duplicate_detections.k_factor)
# Each bin is drawn at its lower edge (0, 1000, ..., 9000).
plt.plot(np.arange(10) * 1000, avgs)
# The x-axis is weight; the label and title used to describe time gaps.
plt.xlabel('Weight (g)')
plt.ylabel('Speed')
plt.title('Weight vs Swimming speed')

In [None]:
# row1 = captures.ix[1994]
# row2 = captures.ix[1995]

# print(get_eye_wkps(row1, row2))
# print(get_eye_wkps(row2, row1))
# print(get_eye_distance(row1, row2))
# print(get_eye_distance(row2, row1))

In [None]:
#np.mean(duplicate_detections.replace([np.inf, -np.inf], np.nan).dropna(subset=['speed'], how="all")['speed'])


In [None]:
#speed = duplicate_detections.replace([np.inf, -np.inf], np.nan).dropna(subset=['speed'], how="all")['speed']
# The CSV round-trip leaves timestamps as text; parse them before comparing
# against the datetime values in `dates`.
duplicate_detections['p1_captured_at'] = pd.to_datetime(duplicate_detections['p1_captured_at'])

fig, axes = plt.subplots(len(dates), 1, figsize=(10, len(dates) * 5))
# With a single day plt.subplots returns a bare Axes rather than an array, so
# `axes[index]` used to fail; normalise to a 1-D array in every case.
axes = np.atleast_1d(axes)

# One speed histogram per day, using the first capture's timestamp of each pair.
for index, date in enumerate(dates):
    mask = (duplicate_detections['p1_captured_at'] > date) & (duplicate_detections['p1_captured_at'] < (date + timedelta(days = 1)))

    speed = duplicate_detections[mask]['speed']
    axes[index].hist(speed, bins = 20)
    axes[index].set_xlabel('Speed (meters per second)')
    axes[index].set_ylabel('Frequency')
    axes[index].set_title('Swimming Speed Distribution for Pen %i on %s (%i samples)' % (pen['pen_id'], date.strftime('%m/%d/%Y'), np.sum(mask)))

In [None]:
# Speed of every pair over time, with a 5-pair rolling mean on top.
plt.scatter(duplicate_detections['p1_captured_at'], duplicate_detections['speed'])
# Plot the rolling mean against the same timestamps as the scatter. Passing the
# bare Series used its integer row index as x, which does not line up with the
# timestamp axis.
plt.plot(duplicate_detections['p1_captured_at'], duplicate_detections['speed'].rolling(window=5).mean())

In [None]:
# 10-pair rolling mean of speed, drawn against the first capture's timestamp.
ma = duplicate_detections['speed'].rolling(window=10).mean()

fig, axes = plt.subplots(1, 1, figsize=(20, 10))

# Date ticks on the x-axis: locator asked for at least 50 ticks, with the
# matching automatic formatter.
xtick_locator = AutoDateLocator(minticks=50)
xtick_formatter = AutoDateFormatter(xtick_locator)
x_axis = axes.xaxis
x_axis.set_major_locator(xtick_locator)
x_axis.set_major_formatter(xtick_formatter)

axes.plot(duplicate_detections['p1_captured_at'], ma)

In [None]:
# Speed indexed by the first capture's timestamp, so time-based windows work.
ser = pd.Series(duplicate_detections['speed'].values, index = duplicate_detections['p1_captured_at'])
# 6-hour rolling mean, sampled as the last value of every hour; hours without
# pairs carry the previous hour's value forward. `.ffill()` replaces the
# deprecated `fillna(method='ffill')` spelling and gives the same result.
ma = ser.rolling('6h').mean().resample('h').apply(lambda x:x.tail(1) if x.shape[0] else np.nan).ffill()

fig, axes = plt.subplots(1, 1, figsize=(20, 10))

xtick_locator = AutoDateLocator(minticks=50)
xtick_formatter = AutoDateFormatter(xtick_locator)

axes.xaxis.set_major_locator(xtick_locator)
axes.xaxis.set_major_formatter(xtick_formatter)

# Red line: hourly-sampled rolling mean; points: individual pairs.
axes.plot(ma, color = 'red')
axes.set_xlabel('Time')
axes.set_ylabel('Average Speed (meters per second)')
axes.set_title('Average Swimming Speed for Pen %i (%i samples)' % (pen['pen_id'], len(duplicate_detections)))
axes.scatter(duplicate_detections['p1_captured_at'], duplicate_detections['speed'])

In [None]:
# Fraction of pairs whose eye x-displacement exceeds the tail x-displacement
# by less than 100 pixels (the subtraction binds tighter than `<`).
((duplicate_detections['eye_to_eye_xFrame'] - duplicate_detections['tail_to_tail_xFrame']) < 100).mean()

In [None]:
# Capture indices of a pair to inspect by eye.
wrong_pair = (24, 25)

from IPython.display import HTML, Image
# Show the left crop of the first capture of the pair. `.ix` was removed in
# pandas 1.0; `.loc` is the label-based equivalent on this integer index.
Image(url=captures.loc[wrong_pair[0], 'left_crop_url'])

In [None]:
from IPython.display import HTML, Image
# Show the left crop of the second capture of the pair. `.ix` was removed in
# pandas 1.0; `.loc` is the label-based equivalent on this integer index.
Image(url=captures.loc[wrong_pair[1], 'left_crop_url'])