In [None]:
import json, os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors

from scipy import stats

import pytz 

from datetime import timedelta, datetime

from research.utils.data_access_utils import RDSAccessUtils

In [None]:
# Read the database credentials from the JSON file whose path is given by the
# PROD_SQL_CREDENTIALS environment variable. The context manager closes the
# file handle as soon as the JSON has been parsed instead of leaking it for
# the lifetime of the kernel.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# Pen under study. `pen_id` is interpolated into the SQL queries and plot
# titles further down; the site and pen names are human-readable labels that
# are not referenced by any other cell visible here.
site_name, pen_name = 'Seglberget', 'Pen 1'
pen_id = 66

In [None]:
# Fetch the QA annotations (is_qa = true, not skipped) for the pen over the
# study window. `pen_id` is an integer constant set in this notebook, so the
# %i interpolation cannot inject arbitrary SQL.
query = """
    select captured_at, annotation_metadata, annotated_by_email
    from annotations a
    where a.pen_id = %i
    and a.is_qa = true
    and a.is_skipped = false
    and a.captured_at > '2020-03-01'
    and a.captured_at < '2020-04-12';
""" % (pen_id, )

lice_counts = rds_access_utils.extract_from_database(query)

# Index by capture time and sort so the time-based rolling windows used by
# later cells see a monotonic index.
lice_counts.index = lice_counts['captured_at']
lice_counts = lice_counts.sort_index()

# Unpack the adjusted counts from each row's metadata positionally.
# The former row-by-row `.ix[index, ...]` assignment was label-based, so every
# row sharing a `captured_at` timestamp received the values of the last such
# row; `.ix` is also gone from pandas >= 1.0. Building the columns in one pass
# additionally guarantees they exist when the query returns no rows.
lice_metadata = [metadata['liceCounts'] for metadata in lice_counts['annotation_metadata']]
for count_column in ('movingCountAdjusted', 'adultFemaleCountAdjusted'):
    # dtype=float keeps the float64 columns the previous implementation produced.
    lice_counts[count_column] = np.array(
        [counts[count_column] for counts in lice_metadata], dtype = float
    )
lice_counts['count'] = 1.0

qa_lice_counts = lice_counts

In [None]:
# Fetch the non-QA annotations (is_qa = false, not skipped) for the pen over
# the study window. `pen_id` is an integer constant set in this notebook, so
# the %i interpolation cannot inject arbitrary SQL.
query = """
    select captured_at, annotation_metadata, annotated_by_email
    from annotations a
    where a.pen_id = %i
    and a.is_qa = false
    and a.is_skipped = false
    and a.captured_at > '2020-03-01'
    and a.captured_at < '2020-04-12';
""" % (pen_id, )

lice_counts = rds_access_utils.extract_from_database(query)

# Index by capture time and sort so the time-based rolling windows used by
# later cells see a monotonic index.
lice_counts.index = lice_counts['captured_at']
lice_counts = lice_counts.sort_index()

# Unpack the adjusted counts from each row's metadata positionally.
# The former row-by-row `.ix[index, ...]` assignment was label-based, so every
# row sharing a `captured_at` timestamp received the values of the last such
# row; `.ix` is also gone from pandas >= 1.0. Building the columns in one pass
# additionally guarantees they exist when the query returns no rows.
lice_metadata = [metadata['liceCounts'] for metadata in lice_counts['annotation_metadata']]
for count_column in ('movingCountAdjusted', 'adultFemaleCountAdjusted'):
    # dtype=float keeps the float64 columns the previous implementation produced.
    lice_counts[count_column] = np.array(
        [counts[count_column] for counts in lice_metadata], dtype = float
    )
lice_counts['count'] = 1.0

cogito_lice_counts = lice_counts

In [None]:
# Display the non-QA annotation frame (with its extracted count columns) for a
# visual sanity check.
cogito_lice_counts

In [None]:
# Show the raw metadata payload of the first (earliest) non-QA annotation.
# `.iloc` makes the positional lookup explicit; `.ix` no longer exists in
# pandas >= 1.0.
cogito_lice_counts.iloc[0]['annotation_metadata']

In [None]:
# One entry per annotator e-mail (addresses on the aquabyte.ai and
# cogitotech.com domains); each entry is filled below with that annotator's
# daily statistics.
masks = {
    'eirik@aquabyte.ai': {},
    'embla@aquabyte.ai': {},
    'gunnar@aquabyte.ai': {},
    'orjan@aquabyte.ai': {},
    'labeler1@cogitotech.com': {},
    'labeler2@cogitotech.com': {},
    'labeler3@cogitotech.com': {},
    'labeler4@cogitotech.com': {},
    'labeler5@cogitotech.com': {}
}

columns = [ 'movingCountAdjusted', 'adultFemaleCountAdjusted' ]

# Width, in days, of the trailing window used for every rolling statistic.
ROLLING_WINDOW_DAYS = 1


def daily_rolling_stat(annotations, stat, window_days = ROLLING_WINDOW_DAYS):
    """Return one row per calendar day holding a trailing rolling statistic.

    The count columns listed in `columns` are passed through a time-based
    rolling window of `window_days` days, reduced with the rolling method
    named by `stat`, and then resampled to daily frequency keeping the last
    row that falls in each day (NaN for days without any row).

    Parameters
    ----------
    annotations : DataFrame indexed by capture time and containing `columns`.
    stat : name of the rolling reduction to call, e.g. 'mean', 'std', 'count'.
    window_days : rolling window width in days.

    Returns
    -------
    Whatever `resample('D').apply(...)` yields for the rolled frame; later
    cells index it by the names in `columns`.
    """
    rolled = getattr(annotations[columns].rolling('%iD' % (window_days, )), stat)()
    return rolled.resample('D').apply(lambda x: x.tail(1) if x.shape[0] else np.nan)


# Per-annotator daily statistics. `value` is the (initially empty) dict stored
# under `key`, so writing into it fills `masks[key]`.
for key, value in masks.items():
    cogito_by_annotator = cogito_lice_counts[cogito_lice_counts['annotated_by_email'] == key]
    qa_by_annotator = qa_lice_counts[qa_lice_counts['annotated_by_email'] == key]

    value['cogito_daily'] = daily_rolling_stat(cogito_by_annotator, 'mean')
    value['qa_daily'] = daily_rolling_stat(qa_by_annotator, 'mean')
    value['cogito_daily_count'] = daily_rolling_stat(cogito_by_annotator, 'count')
    value['qa_daily_count'] = daily_rolling_stat(qa_by_annotator, 'count')
    value['cogito_daily_sd'] = daily_rolling_stat(cogito_by_annotator, 'std')

# Pooled daily statistics over all non-QA annotations ...
cogito_daily = daily_rolling_stat(cogito_lice_counts, 'mean')
cogito_daily_sd = daily_rolling_stat(cogito_lice_counts, 'std')
cogito_daily_count = daily_rolling_stat(cogito_lice_counts, 'count')

# ... and over all QA annotations.
qa_daily = daily_rolling_stat(qa_lice_counts, 'mean')
qa_daily_sd = daily_rolling_stat(qa_lice_counts, 'std')
qa_daily_count = daily_rolling_stat(qa_lice_counts, 'count')


In [None]:
# Name the annotator to overlay explicitly rather than reading a loop variable
# left behind by an earlier cell. This is the last key of `masks`, i.e. the
# annotator a fresh top-to-bottom run would have selected.
labeler = 'labeler5@cogitotech.com'
column = 'movingCountAdjusted'

fig, ax = plt.subplots()
ax.plot(cogito_daily[column], label = 'Cogito (all annotators)')
ax.plot(qa_daily[column], label = 'QA (all annotators)')
ax.plot(masks[labeler]['cogito_daily'][column], label = labeler)
ax.set_xlabel('Date')
ax.set_ylabel(column)
ax.legend();

In [None]:
# Compare each annotator's daily mean with the QA daily mean for one count
# column, plot the absolute and relative differences, and print rough
# significance figures per annotator.
column = 'movingCountAdjusted'
infinities = [np.inf, -np.inf]

fig, axes = plt.subplots(nrows = 2, ncols = 1, figsize = (15, 20))

for key, value in masks.items():
    labeler_daily = value['cogito_daily'][column]

    # Absolute deviation from the QA daily mean, and the same deviation as a
    # fraction of the QA mean. A QA mean of zero produces +/-inf, which would
    # swamp the average and the axis limits, so it is treated as missing.
    difference_cogito = labeler_daily - qa_daily[column]
    difference_qa = (difference_cogito / qa_daily[column]).replace(infinities, np.nan)

    # Series.mean() skips NaN and returns NaN (without warnings) when nothing is left.
    key_cogito = '%s: %0.2f' % (key, difference_cogito.mean())
    key_qa = '%s: %0.0f%%' % (key, difference_qa.abs().mean() * 100)

    # Daily sample sizes: this annotator, all non-QA annotations, all QA annotations.
    c0 = value['cogito_daily_count'][column]
    c1 = cogito_daily_count[column]
    c2 = qa_daily_count[column]

    # Matching daily variances.
    v0 = value['cogito_daily_sd'][column] ** 2
    v1 = cogito_daily_sd[column] ** 2
    v2 = qa_daily_sd[column] ** 2

    # Daily two-sample statistics |mean_a - mean_b| / sqrt(var_a/n_a + var_b/n_b).
    # t1 contrasts the annotator with the pooled non-QA mean, so its numerator
    # must be the annotator-minus-pool difference to match the pooled standard
    # error in its denominator; t2 contrasts the annotator with QA.
    # Zero variance on both sides gives inf; treat that as missing too.
    difference_pool = labeler_daily - cogito_daily[column]
    t1 = (difference_pool.abs() / np.sqrt(v0 / c0 + v1 / c1)).replace(infinities, np.nan)
    t2 = (difference_cogito.abs() / np.sqrt(v0 / c0 + v2 / c2)).replace(infinities, np.nan)

    # Summarise the daily statistics by their median and mean (NaN-skipping).
    z1 = t1.median()
    z2 = t2.median()
    z3 = t1.mean()
    z4 = t2.mean()

    # The statistics are absolute values, so the matching normal tail
    # probability is two-sided: 2 * P(Z > z). `sf` is the survival function,
    # i.e. 1 - cdf without the cancellation error.
    p1 = 2 * stats.norm.sf(z1)
    p2 = 2 * stats.norm.sf(z2)
    p3 = 2 * stats.norm.sf(z3)
    p4 = 2 * stats.norm.sf(z4)

    #print('%0.2f %0.2f %0.2f %0.2f %s' % (z1, z2, z3, z4, key))
    print('%0.2f %0.2f %0.2f %0.2f %s' % (p1, p2, p3, p4, key))

    # Plot each series against its own index: the subtraction aligns on the
    # union of the two date ranges, which need not equal cogito_daily.index.
    axes[0].bar(difference_cogito.index, difference_cogito, label = key_cogito)
    axes[1].bar(difference_qa.index, difference_qa, label = key_qa)

for ax in axes:
    ax.axhline(0)
    ax.set_xlabel('Date')
    ax.set_ylabel('Difference')
    ax.legend()

axes[0].set_title('Pen %i: Absolute Difference between QA Mobile Average' % (pen_id, ))
axes[1].set_title('Pen %i: Percent Difference between QA Mobile Average' % (pen_id, ));

In [None]:
# Compare each annotator's daily mean with the QA daily mean for one count
# column, plot the absolute and relative differences, and print rough
# significance figures per annotator.
column = 'adultFemaleCountAdjusted'
infinities = [np.inf, -np.inf]

fig, axes = plt.subplots(nrows = 2, ncols = 1, figsize = (15, 20))

for key, value in masks.items():
    labeler_daily = value['cogito_daily'][column]

    # Absolute deviation from the QA daily mean, and the same deviation as a
    # fraction of the QA mean. A QA mean of zero produces +/-inf, which would
    # swamp the average and the axis limits, so it is treated as missing.
    difference_cogito = labeler_daily - qa_daily[column]
    difference_qa = (difference_cogito / qa_daily[column]).replace(infinities, np.nan)

    # Series.mean() skips NaN and returns NaN (without warnings) when nothing is left.
    key_cogito = '%s: %0.2f' % (key, difference_cogito.mean())
    key_qa = '%s: %0.0f%%' % (key, difference_qa.abs().mean() * 100)

    # Daily sample sizes: this annotator, all non-QA annotations, all QA annotations.
    c0 = value['cogito_daily_count'][column]
    c1 = cogito_daily_count[column]
    c2 = qa_daily_count[column]

    # Matching daily variances.
    v0 = value['cogito_daily_sd'][column] ** 2
    v1 = cogito_daily_sd[column] ** 2
    v2 = qa_daily_sd[column] ** 2

    # Daily two-sample statistics |mean_a - mean_b| / sqrt(var_a/n_a + var_b/n_b).
    # t1 contrasts the annotator with the pooled non-QA mean, so its numerator
    # must be the annotator-minus-pool difference to match the pooled standard
    # error in its denominator; t2 contrasts the annotator with QA.
    # Zero variance on both sides gives inf; treat that as missing too.
    difference_pool = labeler_daily - cogito_daily[column]
    t1 = (difference_pool.abs() / np.sqrt(v0 / c0 + v1 / c1)).replace(infinities, np.nan)
    t2 = (difference_cogito.abs() / np.sqrt(v0 / c0 + v2 / c2)).replace(infinities, np.nan)

    # Summarise the daily statistics by their median and mean (NaN-skipping).
    z1 = t1.median()
    z2 = t2.median()
    z3 = t1.mean()
    z4 = t2.mean()

    # The statistics are absolute values, so the matching normal tail
    # probability is two-sided: 2 * P(Z > z). `sf` is the survival function,
    # i.e. 1 - cdf without the cancellation error.
    p1 = 2 * stats.norm.sf(z1)
    p2 = 2 * stats.norm.sf(z2)
    p3 = 2 * stats.norm.sf(z3)
    p4 = 2 * stats.norm.sf(z4)

    #print('%0.2f %0.2f %0.2f %0.2f %s' % (z1, z2, z3, z4, key))
    print('%0.2f %0.2f %0.2f %0.2f %s' % (p1, p2, p3, p4, key))

    # Plot each series against its own index: the subtraction aligns on the
    # union of the two date ranges, which need not equal cogito_daily.index.
    axes[0].bar(difference_cogito.index, difference_cogito, label = key_cogito)
    axes[1].bar(difference_qa.index, difference_qa, label = key_qa)

for ax in axes:
    ax.axhline(0)
    ax.set_xlabel('Date')
    ax.set_ylabel('Difference')
    ax.legend()

# Titles previously read "Average Average"; the duplicated word is dropped.
axes[0].set_title('Pen %i: Absolute Difference between QA Adult Female Average' % (pen_id, ))
axes[1].set_title('Pen %i: Percent Difference between QA Adult Female Average' % (pen_id, ));

In [None]:
# Disabled plot, kept for reference: bars (and printed means) of each
# annotator's daily adult-female average minus the pooled non-QA daily average.
# fig, axes = plt.subplots(nrows = 1, ncols = 1, figsize = (15, 10))

# for key, value in masks.items():
#     axes.bar(cogito_daily.index, masks[key]['cogito_daily']['adultFemaleCountAdjusted'] - cogito_daily['adultFemaleCountAdjusted'], label = key)
#     print(key, np.mean(masks[key]['cogito_daily']['adultFemaleCountAdjusted'] - cogito_daily['adultFemaleCountAdjusted']))

# axes.axhline(0)
# axes.legend()

In [None]:
# Bar chart of how many non-QA annotations each annotator contributed to the
# trailing daily window, one bar series per annotator.
fig, axes = plt.subplots(figsize = (15, 10), ncols = 1, nrows = 1)

for key, value in masks.items():
    print(key)
    daily_counts = value['cogito_daily_count']
    axes.bar(daily_counts.index, daily_counts['movingCountAdjusted'], label = key)

axes.legend()

In [None]:
# Overlay every annotator's daily QA averages (all count columns) on one figure.
for key, value in masks.items():
    annotator_qa_daily = value['qa_daily']
    plt.plot(annotator_qa_daily)

In [None]:
# Pooled non-QA versus pooled QA daily adult-female averages on one figure.
adult_female_column = 'adultFemaleCountAdjusted'
plt.plot(cogito_daily[adult_female_column])
plt.plot(qa_daily[adult_female_column])