In [None]:
import pandas as pd
import numpy as np
import json, os
from research.utils.data_access_utils import RDSAccessUtils
from filter_optimization.filter_optimization_task import extract_biomass_data
from research.weight_estimation.keypoint_utils.optics import euclidean_distance, pixel2world, depth_from_disp, convert_to_world_point

from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, DayLocator

# Build the warehouse client from the JSON credentials file named by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. The context manager
# closes the file handle, which a bare json.load(open(...)) leaves open.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))

In [None]:
# Raw query results keyed by the exact SQL text; getPenDF reads from and fills
# this dict. Re-running this cell empties the cache.
queryCache = {}

In [None]:
# Post-processed frames (sorted, datetime-indexed, with date/hour/depth columns)
# keyed by the exact SQL text; getPenDF checks this dict before queryCache.
finalQueryCache = {}

In [None]:
# Every pen joined to its site (LEFT JOIN, so pens without a site are kept).
query = """
   SELECT p.id pen_id, p.name pen_name, s.name site_name FROM customer.pens p 
   LEFT JOIN customer.sites s
   ON p.site_id = s.id
"""

pen_df = rds_access_utils.extract_from_database(query)

# pen_id -> "<site name> <pen name>"; a falsy site name contributes an empty prefix.
PEN_NAME = {
    pen.pen_id: (pen.site_name or '') + ' ' + pen.pen_name
    for _, pen in pen_df.iterrows()
}

In [None]:
def getPenDF(pen):
    """Return the biomass computations for one pen and date range, with derived columns.

    Args:
        pen: mapping providing 'start_date', 'end_date' and 'pen_id'; the three
            values are interpolated into the SQL filter on captured_at and pen_id.

    Returns:
        A DataFrame sorted by captured_at and indexed by captured_at parsed as
        datetimes, with three added columns: 'date' (index date as a string),
        'hour' (index hour) and 'depth' (median of element 1 of each world
        keypoint returned by pixel2world for the row).

    Results are memoised by the exact SQL text: finalQueryCache holds finished
    frames and queryCache holds raw query results. Callers always get a copy.
    """
    query = """
       SELECT * FROM prod.biomass_computations
        WHERE prod.biomass_computations.captured_at >= '%s'
        AND prod.biomass_computations.captured_at <= '%s'
        AND prod.biomass_computations.pen_id = '%s'
        AND prod.biomass_computations.akpd_score > 0;
    """ % (pen['start_date'], pen['end_date'], pen['pen_id'])

    # A finished frame for this exact query short-circuits everything below.
    if query in finalQueryCache:
        return finalQueryCache[query].copy()

    # Otherwise start from the raw rows, hitting the database only on a cache miss.
    if query not in queryCache:
        raw = rds_access_utils.extract_from_database(query)
        queryCache[query] = raw.copy()
    else:
        raw = queryCache[query].copy()

    df = raw.sort_values('captured_at').copy(deep=True)
    df.index = pd.to_datetime(df.captured_at)
    df['date'] = df.index.date.astype(str)
    df['hour'] = df.index.hour

    # One depth per row: median over the second coordinate of the world keypoints.
    df['depth'] = [
        np.median([
            point[1]
            for point in pixel2world(ann['leftCrop'], ann['rightCrop'], cm).values()
        ])
        for ann, cm in zip(df.annotation, df.camera_metadata)
    ]

    finalQueryCache[query] = df.copy()

    return df

In [None]:
# Load the camera records; later cells read the pen_id, first_seen_at,
# camera_type and glass_elements fields of each row.
df = pd.read_json('swaps2.json')

In [None]:
# Order rows by first_seen_at so the swap-detection loops walk each pen's
# records in that order.
df = df.sort_values('first_seen_at')

In [None]:
# Preview the first rows of the frame.
df.head()

In [None]:
# Preview the last rows of the frame.
df.tail()

In [None]:
# Parallel lists: position i in first_seen_at / camera_type / glass_elements holds
# the most recent record seen so far for seen_pen_ids[i].
seen_pen_ids = []
first_seen_at = []
camera_type = []
glass_elements = []
# One tuple per Ethernet -> USB transition:
# (pen_id, previous first_seen_at, new first_seen_at,
#  previous camera_type, new camera_type, previous glass_elements, new glass_elements)
swaps = []

for _, record in df.iterrows():
    pen_id = record['pen_id']

    if pen_id in seen_pen_ids:
        slot = seen_pen_ids.index(pen_id)

        if camera_type[slot] == 'Ethernet' and record['camera_type'] == 'USB':
            swap_record = (
                pen_id,
                first_seen_at[slot],
                record['first_seen_at'],
                camera_type[slot],
                record['camera_type'],
                glass_elements[slot],
                record['glass_elements'],
            )
            print(*swap_record)
            swaps.append(swap_record)

        # Whether or not a swap was logged, this record becomes the pen's latest state.
        first_seen_at[slot] = record['first_seen_at']
        camera_type[slot] = record['camera_type']
        glass_elements[slot] = record['glass_elements']
    else:
        # First record for this pen: just remember it.
        seen_pen_ids.append(pen_id)
        first_seen_at.append(record['first_seen_at'])
        camera_type.append(record['camera_type'])
        glass_elements.append(record['glass_elements'])

In [None]:

    camera_type[index] = row['camera_type']swap_ids = [-4]

fig, ax = plt.subplots(len(swap_ids), figsize=(20, 20))

for index, swap_id in enumerate(swap_ids):
    swap = swaps[swap_id]

    pen_id, start_date, end_date, _, _ = swap

    adj_start_date = max(start_date, end_date - timedelta(days=5))
    second_start_date = end_date + timedelta(days=1)
    second_end_date = second_start_date + timedelta(days=5)

#     print(swap)

    before = getPenDF({
        'pen_id': pen_id,
        'start_date': adj_start_date.strftime('%Y-%m-%d'),
        'end_date': end_date.strftime('%Y-%m-%d')
    })

    after = getPenDF({
        'pen_id': pen_id,
        'start_date': second_start_date.strftime('%Y-%m-%d'),
        'end_date': second_end_date.strftime('%Y-%m-%d')
    })

    old_weight = []
    new_weight = []

    before_dates = sorted(list(set(list(before.date))))
    after_dates = sorted(list(set(list(after.date))))

    plot_before_dates = [ datetime.strptime(d, '%Y-%m-%d') for d in before_dates ]
    plot_after_dates = [ datetime.strptime(d, '%Y-%m-%d') for d in after_dates ]

    for date in before_dates:
        mask = (before.akpd_score > 0.95) & (before.hour >= 9) & (before.hour <= 15) & (before.date == date)

        df2 = before[mask]

        old_weight.append(np.mean(df2['estimated_weight_g']))

    for date in after_dates:
        mask = (after.akpd_score > 0.95) & (after.hour >= 9) & (after.hour <= 15) & (after.date == date)

        df2 = after[mask]

        new_weight.append(np.mean(df2['estimated_weight_g']))

    axis = ax[index]

#     axis.xaxis.set_major_locator(DayLocator())
#     axis.xaxis.set_major_formatter(DateFormatter('%m/%d'))

    axis.set_title('%s (%i): Depth Distribution (blue = before, red = after)' % (PEN_NAME[pen_id], pen_id))
#     axis.plot(plot_before_dates, old_weight, label = 'old')
#     axis.plot(plot_after_dates, new_weight, label = 'new')

    axis.hist(before.depth, bins = 20, color = 'blue', alpha = 0.5, density = True)
    axis.hist(after.depth, bins = 20, color = 'red', alpha = 0.5, density = True)

In [None]:
# Parallel lists: position i in first_seen_at / camera_type holds the most recent
# record seen so far for seen_pen_ids[i].
seen_pen_ids = []
first_seen_at = []
camera_type = []
# One tuple per Ethernet -> USB transition:
# (pen_id, previous first_seen_at, new first_seen_at, previous camera_type, new camera_type)
swaps = []

for _, record in df.iterrows():
    pen_id = record['pen_id']

    if pen_id in seen_pen_ids:
        slot = seen_pen_ids.index(pen_id)

        if camera_type[slot] == 'Ethernet' and record['camera_type'] == 'USB':
            swap_record = (
                pen_id,
                first_seen_at[slot],
                record['first_seen_at'],
                camera_type[slot],
                record['camera_type'],
            )
            print(*swap_record)
            swaps.append(swap_record)

        # Whether or not a swap was logged, this record becomes the pen's latest state.
        first_seen_at[slot] = record['first_seen_at']
        camera_type[slot] = record['camera_type']
    else:
        # First record for this pen: just remember it.
        seen_pen_ids.append(pen_id)
        first_seen_at.append(record['first_seen_at'])
        camera_type.append(record['camera_type'])

In [None]:
%%javascript
// Replace the output area's _should_scroll hook with a function that returns
// false for any `lines` value (presumably so tall outputs such as the stacked
// figures in this notebook are not collapsed into a scroll box - TODO confirm).
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# Positions in `swaps` to plot, one subplot row per swap.
swap_ids = [3, 5, 7, 8, 10]

fig, ax = plt.subplots(len(swap_ids), figsize=(20, 20))

for index, swap_id in enumerate(swap_ids):
    swap = swaps[swap_id]
    pen_id, start_date, end_date, _, _ = swap

    day_format = '%Y-%m-%d'
    window = timedelta(days=14)

    # "Before": up to 14 days ending at the swap, clipped to the old camera's
    # first_seen_at. "After": 14 days starting the day after the swap.
    adj_start_date = max(start_date, end_date - window)
    second_start_date = end_date + timedelta(days=1)
    second_end_date = second_start_date + window

    before = getPenDF(dict(
        pen_id=pen_id,
        start_date=adj_start_date.strftime(day_format),
        end_date=end_date.strftime(day_format),
    ))
    after = getPenDF(dict(
        pen_id=pen_id,
        start_date=second_start_date.strftime(day_format),
        end_date=second_end_date.strftime(day_format),
    ))

    before_dates = sorted(set(before.date))
    after_dates = sorted(set(after.date))

    plot_before_dates = [datetime.strptime(day, day_format) for day in before_dates]
    plot_after_dates = [datetime.strptime(day, day_format) for day in after_dates]

    # Daily mean estimated weight over rows with akpd_score > 0.95 captured between
    # hours 9 and 15 inclusive. These series are computed but not drawn here.
    before_ok = (before.akpd_score > 0.95) & before.hour.between(9, 15)
    after_ok = (after.akpd_score > 0.95) & after.hour.between(9, 15)

    old_weight = [
        np.mean(before.loc[before_ok & (before.date == day), 'estimated_weight_g'])
        for day in before_dates
    ]
    new_weight = [
        np.mean(after.loc[after_ok & (after.date == day), 'estimated_weight_g'])
        for day in after_dates
    ]

    axis = ax[index]
    axis.set_title('%s (%i): Depth Distribution (blue = before, red = after)' % (PEN_NAME[pen_id], pen_id))

    # Overlaid, normalised depth histograms: blue for before, red for after.
    for frame, colour in ((before, 'blue'), (after, 'red')):
        axis.hist(frame.depth, bins=20, color=colour, alpha=0.5, density=True)

In [None]:
# `before` is the frame left behind by the last iteration of the most recently
# run plotting loop, so this histogram covers only that final swap.
plt.hist(before.depth)