In [None]:
import datetime as dt
import json
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from research.weight_estimation.keypoint_utils.optics import pixel2world
from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
from PIL import Image

# Cap the rendered width of DataFrame cells at 50 characters so that long
# string values do not blow up the displayed tables.
pd.set_option('display.max_colwidth', 50)

In [None]:
# Build the S3 access helper used further down to download crop images.
# The AWS credentials are read from the JSON file whose path is stored in the
# AWS_CREDENTIALS environment variable; the `with` block guarantees the file
# handle is closed once the credentials have been parsed.
with open(os.environ['AWS_CREDENTIALS']) as credentials_file:
    aws_credentials = json.load(credentials_file)

# NOTE(review): '/root/data' is presumably the local directory the helper
# downloads into -- confirm against S3AccessUtils.
s3 = S3AccessUtils('/root/data', aws_credentials)

In [None]:
# load raw biomass computations
fs = [
#     '/root/data/alok/biomass_estimation/playground/bolaks_p88_duplicate.csv',
#     '/root/data/alok/biomass_estimation/playground/aplavika_p95_duplicate.csv',
#     '/root/data/alok/biomass_estimation/playground/cooke_p153_duplicate.csv',
#     '/root/data/alok/biomass_estimation/playground/dale_p1_duplicate.csv'
    '/root/data/alok/biomass_estimation/playground/dale_pen_3_data_14k_20thresh_duplicate.csv'
]

# Maps each CSV path to its DataFrame, extended with `date`, `depth`, `yaw`
# and `pitch` columns.
dfs = {}
for f in fs:
    print(f)
    df = pd.read_csv(f)

    # add date column: calendar date of `captured_at`, stored as a string
    df['date'] = pd.to_datetime(df.captured_at).dt.date.astype(str)

    # add depth / yaw / pitch columns
    yaws, pitches, depths = [], [], []
    n_failed = 0
    print(df.shape)
    for annotation_str, camera_metadata_str in zip(df.annotation, df.camera_metadata):
        # Single quotes are swapped for double quotes before JSON parsing
        # (presumably the columns hold Python-repr style dicts -- this breaks
        # if a value ever contains an apostrophe or None/True/False).
        ann = json.loads(annotation_str.replace("'", '"'))
        cm = json.loads(camera_metadata_str.replace("'", '"'))

        try:
            world_keypoints = pixel2world(ann['leftCrop'], ann['rightCrop'], cm)
            # depth is the median of the index-1 coordinate over all keypoints
            depth = np.median([x[1] for x in world_keypoints.values()])
            # vector from the tail notch to the upper lip
            v = world_keypoints['UPPER_LIP'] - world_keypoints['TAIL_NOTCH']
            # arctan2(y, |x|) equals arctan(y / |x|) whenever x != 0 but stays
            # well defined (no division by zero) when the x component is 0.
            yaw = np.arctan2(v[1], abs(v[0])) * (180.0 / np.pi)
            pitch = np.arctan2(v[2], abs(v[0])) * (180.0 / np.pi)
        except TypeError:
            # Rows whose keypoints cannot be processed keep empty pose values
            # (they end up as missing values in the new columns).
            n_failed += 1
            yaw, pitch, depth = None, None, None

        depths.append(depth)
        yaws.append(yaw)
        pitches.append(pitch)

    if n_failed:
        print('{} of {} rows raised TypeError during pose computation'.format(n_failed, df.shape[0]))

    df['depth'] = depths
    df['yaw'] = yaws
    df['pitch'] = pitches
    dfs[f] = df


In [None]:
# Build, for every input file, a table with one row per (record, duplicate)
# pair. Column suffix 1 refers to the record itself, suffix 2 to the record it
# is flagged as a duplicate of:
#   w = estimated_weight_g, d = depth, y = yaw, date = capture date string,
#   ts = capture timestamp, u = left crop URL, id = record ID.
PAIR_COLUMNS = ['w1', 'w2', 'd1', 'd2', 'y1', 'y2', 'date1', 'date2',
                'ts1', 'ts2', 'u1', 'u2', 'id1', 'id2']


def _to_naive_utc(captured_at):
    """Parse a `captured_at` string into a timezone-naive UTC timestamp.

    Unlike a fixed '%Y-%m-%d %H:%M:%S.%f' format, this also accepts values
    without fractional seconds and with or without a UTC offset.
    """
    return pd.to_datetime(captured_at, utc=True).tz_convert(None)


tdfs = []
for f in fs:
    df = dfs[f]

    # Collect (ID, duplicate ID) pairs from the ';'-separated ID lists.
    pairs = []
    for record_id, duplicate_ids_str in zip(df.ID, df.duplicated_with_IDs):
        # Missing cells are floats (NaN); '[]' marks an empty duplicate list.
        if isinstance(duplicate_ids_str, float) or duplicate_ids_str == '[]':
            continue
        for token in duplicate_ids_str.strip(';').split(';'):
            if token:  # ignore empty fragments such as the one in '1;;2'
                pairs.append((int(record_id), int(token)))

    # One-off ID -> row lookup table, replacing a full boolean scan of the
    # frame for each member of each pair. If an ID occurs more than once the
    # first occurrence wins. An ID that is absent raises KeyError.
    rows_by_id = df.drop_duplicates(subset='ID', keep='first').set_index('ID')

    records = []
    for idx_1, idx_2 in pairs:
        row_1, row_2 = rows_by_id.loc[idx_1], rows_by_id.loc[idx_2]
        records.append({
            'w1': row_1['estimated_weight_g'], 'w2': row_2['estimated_weight_g'],
            'd1': row_1['depth'], 'd2': row_2['depth'],
            'y1': row_1['yaw'], 'y2': row_2['yaw'],
            'date1': row_1['date'], 'date2': row_2['date'],
            'ts1': _to_naive_utc(row_1['captured_at']),
            'ts2': _to_naive_utc(row_2['captured_at']),
            'u1': row_1['left_crop_url'], 'u2': row_2['left_crop_url'],
            'id1': idx_1, 'id2': idx_2,
        })

    tdf = pd.DataFrame(records, columns=PAIR_COLUMNS)
    tdfs.append(tdf)


In [None]:
# Stack the per-file pair tables into one frame. `ignore_index=True` gives the
# result a fresh 0..n-1 index; without it every per-file table would contribute
# its own 0-based labels and the combined index would contain duplicates.
tdf = pd.concat(tdfs, ignore_index=True)

In [None]:
# mask = (tdf.date1 == '2020-03-04') & (tdf.date2 == '2020-03-05')
# ((tdf[mask].w1 - tdf[mask].w2) / tdf[mask].w1).median()

In [None]:
# `ts1` was already parsed into datetimes when the pair table was built, so the
# first timestamp can be displayed as is. The string-style call
# `.replace('+00:00', '')` that used to be here fails on a datetime value.
tdf.ts1.iloc[0]

In [None]:
# Show the first `ts1` value as a plain `datetime.datetime`. The column already
# contains parsed timestamps, so no string clean-up or `strptime` call is
# needed (and the former `.replace('+00:00', '')` fails on a datetime value).
tdf.ts1.iloc[0].to_pydatetime()

In [None]:
# Flag pairs by the time gap between the two captures.
# NOTE(review): this cell originally ended in a dangling `>` (a SyntaxError);
# the intended threshold is unknown, so a zero gap is used as a placeholder.
min_gap = pd.Timedelta(0)  # TODO: set the intended threshold
(tdf.ts1 - tdf.ts2) > min_gap

In [None]:
# Left crop URL of the record at position 48 of the most recently loaded frame.
df['left_crop_url'].iloc[48]

In [None]:
# Left crop URL of the record at position 4398 of the most recently loaded frame.
df['left_crop_url'].iloc[4398]

In [None]:
# Element-wise check of `captured_at` against the cutoff date.
# NOTE(review): `captured_at` is presumably still the raw string column read
# from the CSV, which would make this a lexicographic comparison -- confirm.
df['captured_at'].lt('2021-02-05')

In [None]:
# Select the pairs whose first capture happened before the cutoff.
# Optional extra condition that was left disabled:
#   & (tdf.ts2 > '2021-02-05 14:00:00')
ts1_cutoff = '2021-02-05 10:00:00'
mask = tdf['ts1'] < ts1_cutoff

In [None]:
# Display the left crops of every selected pair, one figure per image, so the
# flagged duplicates can be compared by eye. A dashed line separates pairs.
for u1, u2 in zip(tdf.loc[mask, 'u1'], tdf.loc[mask, 'u2']):
    for url in (u1, u2):
        # The first element of the returned triple is what Image.open reads
        # (presumably the local path of the downloaded file).
        local_file, _, _ = s3.download_from_url(url)
        # Image.open keeps the underlying file open; the context manager
        # closes it as soon as the figure has been rendered.
        with Image.open(local_file) as im:
            plt.figure(figsize=(20, 10))
            plt.imshow(im)
            plt.show()
    print('-------')
    



In [None]:
# Shape of the weight difference (w1 - w2) over the selected pairs.
selected_weights = tdf.loc[mask, ['w1', 'w2']]
(selected_weights['w1'] - selected_weights['w2']).shape

In [None]:
# Histogram of the yaw difference between the two members of each pair.
# Records whose pose computation failed carry a missing yaw, so their
# difference is NaN; those are dropped explicitly rather than relying on how
# the installed matplotlib/numpy versions treat NaN in histogram input.
yaw_diff = (tdf.y1 - tdf.y2).dropna()
plt.hist(yaw_diff.values)
plt.xlabel('yaw difference within pair (degrees)')
plt.ylabel('number of pairs')
plt.show()

In [None]:
# Histogram of the yaw angle over all records of the most recently loaded
# frame. Records whose pose computation failed have a missing yaw; they are
# dropped explicitly rather than relying on how the installed
# matplotlib/numpy versions treat NaN in histogram input.
yaw_values = df.yaw.dropna()
plt.hist(yaw_values.values)
plt.xlabel('yaw (degrees)')
plt.ylabel('number of records')
plt.show()