In [None]:
import json
import os
import dash
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from aquabyte.data_access_utils import RDSAccessUtils
from plotly.subplots import make_subplots


import plotly.graph_objects as go

In [None]:
# Query keypoint_annotations for pen 61 using the credentials file named by
# the PROD_SQL_CREDENTIALS environment variable.
# The credentials file is opened in a `with` block so its handle is closed as
# soon as the JSON has been parsed rather than whenever it is garbage collected.
with open(os.environ['PROD_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))
query = """
    select * from keypoint_annotations where pen_id=61 and captured_at between '2019-11-27' and '2019-12-01';
"""
df = rds_access_utils.extract_from_database(query)


In [None]:
def create_graph(value):
    """Plot running annotation totals over capture time for a single day.

    Reads the notebook-level ``df``. Rows whose ``captured_at`` lies in the
    half-open window ``[value, value + 1 day)`` are ordered by ``captured_at``
    and drawn as three running totals: every row, rows where
    ``is_skipped == False`` and rows where ``is_skipped == True``.
    The figure is displayed with ``fig.show()``; nothing is returned.

    Args:
        value: Day to plot, formatted as ``'%Y-%m-%d'``.

    Raises:
        ValueError: If ``value`` cannot be parsed with ``'%Y-%m-%d'``.
    """

    end_date = datetime.strftime(datetime.strptime(value, '%Y-%m-%d') + timedelta(days=1), '%Y-%m-%d')
    # `>=` keeps a row stamped exactly at midnight of `value` inside its own day.
    # The rows are sorted because a running total is only meaningful when it is
    # accumulated in the same (chronological) order as the x axis.
    filtered_df = (
        df[(df.captured_at >= value) & (df.captured_at < end_date)]
        .sort_values('captured_at')
    )
    captured_times = pd.to_datetime(filtered_df.captured_at)

    fig = go.Figure({
        'data': [{
            'x': captured_times,
            # 1-based, like the cumsum series below: after the first row the total is 1.
            'y': list(range(1, filtered_df.shape[0] + 1)),
            'name': 'Total annotated'
        }, {
            'x': captured_times,
            # Explicit comparison (not `~`) so rows with a missing flag count as neither.
            'y': np.cumsum((filtered_df.is_skipped==False).astype(int)),
            'name': 'Total accepted',
            'marker': dict(color='rgb(28, 118, 255)')
        }, {
            'x': captured_times,
            'y': np.cumsum((filtered_df.is_skipped==True).astype(int)),
            'name': 'Total skipped',
            'marker': dict(color='red')
        }],
        'layout': {
            'title': 'Data Progression over Time',
            'showlegend': True,
            'legend': {
                'x': 0,
                'y': 1.0
            },
            'margin': dict(l=40, r=0, t=40, b=30)
        }
    })

    fig.show()
    

In [None]:
# Draw the running-total chart for the day 2019-11-27.
create_graph(value='2019-11-27')

In [None]:
def create_graph(value):
    """Plot running annotation totals for a single day on two stacked subplots.

    Reads the notebook-level ``df``. Rows whose ``captured_at`` lies in the
    half-open window ``[value, value + 1 day)`` are ordered by ``captured_at``.
    Three running totals (every row, rows where ``is_skipped == False``, rows
    where ``is_skipped == True``) are added to subplot row 1 and then, in the
    same order, to subplot row 2. The figure is displayed with ``fig.show()``;
    nothing is returned.

    Args:
        value: Day to plot, formatted as ``'%Y-%m-%d'``.

    Raises:
        ValueError: If ``value`` cannot be parsed with ``'%Y-%m-%d'``.
    """

    fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05)
    end_date = datetime.strftime(datetime.strptime(value, '%Y-%m-%d') + timedelta(days=1), '%Y-%m-%d')
    # `>=` keeps a row stamped exactly at midnight of `value` inside its own day.
    # The rows are sorted because a running total is only meaningful when it is
    # accumulated in the same (chronological) order as the x axis.
    filtered_df = (
        df[(df.captured_at >= value) & (df.captured_at < end_date)]
        .sort_values('captured_at')
    )
    captured_times = pd.to_datetime(filtered_df.captured_at)

    # (legend name, running total) in the order the traces are added.
    running_totals = [
        # 1-based, like the cumsum series: after the first row the total is 1.
        ('Total annotated', list(range(1, filtered_df.shape[0] + 1))),
        # Explicit comparisons (not `~`) so rows with a missing flag count as neither.
        ('Total accepted', np.cumsum((filtered_df.is_skipped==False).astype(int))),
        ('Total skipped', np.cumsum((filtered_df.is_skipped==True).astype(int))),
    ]

    # Both subplot rows receive the same three series.
    for subplot_row in (1, 2):
        for trace_name, totals in running_totals:
            fig.add_trace(go.Scatter(
                x=captured_times,
                y=totals,
                name=trace_name
            ), row=subplot_row, col=1)

    fig.update_layout(
        autosize=False,
        width=1000,
        height=1000,
        margin=go.layout.Margin(
            l=50,
            r=50,
            b=20,
            t=20,
            pad=4
        )
    )

    fig.show()
    

In [None]:
# Draw the two-row running-total chart for the day 2019-11-28.
create_graph(value='2019-11-28')

In [None]:
# Query prod.crop_annotation joined to prod.annotation_state for the 'BATI'
# service and pen 56, using the credentials file named by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. Rebinds `df`.
# The credentials file is opened in a `with` block so its handle is closed as
# soon as the JSON has been parsed rather than whenever it is garbage collected.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))
query = """
    SELECT * FROM
    prod.crop_annotation cas
    INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
    WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
    AND cas.pen_id=56
    AND cas.captured_at between '2020-01-03' and '2020-01-04';
"""
df = rds_access_utils.extract_from_database(query)


In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

In [None]:
def create_graph(value):
    """Plot per-state running totals and 30-minute event counts for one day.

    Reads the notebook-level ``df``. Rows whose ``captured_at`` lies in the
    half-open window ``[value, value + 1 day)`` are ordered by ``captured_at``
    and indexed by that timestamp. Four row selections are plotted, keyed on
    ``annotation_state_id``: ``1`` (backlog), ``3 or 4`` (annotated),
    ``3`` (accepted) and ``4`` (skipped).

    * Subplot row 1: running total of each selection over capture time.
    * Subplot row 2: number of rows of each selection inside a trailing
      30-minute window ending at each row's timestamp.

    The figure is displayed with ``fig.show()``; nothing is returned.

    Args:
        value: Day to plot, formatted as ``'%Y-%m-%d'``.

    Raises:
        ValueError: If ``value`` cannot be parsed with ``'%Y-%m-%d'``.
    """

    fig = make_subplots(rows=2, cols=1, vertical_spacing=0.05)
    end_date = datetime.strftime(datetime.strptime(value, '%Y-%m-%d') + timedelta(days=1), '%Y-%m-%d')
    # `>=` keeps a row stamped exactly at midnight of `value` inside its own day.
    filtered_df = df[(df.captured_at >= value) & (df.captured_at < end_date)].sort_values('captured_at')
    # A (sorted) datetime index is what the offset-based rolling window below needs.
    filtered_df.index = pd.to_datetime(filtered_df.captured_at)

    state_ids = filtered_df.annotation_state_id
    # (row-1 legend name, row-2 legend name, row selection), in plotting order.
    selections = [
        ('Total added to backlog', 'Backlog rate', state_ids == 1),
        ('Total annotated', 'Annotation rate', (state_ids == 3) | (state_ids == 4)),
        ('Total accepted', 'Accept rate', state_ids == 3),
        ('Total skipped', 'Skip rate', state_ids == 4),
    ]

    # Row 1: running totals.
    for total_name, _, selection in selections:
        selected_times = filtered_df[selection].index
        fig.add_trace(go.Scatter(
            x=selected_times,
            # 1-based: after the first selected row the total is 1.
            y=list(range(1, len(selected_times) + 1)),
            name=total_name
        ), row=1, col=1)

    # Row 2: rows per trailing 30 minutes. The selected state ids are never
    # missing (they matched a concrete value), so counting non-null values in
    # the window gives the window size without a per-window Python callback.
    for _, rate_name, selection in selections:
        selected_states = state_ids[selection]
        fig.add_trace(go.Scatter(
            x=selected_states.index,
            y=selected_states.rolling('30min', min_periods=1).count(),
            name=rate_name
        ), row=2, col=1)

    fig.update_layout(
        autosize=False,
        width=1000,
        height=1000,
        margin=go.layout.Margin(
            l=50,
            r=50,
            b=20,
            t=20,
            pad=4
        )
    )

    fig.show()
    

In [None]:
# (rows, columns) of the records in annotation state 1, the state that
# create_graph plots as 'Total added to backlog'.
df.loc[df['annotation_state_id'].eq(1)].shape

In [None]:
# First five records in annotation state 3, the state that create_graph
# plots as 'Total accepted'.
df.loc[df['annotation_state_id'].eq(3)].head(n=5)

In [None]:
# Draw the totals-and-rates chart for the day 2020-01-03.
create_graph(value='2020-01-03')

In [None]:
# Keep one day of the frame currently bound to `df`, then list the capture
# times of its state-1 (backlog) rows.
# The window is 2020-01-03 because, when the notebook is run top to bottom,
# `df` at this point holds rows captured between '2020-01-03' and '2020-01-04';
# the previous 2019-11-27 window matched none of them, which left every
# downstream histogram and median empty.
# NOTE(review): confirm 2020-01-03 is the day this analysis is meant to cover.
filtered_df = df[(df.captured_at >= '2020-01-03') & (df.captured_at < '2020-01-04')]
backlog_mask = filtered_df.annotation_state_id == 1
filtered_df[backlog_mask].captured_at

In [None]:
# Re-index the day's rows by capture timestamp, then rebuild the state-1 mask
# so that it carries the new index.
filtered_df.index = pd.to_datetime(filtered_df['captured_at'])
backlog_mask = filtered_df['annotation_state_id'].eq(1)

In [None]:
# Boolean selector for rows in annotation state 2.
in_progress_mask = filtered_df['annotation_state_id'].eq(2)


In [None]:
# Histogram of ts_source_end - ts_source_start over the state-2 rows, with
# missing differences dropped.
# NOTE(review): `plt` is not imported in any visible cell — presumably
# `import matplotlib.pyplot as plt` is missing from the imports cell; confirm.
plt.hist((filtered_df[in_progress_mask].ts_source_end - filtered_df[in_progress_mask].ts_source_start).dropna())

In [None]:
# Histogram (200 bins) of ts_source_end - ts_source_start, in seconds, over
# the state-2 rows.
# Missing durations are dropped first, as in the previous histogram cell:
# NaN values make the automatic bin-range detection fail.
# NOTE(review): `plt` is not imported in any visible cell — presumably
# `import matplotlib.pyplot as plt` is missing from the imports cell; confirm.
plt.figure(figsize=(20, 10))
plt.hist(((filtered_df[in_progress_mask].ts_source_end - filtered_df[in_progress_mask].ts_source_start) / np.timedelta64(1, 's')).dropna(), bins=200)
plt.show()

In [None]:
# Duration of each state-2 row in seconds, then the median of the durations
# shorter than 600 s.
seconds = (
    filtered_df.loc[in_progress_mask, 'ts_source_end']
    - filtered_df.loc[in_progress_mask, 'ts_source_start']
) / np.timedelta64(1, 's')
seconds.loc[seconds < 600].median()

In [None]:
# How many durations are under 600 s, next to how many there are in total.
(seconds.loc[seconds < 600].shape, seconds.shape)

In [None]:
# State-2 rows whose left_crop_url also appears on a state-4 row.
# NOTE(review): the name says "accept", but state 4 is what create_graph plots
# as 'Total skipped' (state 3 is 'Total accepted') — confirm which is intended.
accept_df = filtered_df[
    filtered_df['annotation_state_id'].eq(2)
    & filtered_df['left_crop_url'].isin(
        filtered_df.loc[filtered_df['annotation_state_id'].eq(4), 'left_crop_url']
    )
]




In [None]:
# Rebind `seconds` to the per-row duration of accept_df in seconds, then take
# the median of the durations shorter than 600 s.
seconds = (accept_df['ts_source_end'] - accept_df['ts_source_start']) / np.timedelta64(1, 's')
seconds.loc[seconds < 600].median()

In [None]:
# Distinct annotator_email values on rows in annotation state 3 or 4.
# NOTE(review): the rendered output contains e-mail addresses; consider
# clearing it before sharing the notebook.
df.loc[df['annotation_state_id'].isin([3, 4]), 'annotator_email'].unique()

In [None]:
import plotly

In [None]:
# Display the version string of the imported plotly package.
plotly.__version__

In [None]:
# Query prod.crop_annotation joined to prod.annotation_state for the 'BATI'
# service and pen 61, using the credentials file named by the
# DATA_WAREHOUSE_SQL_CREDENTIALS environment variable. Rebinds `df`.
# The credentials file is opened in a `with` block so its handle is closed as
# soon as the JSON has been parsed rather than whenever it is garbage collected.
with open(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']) as credentials_file:
    rds_access_utils = RDSAccessUtils(json.load(credentials_file))
query = """
    SELECT * FROM
    prod.crop_annotation cas
    INNER JOIN prod.annotation_state pas on pas.id=cas.annotation_state_id
    WHERE cas.service_id = (SELECT ID FROM prod.service where name='BATI')
    AND cas.pen_id=61
    AND cas.captured_at between '2019-09-13' and '2019-09-15';
"""
df = rds_access_utils.extract_from_database(query)


In [None]:
# Distinct group_id values in the frame loaded by the previous cell.
df['group_id'].unique()