# Table of Contents
* [load data](#load-data)
* [process data](#process-data)
	* [bonuses](#bonuses)
	* [frame build](#frame-build)
	* [analysis](#analysis)
	* [building frames](#building-frames)
* [bounding box clustering](#bounding-box-clustering)
* [bonus cost estimate](#bonus-cost-estimate)
* [HTML Review](#HTML-Review)
* [hide](#hide)


In [136]:
%%capture
from __future__ import division
import numpy as np
import pandas as pd
import scipy.stats as st
import itertools
import math
from collections import Counter, defaultdict
%load_ext autoreload
%autoreload 2

import matplotlib as mpl
mpl.use("Agg")
import matplotlib.pylab as plt
#%matplotlib notebook
%matplotlib inline
%load_ext base16_mplrc
%base16_mplrc light default
plt.rcParams['figure.figsize'] = (16.0, 10.0)

import re
import pickle
import boto
from copy import deepcopy
import json
import os
import random
import jinja2
from tqdm import tqdm
from IPython.core.display import HTML

import PIL.Image as Image
import requests

from boto.mturk.qualification import PercentAssignmentsApprovedRequirement, Qualifications, Requirement

from keysTkingdom import mturk_ai2
from keysTkingdom import aws_tokes
from keysTkingdom import mturk_aristo

from amt_utils.mturk import pickle_this, unpickle_this

import os
import jinja2
import argparse
from jinja2 import Environment, FileSystemLoader

j2env = jinja2.Environment()

from amt_utils.bboxes import cluster_from_nms
from amt_utils.bboxes import draw_animation_seq
from amt_utils.bboxes import cluster_from_annos
from amt_utils.bboxes import create_subtask_data
from amt_utils.bboxes import draw_image_and_labels
import warnings
warnings.filterwarnings('ignore')

# load data

In [2]:
# Result files for batches 1-5, loaded with the project's unpickle helper.
data_paths = ['./turker_batch_{}.pkl'.format(i) for i in range(1, 6)]

turk_data_assemble = [unpickle_this(data_path) for data_path in data_paths]

# Merge all batches into one mapping; on duplicate keys the later batch wins.
turk_data = {}
for data_part in turk_data_assemble:
    turk_data.update(data_part)

# Batch 1 is only reachable through turk_data_assemble[0] or turk_data.
batch_2 = turk_data_assemble[1]
batch_3 = turk_data_assemble[2]
batch_4 = turk_data_assemble[3]
batch_5 = turk_data_assemble[4]

In [113]:
subtask_data = unpickle_this('./turker_batch_6_subtask.pkl')

# process data

In [3]:
# Labels treated as "main" by all_main() and excluded from bonus counting in
# assign_bonuses(); the last two are the labels used for frames with no one in them.
main_characters = {
    "fred", "barney", "wilma", "betty", "pebbles",
    "bamm bamm", "dino", "mr slate", "baby puss", "hoppy",
    "empty frame", "no characters",
}

def create_result(assmt):
    """Decode one assignment's JSON answer and tag it with HIT and worker ids.

    The payload is read from ``assmt.answers[0][0].fields[0]``.  The decoded
    object gains an ``'h_id'`` entry (``assmt.HITId``) and a ``'worker_id'``
    entry (``assmt.WorkerId``) before it is returned.
    """
    payload = assmt.answers[0][0].fields[0]
    decoded = json.loads(payload)
    decoded['h_id'], decoded['worker_id'] = assmt.HITId, assmt.WorkerId
    return decoded


def all_main(char_set, main_chars=main_characters):
    """Return True when every label in ``char_set`` also appears in ``main_chars``.

    An empty ``char_set`` counts as all-main.  The default for ``main_chars``
    is whatever ``main_characters`` was bound to when this function was defined.
    """
    return char_set.issubset(main_chars)

In [6]:
# Flatten batch 4: each value is a group of assignments (the original loop
# called it a triple); concatenate the groups in mapping order.
assignments = list(itertools.chain.from_iterable(batch_4.values()))

assignment_results = [create_result(ar) for ar in assignments]

# One record per assignment: its ids plus the set of distinct box labels.
characters_present = [
    {
        'h_id': anno['h_id'],
        'w_id': anno['worker_id'],
        'still_id': anno['stillID'],
        'characters': {ch['label'] for ch in json.loads(anno['characterBoxes'])},
    }
    for anno in assignment_results
]

In [7]:
len(assignments)

6222

## bonuses

In [8]:
# Substrings that filter_rejects() looks for to flag a label as a rejection trigger.
reject_variations = {"otherchar", "0ther", "schwenk"}

In [9]:
def filter_rejects(chars, variations=None):
    """Split extra-character labels into rejection triggers and genuine bonuses.

    A label is a rejection trigger when its normalized form (lower-cased, with
    everything except letters and digits removed) contains one of
    ``variations``.  A label that is not a trigger but has fewer than three
    non-space characters is dropped: it appears in neither returned list.

    Args:
        chars: iterable of label strings.
        variations: substrings to search for; defaults to the module-level
            ``reject_variations``.

    Returns:
        ``(reject, genuine_bonus)``: two lists, each in input order.  A label
        is listed in ``reject`` once, however many variations it matches.
    """
    if variations is None:
        variations = reject_variations
    reject = []
    genuine_bonus = []
    for char in chars:
        # Normalizing first lets "Other Char!" match the "otherchar" variation.
        comp_string = ''.join(c for c in char if c.isalnum()).lower()
        if any(var in comp_string for var in variations):
            reject.append(char)
        elif len(char.replace(' ', '')) >= 3:
            genuine_bonus.append(char)
        # else: too short to count as a bonus, but not grounds for rejection.
    return reject, genuine_bonus

def assign_bonuses(assignments):
    """Sort assignments into bonus-earning and rejection candidates.

    For every assignment the box labels that are not in ``main_characters``
    are passed through ``filter_rejects``.  If any label is a rejection
    trigger the assignment goes to ``rejections``; otherwise, if any genuine
    labels remain, the assignment earns a bonus.

    Returns:
        ``(bonuses, rejections, worker_lookup)`` where
        ``bonuses`` maps assignment id -> number of genuine extra labels,
        ``rejections`` maps assignment id -> ``(rejected, genuine)`` label lists,
        ``worker_lookup`` maps every assignment id -> worker id.
    """
    bonuses, rejections, worker_lookup = {}, {}, {}
    for asgmt in assignments:
        aid = asgmt.AssignmentId
        worker_lookup[aid] = asgmt.WorkerId
        boxes = json.loads(create_result(asgmt)['characterBoxes'])
        extra_labels = [box['label'] for box in boxes if box['label'] not in main_characters]
        rejected, genuine = filter_rejects(extra_labels)
        if rejected:
            rejections[aid] = (rejected, genuine)
            continue
        if genuine:
            # Only the count is stored, not the labels themselves.
            bonuses[aid] = len(genuine)
    return bonuses, rejections, worker_lookup

In [10]:
worker_counts = pd.Series(list(asgmt_workers.values())).value_counts()

NameError: name 'asgmt_workers' is not defined

In [850]:
sum(worker_counts > 50)

40

In [801]:
bonuses, to_reject, asgmt_workers = assign_bonuses(assignments)

In [743]:
# all_descriptions = []
# for chars in bonuses.values():
#     all_descriptions.extend(chars)

# Flatten the rejected labels (element 0 of every to_reject value) into one list.
all_rejections = [label for entry in to_reject.values() for label in entry[0]]

In [744]:
pd.Series(list(bonuses.values())).sum() * 0.01

28.23

In [723]:
pd.Series(list(all_descriptions)).value_counts()

old man                                           67
man                                               45
circus performer                                  30
the man dress color is brown                      26
the man was standing                              24
bird                                              22
construction worker                               20
a man                                             17
an old man                                        16
police officer                                    15
old lady                                          14
doctor                                            14
man with bow tie                                  14
police                                            14
the man dress color is blue                       14
police man                                        13
child                                             13
girl                                              13
construction worke                            

In [724]:
# pickle_this(bad_aids, 'to_reject.pkl')

In [693]:
# pd.Series(all_rejections).value_counts()

In [636]:
bad_aids = set(to_reject.keys())

In [637]:
bad_workers = set([asgmt_workers[aid] for aid in bad_aids])
len(bad_workers)

bad_workers

1

In [691]:
# Bonus record per assignment id: how many extra characters, and who to pay.
# NOTE(review): the int default factory is never used by this cell -- every
# stored value is a dict; confirm whether a plain dict was intended.
worker_bonuses = defaultdict(int)
worker_bonuses.update(
    (aid, {'count': char_count, 'worker_id': asgmt_workers[aid]})
    for aid, char_count in bonuses.items()
)

In [694]:
pickle_this(worker_bonuses, 'bonuses_to_pay.pkl')

## frame build

In [11]:
# Bucket every result under its clip id (the still id up to its last '_').
# Results are visited in stillID order, so each bucket ends up sorted as well.
annotations_by_frame = defaultdict(list)
results_in_still_order = sorted(assignment_results, key=lambda result: result['stillID'])
for anno in results_in_still_order:
    animation_id = anno['stillID'].rsplit('_', 1)[0]
    annotations_by_frame[animation_id].append(anno)

In [804]:
import csv

# Append the ids of all clips grouped so far as a single space-delimited row.
# The csv module asks for files opened with newline='' so that it alone
# controls the line terminator (otherwise extra blank lines can appear on
# platforms that translate '\n').
with open('complete_clips.txt', 'a', newline='') as f:
    cw = csv.writer(f, delimiter=' ')
    cw.writerow(list(annotations_by_frame.keys()))

## analysis

In [12]:
# One row per assignment, with derived per-row columns.
char_df = pd.DataFrame(characters_present)
# Clip id: the still id with everything after its last '_' removed.
char_df['aid'] = char_df['still_id'].apply(lambda still_id: still_id.rsplit('_', 1)[0])
# True when the worker used only labels from main_characters.
char_df['all_main'] = char_df['characters'].apply(all_main)
# Number of distinct labels the worker used on the still.
char_df['n_chars'] = char_df['characters'].apply(len)
# Every label from every row, flattened (repeats across rows are kept).
all_chars = list(itertools.chain.from_iterable(char_df['characters'].tolist()))

In [13]:
# Group the per-assignment rows by still image id.
grouped_by_still = char_df.groupby('still_id')
# Apply len to every column within each still.
agg_df = grouped_by_still.aggregate(len)

# Apply scipy.stats.mode to every column within each still.  Later cells index
# the resulting cells as x[1][0] -- presumably the count of the modal value;
# TODO confirm against the installed scipy/pandas versions.
agged_on_mode = grouped_by_still.agg(lambda x: st.mode(x))

In [14]:
# Turn the 'still_id' group index back into an ordinary column.
consensus_results_df = agged_on_mode.reset_index()

# x[1][0] is taken from each aggregated cell; assuming the cell holds the
# (mode, count) pair produced by st.mode, this is the number of rows of the
# still that share the modal value -- TODO confirm.
# 'count_mode' is read from the 'n_chars' column, 'mode_count' from 'characters'.
consensus_results_df['count_mode'] = consensus_results_df['n_chars'].apply(lambda x: x[1][0])
consensus_results_df['mode_count'] = consensus_results_df['characters'].apply(lambda x: x[1][0])

# Flags for count_mode being exactly 3 or exactly 2.
consensus_results_df['all_agree'] = consensus_results_df['count_mode'] == 3
consensus_results_df['two_agree'] = consensus_results_df['count_mode'] == 2

char_count_df = consensus_results_df[consensus_results_df['count_mode'] == 3]

# Boolean mask over stills: the x[1][0] value of the 'all_main' cell equals 3.
# NOTE(review): if x[1][0] is the modal count, this is True when all three rows
# share the same all_main value, whether that value is True or False -- confirm intent.
agree_all_main = consensus_results_df['all_main'].apply(lambda x: x[1][0]) == 3 

# Still ids where the mask above is False, and their rows in char_df.
not_main =set(consensus_results_df[~agree_all_main]['still_id'].tolist())
not_main_df = char_df[char_df['still_id'].isin(not_main)]


# Within the stills selected by agree_all_main, split on the 'all_agree' flag.
# Note that all_agree is rebound here from a column name to a DataFrame.
all_agree = consensus_results_df[agree_all_main][consensus_results_df[agree_all_main]['all_agree']]
disagree = consensus_results_df[agree_all_main][~consensus_results_df[agree_all_main]['all_agree']]

all_agree_set = set(all_agree['still_id'].tolist())

# Still ids bucketed by their count_mode value (3, 2 or 1).
count_all_agree_set = set(consensus_results_df[consensus_results_df['count_mode'] == 3]['still_id'].tolist())
count_two_agree_set = set(consensus_results_df[consensus_results_df['count_mode'] == 2]['still_id'].tolist())
count_none_agree_set = set(consensus_results_df[consensus_results_df['count_mode'] == 1]['still_id'].tolist())

# Stills from the 'disagree' frame and their rows in char_df.
conflict_images = set(disagree['still_id'].tolist())

conflict_df = char_df[char_df['still_id'].isin(conflict_images)]

In [15]:
600 * 3 * 3

5400

In [16]:
char_df.shape[0] * 0.05

311.1

In [17]:
char_counts = pd.Series(all_chars).value_counts()

In [751]:
char_counts

fred                                                 8747
barney                                               5021
wilma                                                3855
empty frame                                          3795
betty                                                2010
mr slate                                              797
dino                                                  616
pebbles                                               593
bamm bamm                                             218
hoppy                                                 154
old man                                                60
baby puss                                              53
man                                                    43
circus performer                                       27
the man dress color is brown                           26
the man was standing                                   24
bird                                                   21
construction w

In [752]:
char_counts[char_counts < 16].sum()

2458

In [753]:
322 * 0.02

6.44

In [754]:
tes = set([])

In [755]:
bool(tes)

False

In [757]:
#  char_df['characters'].apply(lambda x:x.difference(set(empty_characters)))

In [758]:
char_df['empty'] = ~char_df['characters'].apply(lambda x:bool(x.difference(set(empty_characters))))

In [759]:
char_df['nonmain_characters'] = char_df['characters'].apply(lambda x: len(x.difference(set(main_characters))))

In [760]:
grouped_by_aid = char_df.groupby('aid')

In [761]:
grouped_by_aid.agg(max)['nonmain_characters'].sum()

1326

In [762]:
grouped_by_aid.agg(max).shape

(2152, 8)

In [763]:
grouped_by_aid.agg(max)['nonmain_characters'].sum() * 3 * 3  * 0.02

238.68000000000001

In [764]:
char_df.shape[0] * 0.04

774.16

In [765]:
grouped_by_aid.agg(max)[grouped_by_aid.agg(min)['empty'] == True]

Unnamed: 0_level_0,characters,h_id,still_id,w_id,all_main,n_chars,empty,nonmain_characters
aid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
s_01_e_17_shot_012386_012460,{empty frame},3XH7ZM9YX3U4DBCE7OMFM4OL31XR9P,s_01_e_17_shot_012386_012460_70.png,ATUR98N8W23Q3,True,1,True,0
s_02_e_02_shot_023131_023205,{empty frame},3RBI0I35XF3RVAZK4ADZ3NEW6X63YS,s_02_e_02_shot_023131_023205_70.png,AVQND8233HQWK,True,1,True,0
s_02_e_04_shot_003058_003132,{empty frame},3VDVA3ILIEFVZG05N0DA8USV3GX1GT,s_02_e_04_shot_003058_003132_70.png,A3UE49REX108ZE,True,1,True,0
s_02_e_04_shot_032164_032238,{empty frame},3T6EIBTM9MOQJD02XJETN9AV9FSAAG,s_02_e_04_shot_032164_032238_70.png,AWDSIX3ULD32V,True,1,True,0
s_02_e_04_shot_041246_041320,{empty frame},3WYZV0QBFKD7Y0DNE7632S5ZSIMBXG,s_02_e_04_shot_041246_041320_70.png,ASKV5KO5CV616,True,1,True,0
s_02_e_06_shot_005960_006034,{empty frame},3Y3N5A7N4H9SHW7UASIEZMN26ONYM3,s_02_e_06_shot_005960_006034_70.png,AF1HT6VL272QZ,True,1,True,0
s_02_e_06_shot_038544_038618,{empty frame},3RWB1RTQDKNJNTDPP4CCHDTVIEVP8C,s_02_e_06_shot_038544_038618_70.png,A2X7BE2FSPUKCK,True,1,True,0
s_02_e_08_shot_000269_000343,{empty frame},3PGQRAZX03KVFRKZUPE0BVM8YC0YS8,s_02_e_08_shot_000269_000343_70.png,AEBETUY5OD68H,True,1,True,0
s_02_e_09_shot_029359_029433,{empty frame},3VDI8GSXAGTDAGKQMLMPN4F3HM28GJ,s_02_e_09_shot_029359_029433_70.png,AEF601SQFOSBL,True,1,True,0
s_02_e_13_shot_033462_033536,{empty frame},3WGCNLZJKG8SNWQELAC229PYQPM1DQ,s_02_e_13_shot_033462_033536_70.png,ASKV5KO5CV616,True,1,True,0


In [767]:
grouped_by_aid.agg(min)['empty'].sum()

34

In [667]:
grouped_by_aid.agg(max).shape

(700, 8)

## building frames

In [18]:
# Sort clips into three buckets according to how many of their annotations
# refer to stills in all_agree_set / not_main.  A clip needs exactly 9 hits
# to land in one of the first two buckets; everything else is "mixed".
all_main_annotations_by_frame = {}
no_main_annotations_by_frame = {}
mixed_all_frames = {}

for aid, clip_annos in annotations_by_frame.items():
    still_ids = [anno['stillID'] for anno in clip_annos]
    if sum(sid in all_agree_set for sid in still_ids) == 9:
        bucket = all_main_annotations_by_frame
    elif sum(sid in not_main for sid in still_ids) == 9:
        bucket = no_main_annotations_by_frame
    else:
        bucket = mixed_all_frames
    # Store a shallow copy so the buckets do not alias annotations_by_frame's lists.
    bucket[aid] = list(clip_annos)

print(len(all_main_annotations_by_frame), len(no_main_annotations_by_frame), len(mixed_all_frames))

322 72 306


# process subtask

In [120]:
subtask_results[0]

{'characterBoxes': '[{"left":177,"top":78,"width":193,"height":324,"label":"s_02_e_06_shot_006125_006199_char_0_taskb.png"},{"left":828,"top":78,"width":199,"height":323,"label":"s_02_e_06_shot_006125_006199_char_0_taskb.png"}]',
 'h_id': '3JTPR5MTZTCZPKX7EE019CX5MFV5KY',
 'stillID': 's_02_e_06_shot_006125_006199_taskb.png',
 'worker_id': 'A1MT1OXZGY8RJ2'}

In [121]:
# Flatten the subtask batch: concatenate every group of assignments in mapping order.
subtask_assignments = list(itertools.chain.from_iterable(subtask_data.values()))

subtask_results = [create_result(ar) for ar in subtask_assignments]

# One record per subtask assignment: its ids plus the set of distinct box labels.
characters_boxes = [
    {
        'h_id': anno['h_id'],
        'w_id': anno['worker_id'],
        'still_id': anno['stillID'],
        'characters': {ch['label'] for ch in json.loads(anno['characterBoxes'])},
    }
    for anno in subtask_results
]

In [124]:
# Bucket subtask results by clip id: the still id with its final '_...' piece
# cut off (for ids such as '<clip>_taskb.png' that leaves '<clip>').
subtask_by_frame = defaultdict(list)
subtask_in_still_order = sorted(subtask_results, key=lambda result: result['stillID'])
for anno in subtask_in_still_order:
    animation_id = anno['stillID'].rsplit('_', 1)[0]
    subtask_by_frame[animation_id].append(anno)

In [146]:
# for aid, animation_annos in list(subtask_by_frame.items())[:1]:
#     three_frame_img, consensus_boxes, labels = draw_animation_seq(animation_annos, cluster_from_nms)

In [174]:
# Corner pairs; each pair is concatenated into a 4-tuple crop box where used.
frame_origin, frame_size = (0, 0), (640, 480)   # box for left_image
frame_o2, frame_ext = (640, 0), (1280, 480)     # box for right_image

In [247]:
len(subtask_by_frame)

189

In [253]:
# Build two review images per subtask clip and remember their relative paths:
#   * the original three-frame rendering, and
#   * the two subtask frames with the original middle frame placed between them.
# img_dir and rev_dir are assigned in the "bounding box clustering" section and
# must already exist in the session when this cell runs.
subtask_rev_seq = []
for aid, animation_annos in tqdm(list(subtask_by_frame.items())):
    try:
        full_original_annos = annotations_by_frame[aid]
        # Entries 3..5 of the clip's sorted annotations are used as the middle frame.
        middle_frame_annos = full_original_annos[3:6]
        two_frame_image, _con_boxes, _labels = draw_image_and_labels(animation_annos, cluster_from_nms, 1, 3, './subtask_data/frames/')
        three_frame_img, _consensus_boxes, _labels_orig = draw_animation_seq(full_original_annos, cluster_from_nms)
        single_frame_image, _single_con_boxes, _labels = draw_image_and_labels(middle_frame_annos, cluster_from_nms, 1, 3)
        left_image = two_frame_image.crop(frame_origin + frame_size)
        right_image = two_frame_image.crop(frame_o2 + frame_ext)
        imgs_comb = Image.fromarray(np.hstack([left_image, single_frame_image, right_image]))
        o_img_path = os.path.join(img_dir, aid) + '_rev.png'
        new_img_path = os.path.join(img_dir, aid) + 'subtask_rev.png'
        three_frame_img.save(os.path.join(rev_dir, o_img_path))
        imgs_comb.save(os.path.join(rev_dir, new_img_path))
        subtask_rev_seq.append((aid, o_img_path, new_img_path))
    except Exception as err:
        # Best effort: report the failing clip and carry on.  Catching
        # Exception (not a bare except) keeps Ctrl-C able to stop the loop.
        print(aid, repr(err))

 21%|██        | 39/189 [00:41<02:57,  1.19s/it]

s_01_e_16_shot_029216_029290


 47%|████▋     | 89/189 [01:35<01:43,  1.03s/it]

s_02_e_08_shot_022319_022393


 50%|████▉     | 94/189 [01:38<01:20,  1.18it/s]

s_02_e_09_shot_013583_013657


 52%|█████▏    | 99/189 [01:41<01:19,  1.13it/s]

s_02_e_10_shot_031152_031226


 61%|██████    | 115/189 [01:58<01:24,  1.15s/it]

s_02_e_14_shot_040392_040466


 97%|█████████▋| 183/189 [03:10<00:07,  1.18s/it]

s_02_e_31_shot_030424_030498


 98%|█████████▊| 186/189 [03:12<00:02,  1.05it/s]

s_02_e_32_shot_005258_005332


100%|██████████| 189/189 [03:13<00:00,  1.29it/s]


In [254]:
page_html = generate_subtask_review(subtask_rev_seq)

# bounding box clustering

In [19]:
rev_dir = 'review_results'
img_dir = 'review_frames'

In [20]:
len(annotations_by_frame)

700

In [50]:
# Render a review image for every clip and collect per-clip summary data.
# Clips whose processing raises are recorded in error_frames and skipped.
review_seq = {}
error_frames = []
for aid, animation_annos in tqdm(list(annotations_by_frame.items())):
    try:
        three_frame_img, consensus_boxes, labels = draw_animation_seq(animation_annos, cluster_from_nms)
        avg_votes = np.mean([box['votes'] for frame in consensus_boxes for box in frame])
        # Order label records by the first box coordinate (presumably the left edge).
        ordered_labels = sorted(labels, key=lambda x: x['box'][0])
        review_seq[aid] = {
            'votes': avg_votes,
            'possible_labels': [c['possible_labels'] for c in ordered_labels],
            'chosen_labels': [c['chosen_labels'] for c in ordered_labels],
        }
        img_path = os.path.join(rev_dir, img_dir, aid) + '_rev.png'
        three_frame_img.save(img_path)
    except Exception:
        # Catching Exception (not a bare except) keeps Ctrl-C able to stop the loop.
        error_frames.append(aid)

rev_page = generate_review_page('bbox_review_new_8_7_small.html')

100%|██████████| 50/50 [00:24<00:00,  2.10it/s]


In [111]:
# For the first 200 clips, write the two-frame image and one crop per character,
# and record how many crops each '<clip>_taskb.png' image has.
subtask_stills = {}
for aid, animation_annos in tqdm(list(annotations_by_frame.items())[0:200]):
    two_frame_img, char_crops = create_subtask_data(animation_annos, cluster_from_nms)
    if not char_crops:
        continue
    two_frame_img.save('./subtask_data/frames/' + aid + '_taskb.png')
    n_chars = 0
    for charn, char_image in enumerate(char_crops):
        char_image.save('./subtask_data/char_crops/' + aid + '_char_' + str(charn) +'_taskb.png')
        n_chars += 1
    subtask_stills[aid + '_taskb.png'] = n_chars

100%|██████████| 200/200 [01:31<00:00,  2.79it/s]


In [112]:
pickle_this(subtask_stills, 'subtask_stills_to_anno.pkl')

In [77]:
def crop_character_box(img, char):
    """Return ``img.crop(...)`` applied to the box stored under ``char['box']``."""
    return img.crop(char['box'])

## debug

In [None]:
# review_seq = {}
# error_frames = []
# n = 0
# for aid, animation_annos in list(debug_sample.items()):
#     print(aid)
# #     try:
#     three_frame_img, consensus_boxes = draw_animation_seq(animation_annos, cluster_from_nms)
#     avg_votes = np.mean([box['votes'] for frame in consensus_boxes for box in frame])
#     possible_labels = [c['possible_labels'] for c in sorted(labels, key=lambda x: x['box'][0])]
#     chosen_labels = [c['chosen_labels'] for c in sorted(labels, key=lambda x: x['box'][0])]
#     review_seq[aid] = {'votes': avg_votes, 'possible_labels': possible_labels, 'chosen_labels': chosen_labels}
#     img_path = os.path.join(rev_dir, img_dir, aid) + '_rev.png'
#     three_frame_img.save(img_path)
# #     except IndexError:
# #         error_frames.append(aid)

# rev_page = generate_review_page('bbox_review_new_8_7_debug.html')
# rev_page = rev_page.replace('review_frames', 'review_results/review_frames').replace('.png', '.png?arg' + str(random.randint(0, 100000000)))
# HTML(rev_page)

In [23]:
0.911843854359 * 1.6

1.4589501669744

In [29]:
debug_clips = ['s_01_e_06_shot_020535_020609', 's_01_e_05_shot_009896_009970', 's_01_e_06_shot_025311_025385', 's_01_e_07_shot_014729_014803', 's_01_e_06_shot_032983_033057']
debug_clips = ['s_01_e_04_shot_016588_016662', 's_01_e_05_shot_009896_009970', 's_01_e_06_shot_020535_020609', 's_01_e_01_shot_022792_022866']
debug_clips = ['s_01_e_06_shot_020535_020609']
debug_clips += ['s_01_e_01_shot_022792_022866']
debug_clips += ['s_01_e_05_shot_002673_002747']
debug_clips += ['s_01_e_06_shot_007192_007266']
debug_clips += ['s_01_e_05_shot_004222_004296']

debug_sample = {debug_clip: annotations_by_frame[debug_clip] for debug_clip in debug_clips}

In [30]:
error_clip = list(annotations_by_frame.values())[0]

three_frames = [s3_base_path + error_clip[i]['stillID'] for i in [0, 6]]
imgs_comb = np.hstack([Image.open(requests.get(image_url, stream=True).raw) for image_url in three_frames])
test_image = Image.fromarray(imgs_comb)

In [26]:
test_image.size

NameError: name 'test_image' is not defined

In [996]:
annotations_by_frame['s_01_e_19_shot_025542_025616']

[]

# bonus cost estimate

In [197]:
# Labels used for frames without any character.
empty_characters = ['no characters', 'empty frame']

# Rebinds main_characters as a lower-cased list: the named cast followed by
# the empty-frame labels.
main_characters = [
    name.lower()
    for name in [
        "Fred", "Wilma", "Mr Slate", "Barney", "Betty",
        "Pebbles", "Dino", "Baby Puss", "Hoppy", "Bamm Bamm",
    ] + empty_characters
]

In [183]:
# Running totals over all reviewed clips: a flat 0.05 * 3 per clip, plus 0.03
# for every chosen label that is not in main_characters.
frame_cost = 0
bonus_cost = 0
for anno in review_seq.values():
    frame_cost += 0.05 * 3
    extra_labels = sum(1 for char in anno['chosen_labels'] if char not in main_characters)
    bonus_cost += extra_labels * 0.03

In [184]:
print(frame_cost, bonus_cost)
frame_cost + bonus_cost

80.25000000000018 3.659999999999995


83.91000000000018

In [200]:
# Total number of chosen labels, across all reviewed clips, that are not one of
# the empty-frame labels.  The accumulator must start as an int: adding an int
# to a list with += raises TypeError.
frame_count = 0
for anno in review_seq.values():
    frame_count += sum(1 for char in anno['chosen_labels'] if char not in empty_characters)

In [201]:
frame_count

787

# HTML Review

In [241]:
# Jinja2 template for the bounding-box review page.  generate_review_page()
# renders it with `clips`, each a 6-tuple:
#   clip[0] clip id, clip[1] <img> tag, clip[2] votes value (shown as "score"),
#   clip[3] rank, clip[4] chosen labels, clip[5] possible labels.
review_page_html = """
<!DOCTYPE html>
<html>
  <head>
    <style type="text/css">
       .container {
          }
    </style>
  </head>
  <body style=max-width: 100px>
    <div class="container">
      <ul>
        {% for clip in clips %}
        <h2>clip: {{clip[0]}}</h2>
        <h3>score: {{clip[2]}}      rank: {{clip[3]}}</h3>
        <p>{{clip[1]}}</p>
        <p>{{clip[4]}}</p>
        <p>{{clip[5]}}</p>
        {% endfor %}
      </ul>
    </div>
    <script src="http://code.jquery.com/jquery-1.10.2.min.js"></script>
    <script src="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/js/bootstrap.min.js"></script>
  </body>
</html>
"""


# Jinja2 template for the subtask comparison page.  generate_subtask_review()
# renders it with `clips`, each a 3-tuple:
#   clip[0] clip id, clip[1] <img> tag for the original three-frame image,
#   clip[2] <img> tag for the new-frames-plus-original-middle image.
review_subtask_comp = """
<!DOCTYPE html>
<html>
  <head>
    <style type="text/css">
       .container {
          }
    </style>
  </head>
  <body style=max-width: 100px>
    <div class="container">
      <ul>
        {% for clip in clips %}
        <h2>clip: {{clip[0]}}</h2>
        <h4>original 3 frames:</h4>
        <p>{{clip[1]}}</p>
        <h4>new frames + original middle:</h4>
        <p>{{clip[2]}}</p>
        {% endfor %}
      </ul>
    </div>
    <script src="http://code.jquery.com/jquery-1.10.2.min.js"></script>
    <script src="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/js/bootstrap.min.js"></script>
  </body>
</html>
"""

In [255]:
s3_base_path = 'https://s3-us-west-2.amazonaws.com/ai2-vision-animation-gan/annotation_data/still_frames/'


def make_uri(clip_id, s3_base='review_frames', file_ext='_rev.png'):
    """Return an ``<img>`` tag pointing at a clip's review image.

    The image location is ``s3_base`` + ``clip_id`` + ``file_ext``.  A ``/`` is
    inserted after a non-empty ``s3_base`` that does not already end with one,
    so the default produces ``review_frames/<clip_id>_rev.png`` rather than
    gluing the directory name onto the clip id.

    Args:
        clip_id: clip identifier used as the file stem.
        s3_base: directory or URL prefix; may be given with or without a
            trailing slash.  An empty string means no prefix.
        file_ext: suffix appended after the clip id.

    Returns:
        The HTML image tag as a string.
    """
    if s3_base and not s3_base.endswith('/'):
        s3_base += '/'
    s3_uri = s3_base + clip_id + file_ext
    return '<img src="' + s3_uri + '" width=800px>'

def make_comp_uri(clip_id, s3_base=s3_base_path):
    """Return an ``<img>`` tag whose source is ``s3_base`` directly followed by ``clip_id``.

    No separator or file extension is added; ``s3_base`` defaults to the value
    ``s3_base_path`` had when this function was defined.
    """
    return '<img src="' + (s3_base + clip_id) + '" width=800px>'


def generate_review_page(out_page='bbox_review.html'):
    """Render the module-level ``review_seq`` into an HTML review page.

    Clips are listed in ascending order of their ``'votes'`` value.  Ranks are
    assigned in reverse: the first clip listed gets rank ``len(review_seq)``
    and the last one gets rank 1.  The page is rendered from
    ``review_page_html`` through ``j2env`` and written to
    ``./review_results/<out_page>``, creating the directory if needed.

    Args:
        out_page: file name of the page inside ``./review_results/``.

    Returns:
        The rendered HTML string.  The copy written to disk additionally has
        every non-ASCII character removed.
    """
    # Sort once and derive every template column from the same ordering.
    ranked = sorted(review_seq.items(), key=lambda x: x[1]['votes'])
    n_clips = len(ranked)
    send_to_template = [
        (
            clip_id,
            make_uri(clip_id),
            info['votes'],
            n_clips - position,
            info['chosen_labels'],
            info['possible_labels'],
        )
        for position, (clip_id, info) in enumerate(ranked)
    ]

    template = j2env.from_string(review_page_html)
    html_dir = './review_results/'
    html_out_file = os.path.join(html_dir, out_page)
    os.makedirs(html_dir, exist_ok=True)
    page_html = template.render(clips=send_to_template)

    with open(html_out_file, 'w') as f:
        f.write(page_html.encode('ascii', 'ignore').decode('utf-8'))
    return page_html


def generate_subtask_review(rev_seq, out_page='review_subtask_comp.html'):
    """Render the subtask comparison page for ``rev_seq``.

    Each entry's second and third elements are turned into ``<img>`` tags with
    ``make_comp_uri``; the page is rendered from ``review_subtask_comp``
    through ``j2env`` and written to ``./review_results/<out_page>``, creating
    the directory if needed.

    Args:
        rev_seq: iterable of sequences whose first three elements are
            ``(clip id, original image path, new image path)``.
        out_page: file name of the page inside ``./review_results/``.

    Returns:
        The rendered HTML string.  The copy written to disk additionally has
        every non-ASCII character removed.
    """
    clips = [(rev[0], make_comp_uri(rev[1]), make_comp_uri(rev[2])) for rev in rev_seq]
    template = j2env.from_string(review_subtask_comp)
    html_dir = './review_results/'
    html_out_file = os.path.join(html_dir, out_page)
    os.makedirs(html_dir, exist_ok=True)
    page_html = template.render(clips=clips)

    with open(html_out_file, 'w') as f:
        f.write(page_html.encode('ascii', 'ignore').decode('utf-8'))

    return page_html

In [256]:
page_html = generate_subtask_review(subtask_rev_seq)

# hide

In [None]:
vote_series = pd.Series([vote for vote in review_seq.values()])

In [13]:
# assignments =[]
# for assignment_triple in list(results.values()):
#     assignments.extend(assignment_triple)

# assignment_results = [create_result(ar) for ar in assignments]

# characters_present = [{'h_id': anno['h_id'], 'w_id': anno['worker_id'], 'still_id': anno['stillID'], 'characters': set([ch['label'] for ch in json.loads(anno['characterBoxes'])])} for anno in assignment_results]

# main_characters =  {"fred",
#              "barney",
#               "wilma",
#               "betty",
#               "pebbles",
#               "bamm bamm",
#               "dino",
#               "mr slate",
#               "baby puss",
#               "hoppy",
#               "no characters"}

# def all_main(char_set, main_chars=main_characters):
#     return not bool(char_set.difference(main_chars))

# char_df = pd.DataFrame(characters_present)

# char_df['all_main'] = char_df['characters'].apply(lambda x: all_main(x))

# all_chars = [c for char_list in char_df['characters'].apply(lambda x: list(x)).tolist() for c in char_list]
# char_counts = pd.Series(all_chars).value_counts()

# # char_counts.hist(bins = 30, log=True)

# char_df['n_chars'] = char_df['characters'].apply(lambda x: len(x))

# prolific_workers = char_df['w_id'].value_counts().index.tolist()[:5]

# grouped_by_still = char_df.groupby('still_id')

# agg_df = grouped_by_still.aggregate(len)

# import scipy

# agged_on_mode = grouped_by_still.agg(lambda x: scipy.stats.mode(x))
# consensus_results_df = agged_on_mode.reset_index()

# consensus_results_df['count_mode'] = consensus_results_df['n_chars'].apply(lambda x: x[1][0])

# consensus_results_df

# consensus_results_df['mode_count'] = consensus_results_df['characters'].apply(lambda x: x[1][0])

# consensus_results_df['all_agree'] = consensus_results_df['count_mode'] == 3
# consensus_results_df['two_agree'] = consensus_results_df['count_mode'] == 2

In [14]:
# char_count_df = consensus_results_df[consensus_results_df['count_mode'] == 3]

# # consensus_results_df[agree_all_main]['characters'].apply(lambda x: x[1][0]).value_counts()

# not_main =set(consensus_results_df[~agree_all_main]['still_id'].tolist())

# not_main_df = char_df[char_df['still_id'].isin(not_main)]

# consensus_results_df.head(1)

# agree_all_main = consensus_results_df['all_main'].apply(lambda x: x[1][0]) == 3 

# all_agree = consensus_results_df[agree_all_main][consensus_results_df[agree_all_main]['all_agree']]
# disagree = consensus_results_df[agree_all_main][~consensus_results_df[agree_all_main]['all_agree']]







# all_agree_set = set(all_agree['still_id'].tolist())

# count_all_agree_set = set(consensus_results_df[consensus_results_df['count_mode'] == 3]['still_id'].tolist())
# count_two_agree_set = set(consensus_results_df[consensus_results_df['count_mode'] == 2]['still_id'].tolist())
# count_none_agree_set = set(consensus_results_df[consensus_results_df['count_mode'] == 1]['still_id'].tolist())

# conflict_images = set(disagree['still_id'].tolist())

# conflict_df = char_df[char_df['still_id'].isin(conflict_images)]

# import PIL.Image as Image
# import requests

# image_n = 10

# consensus_results_df.iloc[image_n * 9: image_n * 9 + 3]

# s3_base_path = 'https://s3-us-west-2.amazonaws.com/ai2-vision-animation-gan/annotation_data/still_frames/'

# image_url = s3_base_path + consensus_results_df['still_id'].iloc[image_n *  9]
# Image.open(requests.get(image_url, stream=True).raw)