# Table of Contents
* [load data](#load-data)
* [bounding box clustering](#bounding-box-clustering)
	* [code](#code)
	* [run](#run)
* [HTML Review](#HTML-Review)
* [hide](#hide)


In [630]:
%%capture
from __future__ import division
import numpy as np
import pandas as pd
import scipy.stats as st
import itertools
import math
from collections import Counter, defaultdict
%load_ext autoreload
%autoreload 2

import matplotlib as mpl
mpl.use("Agg")
import matplotlib.pylab as plt
#%matplotlib notebook
%matplotlib inline
%load_ext base16_mplrc
%base16_mplrc light default
plt.rcParams['figure.figsize'] = (16.0, 10.0)

import re
# from datetime import datetime
# import dateutil.parser as dt_parse
import pickle
import boto
from copy import deepcopy
import json
import os
import jinja2
from IPython.core.display import HTML

# import boto.mturk.connection as tc
# import boto.mturk.question as tq
from boto.mturk.qualification import PercentAssignmentsApprovedRequirement, Qualifications, Requirement

from keysTkingdom import mturk_ai2
from keysTkingdom import aws_tokes
from keysTkingdom import mturk_aristo

from tqdm import tqdm
import random

# import pdfextraction.amt_boto_modules as amt_util
# from tqa_utils import Evaluator

# load data

In [5]:
s3_base_path = 'https://s3-us-west-2.amazonaws.com/ai2-vision-animation-gan/annotation_data/still_frames/'

In [564]:
def create_result(assmt):
    """Convert one assignment object into a flat result dictionary.

    Parameters
    ----------
    assmt : object
        Must expose ``answers[0][0].fields[0]`` holding a JSON string, plus
        ``HITId`` and ``WorkerId`` attributes (presumably a boto MTurk
        assignment -- TODO confirm against the caller).

    Returns
    -------
    dict
        The decoded JSON payload with ``'h_id'`` and ``'worker_id'`` added.
    """
    raw_answer = assmt.answers[0][0].fields[0]
    record = json.loads(raw_answer)
    # Tag the decoded payload with the HIT and worker it came from.
    for key, attr_name in (('h_id', 'HITId'), ('worker_id', 'WorkerId')):
        record[key] = getattr(assmt, attr_name)
    return record

In [8]:
import pickle

def pickle_this(results_df, file_name):
    """Serialize ``results_df`` to ``file_name`` with pickle.

    Parameters
    ----------
    results_df : object
        Any picklable object (the name suggests a DataFrame, but nothing
        here requires one).
    file_name : str
        Destination path; an existing file is overwritten.
    """
    # pickle writes bytes, so the file must be opened in binary mode;
    # text mode ('w') raises TypeError on Python 3.
    with open(file_name, 'wb') as f:
        pickle.dump(results_df, f)

def un_pickle_this(file_name):
    """Load and return the object pickled at ``file_name``.

    Parameters
    ----------
    file_name : str
        Path to a pickle file, read in binary mode.

    Returns
    -------
    object
        Whatever object was stored in the file.

    Notes
    -----
    Unpickling can execute arbitrary code; only use this on trusted files.
    """
    with open(file_name, 'rb') as pickled_file:
        return pickle.load(pickled_file)

In [73]:
batch_2_data = un_pickle_this('batch_2.pkl')

In [80]:
# Flatten the dicts stored under each key of batch_2_data into a single mapping
# keyed by the inner keys (presumably clip ids -- TODO confirm).
# If the same inner key appears in more than one batch, the value from the batch
# iterated last overwrites the earlier one.
combined_data = {k: v for d in batch_2_data.values() for k, v in d.items() }

In [76]:
combined_data.keys()

dict_keys(['mixed', 'dirty', 'clean'])

In [130]:
sb, dp = test_nms()

----------
Selected Boxes
----------
{'box': array([173,  57, 338, 338]), 'area': 46365, 'label': 'fred', 'idx': 0, 'votes': 3}
{'box': array([474, 117, 589, 375]), 'area': 29670, 'label': 'worker', 'idx': 2, 'votes': 3}
{'box': array([379,  67, 464, 355]), 'area': 24480, 'label': 'workers', 'idx': 1, 'votes': 2}
----------
Duplicate Boxes
----------
{'box': array([ 66,  39, 322, 364]), 'area': 83200, 'label': 'fred', 'idx': 3, 'duplicate_of': 0}
{'box': array([368,  51, 590, 360]), 'area': 68598, 'label': 'no characters', 'idx': 4, 'duplicate_of': 2}
{'box': array([166,  55, 339, 329]), 'area': 47402, 'label': 'fred', 'idx': 5, 'duplicate_of': 0}
{'box': array([454, 117, 592, 365]), 'area': 34224, 'label': 'balding businessman', 'idx': 7, 'duplicate_of': 2}
{'box': array([372,  70, 483, 359]), 'area': 32079, 'label': 'mr slate', 'idx': 6, 'duplicate_of': 1}


In [124]:
dp

[{'area': 83200,
  'box': array([ 66,  39, 322, 364]),
  'duplicate_of': 0,
  'idx': 3,
  'label': 'fred'},
 {'area': 68598,
  'box': array([368,  51, 590, 360]),
  'duplicate_of': 2,
  'idx': 4,
  'label': 'no characters'},
 {'area': 47402,
  'box': array([166,  55, 339, 329]),
  'duplicate_of': 0,
  'idx': 5,
  'label': 'fred'},
 {'area': 34224,
  'box': array([454, 117, 592, 365]),
  'duplicate_of': 2,
  'idx': 7,
  'label': 'balding businessman'},
 {'area': 32079,
  'box': array([372,  70, 483, 359]),
  'duplicate_of': 1,
  'idx': 6,
  'label': 'mr slate'}]

In [108]:
sb

[{'area': 46365,
  'box': array([173,  57, 338, 338]),
  'idx': 0,
  'label': 'fred',
  'votes': 3},
 {'area': 29670,
  'box': array([474, 117, 589, 375]),
  'idx': 2,
  'label': 'worker',
  'votes': 3},
 {'area': 24480,
  'box': array([379,  67, 464, 355]),
  'idx': 1,
  'label': 'workers',
  'votes': 2}]

# bounding box clustering

## code 

In [52]:
from amt_utils.bboxes import test_nms
from amt_utils.bboxes import cluster_from_nms
from amt_utils.bboxes import draw_animation_seq
from amt_utils.bboxes import cluster_from_annos

In [63]:
# Pick one (id, annotations) pair at random from the 'mixed' batch.
# NOTE(review): no random seed is set in the visible cells, so `aid` and
# `animation_annos` change between runs -- TODO confirm whether a fixed seed
# or a fixed clip id is wanted for reproducibility.
# `aid` is bound here as a notebook global; cluster_and_render_frames reads it.
annotation_sample = batch_2_data['mixed']
aid, animation_annos =  random.choice(list(annotation_sample.items()))

In [291]:
animation_annos[:3]

[{'characterBoxes': '[{"left":313,"top":127,"width":102,"height":113,"label":"fred"},{"left":268,"top":157,"width":75,"height":81,"label":"barney"}]',
  'h_id': '3QTFNPMJC7IY442ZIFZXQ8L4H45NZF',
  'stillID': 's_01_e_23_shot_029337_029411_10.png',
  'worker_id': 'A39AXTTH0QVFA3'},
 {'characterBoxes': '[{"left":340,"top":127,"width":85,"height":127,"label":"no characters"},{"left":262,"top":172,"width":96,"height":100,"label":"no characters"}]',
  'h_id': '3QTFNPMJC7IY442ZIFZXQ8L4H45NZF',
  'stillID': 's_01_e_23_shot_029337_029411_10.png',
  'worker_id': 'A300BK2S2PO3Q7'},
 {'characterBoxes': '[{"left":271,"top":163,"width":72,"height":72,"label":"barney"},{"left":336,"top":129,"width":78,"height":114,"label":"fred"},{"left":273,"top":165,"width":71,"height":69,"label":"blonde man with brown shirt and green hat"},{"left":336,"top":125,"width":81,"height":117,"label":"man with black hair, orange shirt with balck dots and blue tie with a green hat"}]',
  'h_id': '3QTFNPMJC7IY442ZIFZXQ8L4H4

In [None]:
's_01_e_23_shot_027533_027607'

In [509]:
rev_dir = 'review_results'
img_dir = 'review_frames'

In [518]:
review_seq

{'review_results/review_frames/s_01_e_08_shot_015818_015892_rev.png': 2.6666666666666665}

In [None]:
# test_frame = 's_01_e_22_shot_013933_014007'
# test_frame = 's_01_e_23_shot_029337_029411'
# test_frame = 's_01_e_22_shot_033714_033788'
test_frame = 's_01_e_10_shot_013310_013384'

annotation_sample = batch_2_data['mixed']
# aid, animation_annos =  random.choice(list(annotation_sample.items()))
# aid, animation_annos = test_frame, combined_data[test_frame]

In [590]:
from tqdm import tqdm

In [591]:
def cluster_and_render_frames(animation_annos, clip_id=None):
    """Cluster one clip's annotations, record its mean vote count, and save the review image.

    Parameters
    ----------
    animation_annos : object
        Annotations for a single clip, passed straight through to
        ``draw_animation_seq`` together with ``cluster_from_nms``.
    clip_id : str, optional
        Identifier used for the output file name and as the key in
        ``review_seq``. When omitted, the notebook-global ``aid`` is used,
        which is what the original implementation always did.

    Side effects
    ------------
    Stores the mean of the ``'votes'`` values over all consensus boxes in the
    global ``review_seq[clip_id]`` and calls ``.save`` on the rendered image
    with the path ``<rev_dir>/<img_dir>/<clip_id>_rev.png``. Reads the globals
    ``rev_dir`` and ``img_dir``.
    """
    if clip_id is None:
        # Backward-compatible fallback: relying on the global is fragile because it
        # holds whatever clip the last loop or cell happened to bind.
        clip_id = aid
    three_frame_img, consensus_boxes = draw_animation_seq(animation_annos, cluster_from_nms)
    # consensus_boxes is iterated as frames, each an iterable of dicts with a 'votes' key.
    avg_votes = np.mean([box['votes'] for frame in consensus_boxes for box in frame])
    img_path = os.path.join(rev_dir, img_dir, clip_id) + '_rev.png'
    review_seq[clip_id] = avg_votes
    three_frame_img.save(img_path)

In [616]:
# Map of clip id -> mean 'votes' per consensus box; rebuilt from scratch on every run.
review_seq = {}

# NOTE: the loop variables `aid` and `animation_annos` are notebook globals and stay
# bound to the last clip once the loop finishes.
for aid, animation_annos in tqdm(list(combined_data.items())):
    try:
        three_frame_img, consensus_boxes = draw_animation_seq(animation_annos, cluster_from_nms)
        avg_votes = np.mean([box['votes'] for frame in consensus_boxes for box in frame])
        img_path = os.path.join(rev_dir, img_dir, aid) + '_rev.png'
        review_seq[aid] = avg_votes
#         three_frame_img.save(img_path)
    except Exception as err:
        # Best-effort: skip clips that fail, but report why. Catching Exception instead
        # of using a bare `except:` lets KeyboardInterrupt/SystemExit stop the loop.
        print(aid, repr(err))

 16%|█▋        | 33/200 [00:01<00:05, 33.21it/s]

s_01_e_10_shot_032448_032522


100%|██████████| 200/200 [00:06<00:00, 32.33it/s]


In [621]:
len(review_seq)

199

In [631]:
# Collect the per-clip average vote counts from review_seq into a pandas Series
# (default positional index; the clip ids are not kept).
vote_series = pd.Series(list(review_seq.values()))

In [633]:
# _ = vote_series.hist(bins=20)

In [607]:
save_rev_seq = deepcopy(review_seq)

# HTML Review

In [377]:
import os
import jinja2
import argparse
from jinja2 import Environment, FileSystemLoader

j2env = jinja2.Environment()

In [597]:
# Jinja2 template source for the bounding-box review page. It loops over a `clips`
# sequence and, for each entry, renders clip[0] inside an <h2> and clip[1] inside a <p>.
# NOTE(review): `<body style=max-width: 100px>` has an unquoted attribute value that
# contains a space, so the style is probably not applied as written -- confirm the
# intent before quoting it, since a real 100px max-width would be far narrower than
# the 800px-wide images produced by make_uri.
# NOTE(review): the jQuery/Bootstrap <script> tags are fetched over plain http and
# nothing in this template appears to use them.
review_page_html = """
<!DOCTYPE html>
<html>
  <head>
    <style type="text/css">
       .container {
          }
    </style>
  </head>
  <body style=max-width: 100px>
    <div class="container">
      <ul>
        {% for clip in clips %}
        <h2>Clip: {{clip[0]}}</h2>
        <p>{{clip[1]}}</p>
        {% endfor %}
      </ul>
    </div>
    <script src="http://code.jquery.com/jquery-1.10.2.min.js"></script>
    <script src="http://netdna.bootstrapcdn.com/bootstrap/3.0.0/js/bootstrap.min.js"></script>
  </body>
</html>
"""

In [613]:
def make_uri(clip_id, s3_base='review_frames/', file_ext='_rev.png'):
    """Build an HTML ``<img>`` tag pointing at a clip's review image.

    Parameters
    ----------
    clip_id : str
        Clip identifier, placed between ``s3_base`` and ``file_ext``.
    s3_base : str
        Prefix for the image location (defaults to the relative
        ``'review_frames/'`` directory).
    file_ext : str
        Suffix appended after the clip id.

    Returns
    -------
    str
        ``<img src="..." width=800px>`` with the assembled location as ``src``.
    """
    image_location = s3_base + clip_id + file_ext
    return ''.join(['<img src="', image_location, '" width=800px>'])


def generate_review_page():
    """Render the review page for every clip in ``review_seq`` and write it to disk.

    Clips are listed in ascending order of their ``review_seq`` value (the
    average vote count), so the lowest-scoring clips come first. Each clip is
    passed to the template as a ``(clip_id, img_tag)`` pair, with the tag built
    by ``make_uri``.

    Reads the notebook globals ``review_seq``, ``j2env`` and
    ``review_page_html``. Creates ``./review_results/`` if it is missing and
    writes ``bbox_review.html`` there.

    Returns
    -------
    str
        The rendered HTML, before the ASCII filtering applied to the file.
    """
    # Sort once so clip ids and their image tags are paired from the same ordering.
    clips_by_conf = sorted(review_seq.items(), key=lambda item: item[1])
    clips = [(clip_id, make_uri(clip_id)) for clip_id, _votes in clips_by_conf]

    template = j2env.from_string(review_page_html)
    html_dir = './review_results/'
    html_out_file = os.path.join(html_dir, 'bbox_review.html')
    if not os.path.exists(html_dir):
        os.makedirs(html_dir)
    page_html = template.render(clips=clips)

    with open(html_out_file, 'w') as f:
        # Non-ASCII characters are dropped from the file on disk (the returned string keeps them).
        f.write(page_html.encode('ascii', 'ignore').decode('utf-8'))

    return page_html

_ = generate_review_page()

# hide

In [13]:
# assignments =[]
# for assignment_triple in list(results.values()):
#     assignments.extend(assignment_triple)

# assignment_results = [create_result(ar) for ar in assignments]

# characters_present = [{'h_id': anno['h_id'], 'w_id': anno['worker_id'], 'still_id': anno['stillID'], 'characters': set([ch['label'] for ch in json.loads(anno['characterBoxes'])])} for anno in assignment_results]

# main_characters =  {"fred",
#              "barney",
#               "wilma",
#               "betty",
#               "pebbles",
#               "bamm bamm",
#               "dino",
#               "mr slate",
#               "baby puss",
#               "hoppy",
#               "no characters"}

# def all_main(char_set, main_chars=main_characters):
#     return not bool(char_set.difference(main_chars))

# char_df = pd.DataFrame(characters_present)

# char_df['all_main'] = char_df['characters'].apply(lambda x: all_main(x))

# all_chars = [c for char_list in char_df['characters'].apply(lambda x: list(x)).tolist() for c in char_list]
# char_counts = pd.Series(all_chars).value_counts()

# # char_counts.hist(bins = 30, log=True)

# char_df['n_chars'] = char_df['characters'].apply(lambda x: len(x))

# prolific_workers = char_df['w_id'].value_counts().index.tolist()[:5]

# grouped_by_still = char_df.groupby('still_id')

# agg_df = grouped_by_still.aggregate(len)

# import scipy

# agged_on_mode = grouped_by_still.agg(lambda x: scipy.stats.mode(x))
# consensus_results_df = agged_on_mode.reset_index()

# consensus_results_df['count_mode'] = consensus_results_df['n_chars'].apply(lambda x: x[1][0])

# consensus_results_df

# consensus_results_df['mode_count'] = consensus_results_df['characters'].apply(lambda x: x[1][0])

# consensus_results_df['all_agree'] = consensus_results_df['count_mode'] == 3
# consensus_results_df['two_agree'] = consensus_results_df['count_mode'] == 2

In [14]:
# char_count_df = consensus_results_df[consensus_results_df['count_mode'] == 3]

# # consensus_results_df[agree_all_main]['characters'].apply(lambda x: x[1][0]).value_counts()

# not_main =set(consensus_results_df[~agree_all_main]['still_id'].tolist())

# not_main_df = char_df[char_df['still_id'].isin(not_main)]

# consensus_results_df.head(1)

# agree_all_main = consensus_results_df['all_main'].apply(lambda x: x[1][0]) == 3 

# all_agree = consensus_results_df[agree_all_main][consensus_results_df[agree_all_main]['all_agree']]
# disagree = consensus_results_df[agree_all_main][~consensus_results_df[agree_all_main]['all_agree']]







# all_agree_set = set(all_agree['still_id'].tolist())

# count_all_agree_set = set(consensus_results_df[consensus_results_df['count_mode'] == 3]['still_id'].tolist())
# count_two_agree_set = set(consensus_results_df[consensus_results_df['count_mode'] == 2]['still_id'].tolist())
# count_none_agree_set = set(consensus_results_df[consensus_results_df['count_mode'] == 1]['still_id'].tolist())

# conflict_images = set(disagree['still_id'].tolist())

# conflict_df = char_df[char_df['still_id'].isin(conflict_images)]

# import PIL.Image as Image
# import requests

# image_n = 10

# consensus_results_df.iloc[image_n * 9: image_n * 9 + 3]

# s3_base_path = 'https://s3-us-west-2.amazonaws.com/ai2-vision-animation-gan/annotation_data/still_frames/'

# image_url = s3_base_path + consensus_results_df['still_id'].iloc[image_n *  9]
# Image.open(requests.get(image_url, stream=True).raw)