# TODOs
* Table with the number of candidates per setting (confusion_tuple + min_confusion)
    * Min, avg, and max for int or float properties

In [1]:
import pandas
from tabulate import tabulate
from IPython.core.display import display, HTML
from statistics import mean
from collections import Counter

In [2]:
import look_up_utils
import createq_utils

## Load frames

In [3]:
# Names of the pickled frames to load; `frames` is reused below when the
# look-up is built, so it holds bare names rather than full paths.
frames = [
    'mass_shootings',
    'mass_shootings_2013',
    'mass_shootings_2014',
    'mass_shootings_2015',
]

# Unpickle every frame from the shared directory and stack them into `df`.
df = pandas.concat(list(map(
    pandas.read_pickle,
    ['../EventRegistries/GunViolence/frames/' + frame_name
     for frame_name in frames],
)))

## Load look_up

In [4]:
# Build the look-up from the frame *names* (not the concatenated `df`).
# `create_look_up` returns two values, unpacked here; both are passed to
# `createq_utils.lookup_and_merge` when the summary table is built.
# NOTE(review): the structure of both objects is defined in look_up_utils —
# confirm there before relying on their shape.
look_up, parameters2incident_uris = look_up_utils.create_look_up(frames)

## Create table

In [6]:
# Build one summary row per (confusion_tuple, min_confusion) setting and
# render all rows as an HTML table.
lists_of_lists = []

# Candidate attributes summarised per setting as a (min, mean, max) triple,
# each value rounded to one decimal.
wanted_attrs = ['answer', 'n2s_ratio',
                'a_avg_date_spread', 'a_avg_num_sources',
                'n_avg_date_spread', 'n_avg_num_sources']

headers = ['confusion', 'min_confusion', '#cand', 'granularity'] + wanted_attrs

for confusion_tuple in [('location', 'time'),
                        ('location', 'participant'),
                        ('participant', 'time'),
                        ]:
    for min_confusion in range(2, 4):
        # Progress output: one line per setting.
        print(confusion_tuple, min_confusion)
        candidates = createq_utils.lookup_and_merge(look_up,
                                                    parameters2incident_uris,
                                                    confusion_tuple,
                                                    min_confusion,
                                                    df,
                                                    debug=False,
                                                    inspect_one=False)

        # How many candidates fall under each granularity value.
        gran_distr = Counter(cand.granularity for cand in candidates)

        one_row = [confusion_tuple, min_confusion, len(candidates), gran_distr]

        for attr in wanted_attrs:
            values = [getattr(cand, attr) for cand in candidates]
            if values:
                row_value = (round(min(values), 1),
                             round(mean(values), 1),
                             round(max(values), 1))
            else:
                # min(), max() and statistics.mean() all raise on an empty
                # sequence; keep the row (with '#cand' == 0) instead of
                # aborting the whole table for one empty setting.
                row_value = (None, None, None)
            one_row.append(row_value)

        lists_of_lists.append(one_row)

stats_df = pandas.DataFrame(lists_of_lists, columns=headers)
table = tabulate(stats_df, headers='keys', tablefmt='html')
display(HTML(table))

('location', 'time') 2
('location', 'time') 3
('location', 'participant') 2
('location', 'participant') 3
('participant', 'time') 2
('participant', 'time') 3


Unnamed: 0,confusion,min_confusion,#cand,granularity,answer,n2s_ratio,a_avg_date_spread,a_avg_num_sources,n_avg_date_spread,n_avg_num_sources
0,"('location', 'time')",2,17,"Counter({('city', 'year'): 13, ('address', 'year'): 2, ('city', 'month'): 2})","(2, 2.2, 3)","(11.5, 116.2, 193.0)","(0, 68.4, 815)","(1.0, 2.7, 5.0)","(35.8, 60.8, 148.8)","(1.8, 2.5, 3.1)"
1,"('location', 'time')",3,3,"Counter({('city', 'year'): 3})","(3, 3, 3)","(91.0, 94.6, 96.5)","(0.7, 22.2, 37.2)","(2.3, 2.8, 3.5)","(37.3, 48.5, 56.4)","(1.8, 2.7, 3.1)"
2,"('location', 'participant')",2,467,"Counter({('state', 'last'): 186, ('state', 'first'): 178, ('city', 'last'): 43, ('city', 'first'): 38, ('state', 'full_name'): 11, ('city', 'full_name'): 11})","(2, 2.4, 8)","(0.0, 31.1, 104.5)","(0, 106.0, 1054)","(0.5, 4.1, 17.5)","(-1, 69.9, 448)","(0, 2.9, 9.3)"
3,"('location', 'participant')",3,101,"Counter({('state', 'last'): 51, ('state', 'first'): 39, ('city', 'last'): 6, ('city', 'first'): 3, ('state', 'full_name'): 1, ('city', 'full_name'): 1})","(3, 3.6, 8)","(0.3, 25.6, 64.3)","(0, 98.9, 758.7)","(1.3, 3.9, 11.0)","(0, 68.0, 174.1)","(2.0, 3.0, 4.5)"
4,"('participant', 'time')",2,936,"Counter({('first', 'year'): 390, ('last', 'year'): 280, ('first', 'month'): 114, ('last', 'month'): 109, ('full_name', 'year'): 14, ('last', 'day'): 13, ('full_name', 'month'): 9, ('first', 'day'): 6, ('full_name', 'day'): 1})","(2, 2.8, 17)","(0.5, 99.3, 197.5)","(0, 117.4, 1438.5)","(0.5, 4.2, 19.0)","(0, 73.5, 311.1)","(1.0, 2.8, 5.5)"
5,"('participant', 'time')",3,309,"Counter({('first', 'year'): 155, ('last', 'year'): 120, ('last', 'month'): 20, ('first', 'month'): 12, ('last', 'day'): 1, ('full_name', 'year'): 1})","(3, 4.3, 17)","(5.0, 77.5, 132.7)","(0, 129.2, 915.3)","(1.3, 4.1, 14.0)","(0.8, 74.1, 205.7)","(1.8, 2.7, 5.0)"
