In [1]:
import json
import pandas as pd

In [2]:
# Load the F-scores of the overall model and of every per-provider evaluation,
# then lay them out as an entity (rows) x provider (columns) table.
model_path = '../model/model-best'
provider_perf_dir = '../results/models-performance'

# Single source of truth for the providers: it drives both which result files
# are read and the column order of the displayed table.
providers = ('WIPO', 'FORUM', 'CAC', 'ADNDRC', 'CIIDRC')

# Pin the encoding: open() otherwise falls back to the platform default, which
# is not UTF-8 everywhere and can break on non-ASCII content in the JSON.
with open(f'{model_path}/meta.json', 'r', encoding='utf-8') as fin:
    genStats = json.load(fin)

provStats = {}
for provider in providers:
    with open(f'{provider_perf_dir}/{provider}.json', 'r', encoding='utf-8') as fin:
        provStats[provider] = json.load(fin)

# Long-format records: (provider, entity, F-score). 'All' is the overall model.
out_arr = []
for entity, score in genStats['performance']['ents_per_type'].items():
    out_arr.append(('All', entity, score['f']))

    for provider, data in provStats.items():
        # An entity type missing from a provider's report yields None, which
        # shows up as NaN in the table instead of raising.
        out_arr.append((provider, entity, data['ents_per_type'].get(entity, {'f': None})['f']))

# make overall entities
out_arr.append(('All', 'Overall', genStats['performance']['ents_f']))
for provider, data in provStats.items():
    out_arr.append((provider, 'Overall', data['ents_f']))

statsDf = pd.DataFrame(out_arr, columns=['provider', 'entity', 'score'])

# Row order used for presentation; 'Overall' is kept last.
entity_order = [
    'FILING_DATE', 'REGISTRAR_REQ_DATE', 'REGISTRAR_REQ_RESP', 'COMMENCEMENT_DATE',
    'APPT_DATE', 'PUB_DATE', 'COMPLAINANT', 'COMP_LOC', 'REPR_ORG', 'RESPONDENT', 'RESP_LOC',
    'NO_RESPONSE', 'PANELIST', 'REGISTRAR', 'TRADEMARK',
    'Overall',
]

# Last expression of the cell: rendered by the notebook as the result table.
statsDf.pivot(index='entity', columns='provider', values='score').loc[
    entity_order,
    [*providers, 'All'],
]

provider,WIPO,FORUM,CAC,ADNDRC,CIIDRC,All
entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
FILING_DATE,1.0,1.0,1.0,0.062827,0.4,0.233546
REGISTRAR_REQ_DATE,0.943396,,,0.823529,1.0,0.918919
REGISTRAR_REQ_RESP,0.956522,0.962963,,0.75,0.5,0.891089
COMMENCEMENT_DATE,0.862745,1.0,,0.625,0.285714,0.79661
APPT_DATE,0.916667,1.0,,0.842105,1.0,0.930693
PUB_DATE,0.962963,1.0,1.0,1.0,1.0,0.983871
COMPLAINANT,0.872727,0.85,0.333333,0.763636,0.363636,0.778443
COMP_LOC,0.84,1.0,,0.818182,0.0,0.810127
REPR_ORG,0.883721,0.956522,0.8,0.222222,,0.825
RESPONDENT,0.685714,0.933333,1.0,0.690909,0.571429,0.738095


In [3]:
# each feature group represents the features we used to study a given behavior
behavior_features = [
    ("FILING_DATE", "COMP_LOC", "REPR_ORG", "RESP_LOC", "PANELIST"),
    ("REGISTRAR_REQ_DATE", "REGISTRAR_REQ_RESP", "REGISTRAR"),
    ("COMMENCEMENT_DATE", "APPT_DATE"),
    ("FILING_DATE", "REPR_ORG", "PANELIST"),
    ("APPT_DATE", "PUB_DATE", "PANELIST"),
    ("FILING_DATE", "PUB_DATE", "REGISTRAR"),
]
provider_columns = ['WIPO', 'FORUM', 'CAC', 'ADNDRC', 'CIIDRC']

# The entity x provider table is the same for every behavior, so pivot it once
# instead of rebuilding it on each loop iteration.
score_table = statsDf.pivot(index='entity', columns='provider', values='score')

# One record per behavior: the median score of its features for each provider.
stats = []
for no, features in enumerate(behavior_features):
    # Select rows with an explicit list: a tuple row key can be interpreted by
    # .loc as a multi-level key rather than a collection of labels.
    perf = score_table.loc[list(features), provider_columns].median(axis=0).to_dict()
    perf['Behavior'] = f'B{no + 1}'  # behaviors are labelled B1..B6
    stats.append(perf)

# Last expression of the cell: rendered by the notebook as the result table.
pd.DataFrame(stats).set_index('Behavior')

Unnamed: 0_level_0,WIPO,FORUM,CAC,ADNDRC,CIIDRC
Behavior,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
B1,0.883721,0.965517,0.8,0.636364,0.2
B2,0.943396,0.891318,,0.75,0.833333
B3,0.889706,1.0,,0.733553,0.642857
B4,0.909091,0.956522,0.8,0.222222,0.435294
B5,0.916667,1.0,0.833333,0.93617,1.0
B6,0.962963,1.0,1.0,0.684211,0.833333
