In [1]:
import json
import pandas as pd

In [5]:
# Collect the per-entity 'f' scores (presumably F-scores -- confirm against the
# evaluation files) for the overall model and for each provider-specific
# evaluation, then display them as an entity x provider table.
model_path = '../model/model-best'
provider_perf_dir = '../results/models-performance'

# Single definition used both for the files that are read and for the column
# order of the displayed table, so the two cannot drift apart.
PROVIDERS = ('WIPO', 'FORUM', 'CAC', 'ADNDRC', 'CIIDRC')

# Row order of the displayed table; 'Overall' is the synthetic row appended
# below from the 'ents_f' values.
ENTITY_ROWS = [
    'FILING_DATE', 'REGISTRAR_REQ_DATE', 'REGISTRAR_REQ_RESP', 'COMMENCEMENT_DATE',
    'APPT_DATE', 'PUB_DATE', 'COMPLAINANT', 'COMP_LOC', 'REPR_ORG', 'RESPONDENT', 'RESP_LOC',
    'NO_RESPONSE', 'PANELIST', 'REGISTRAR', 'TRADEMARK',
    'Overall',
]

# Read the JSON files as UTF-8 explicitly instead of relying on the
# platform-dependent default encoding of open().
with open(f'{model_path}/meta.json', 'r', encoding='utf-8') as fin:
    genStats = json.load(fin)

provStats = {}
for provider in PROVIDERS:
    with open(f'{provider_perf_dir}/{provider}.json', 'r', encoding='utf-8') as fin:
        provStats[provider] = json.load(fin)

# Long-format records: (provider, entity, score). 'All' is the overall model.
out_arr = []
for entity, score in genStats['performance']['ents_per_type'].items():
    out_arr.append(('All', entity, score['f']))

    for provider, data in provStats.items():
        # A provider file may have no entry for this entity type; None is stored
        # in that case and shows up as a missing value in the table.
        out_arr.append((provider, entity, data['ents_per_type'].get(entity, {'f': None})['f']))

# Add the aggregate score over all entity types as an extra 'Overall' row.
out_arr.append(('All', 'Overall', genStats['performance']['ents_f']))
for provider, data in provStats.items():
    out_arr.append((provider, 'Overall', data['ents_f']))

statsDf = pd.DataFrame(out_arr, columns=['provider', 'entity', 'score'])

# Displayed result: one row per entity, one column per provider, 'All' last.
statsDf.pivot(index='entity', columns='provider', values='score').loc[
    ENTITY_ROWS,
    [*PROVIDERS, 'All']
]

provider,WIPO,FORUM,CAC,ADNDRC,CIIDRC,All
entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
FILING_DATE,0.979592,1.0,1.0,0.833333,0.0,0.93913
REGISTRAR_REQ_DATE,0.962963,,,0.875,0.333333,0.857143
REGISTRAR_REQ_RESP,0.958333,0.947368,,0.666667,0.666667,0.851064
COMMENCEMENT_DATE,0.88,0.944444,,0.846154,0.666667,0.834783
APPT_DATE,0.923077,1.0,,0.45,0.0,0.890909
PUB_DATE,0.980392,1.0,1.0,1.0,0.75,0.912281
COMPLAINANT,0.724138,0.784314,1.0,0.816327,1.0,0.663366
COMP_LOC,0.705882,1.0,,0.454545,,0.698795
REPR_ORG,0.893617,0.944444,0.571429,0.153846,0.571429,0.742268
RESPONDENT,0.615385,0.731707,0.666667,0.816327,0.727273,0.703704


In [18]:
# Each feature group lists the entities we used to study a given behavior
# (B1..B6, in the order given below). For every group, the per-provider
# summary is the median score over the group's entities.
behavior_features = [
    ("FILING_DATE", "COMP_LOC", "REPR_ORG", "RESP_LOC", "PANELIST"),
    ("REGISTRAR_REQ_DATE", "REGISTRAR_REQ_RESP", "REGISTRAR"),
    ("COMMENCEMENT_DATE", "APPT_DATE"),
    ("FILING_DATE", "REPR_ORG", "PANELIST"),
    ("APPT_DATE", "PUB_DATE", "PANELIST"),
    ("FILING_DATE", "PUB_DATE", "REGISTRAR"),
]
provider_columns = ['WIPO', 'FORUM', 'CAC', 'ADNDRC', 'CIIDRC']

# The entity x provider grid does not depend on the feature group, so it is
# built once here instead of being re-pivoted on every loop iteration.
score_grid = statsDf.pivot(index='entity', columns='provider', values='score')

stats = []
for no, features in enumerate(behavior_features):
    # list(features): pass the row labels as an explicit list so .loc treats
    # them as a set of labels to select. median() uses pandas' default
    # skipna=True, so missing scores are ignored rather than propagated.
    perf = score_grid.loc[list(features), provider_columns].median(axis=0).to_dict()
    perf['Behavior'] = f'B{no + 1}'
    stats.append(perf)

# Displayed result: one row per behavior, one column per provider.
pd.DataFrame(stats).set_index('Behavior')

Unnamed: 0_level_0,WIPO,FORUM,CAC,ADNDRC,CIIDRC
Behavior,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
B1,0.893617,0.944444,1.0,0.571429,0.208333
B2,0.958333,0.925297,,0.666667,0.571429
B3,0.901538,0.972222,,0.648077,0.333333
B4,0.960784,0.944444,1.0,0.833333,0.416667
B5,0.960784,1.0,1.0,0.909091,0.416667
B6,0.979592,1.0,1.0,0.833333,0.571429
