In [1]:
import os
from SPARQLWrapper import SPARQLWrapper, JSON
import json
import pandas as pd

In [22]:
endpoint = "http://172.20.10.2:9999/blazegraph/sparql" # SPARQL endpoint hosting ITO.owl
prefixes = """
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix ito: <https://identifiers.org/ito:>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
"""

def query(service, query, numeric_cols = []):
    """
    Helper function to convert SPARQL results into a Pandas data frame.
    """
    sparql = SPARQLWrapper(service)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()
    processed_results = json.load(result.response)
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))
        out.append(item)
        
    df = pd.DataFrame(out, columns=cols)
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col])
    
    return df

In [63]:
q = """
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix ito: <https://identifiers.org/ito:>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

select ?performance_measure_label ?result ?date ?benchmark_dataset_label ?benchmark_label ?individual_process_label
where {
    ?performance_measure rdfs:subPropertyOf* ito:performance_measure .
    ?performance_measure rdfs:label ?performance_measure_label .
    ?benchmark_process_individual ?performance_measure ?result .
    ?benchmark_process_individual <http://www.geneontology.org/formats/oboInOwl#date> ?date .
    ?benchmark_process_individual ito:has_input ?benchmark_dataset .
    ?benchmark_dataset rdfs:label ?benchmark_dataset_label .
    ?benchmark_process_individual a ?benchmark_process .
    ?benchmark_process rdfs:subClassOf* ?high_level_process .
    ?high_level_process rdfs:subClassOf ito:ITO_01625 .
    ?high_level_process rdfs:label ?benchmark_label .
    ?benchmark_process_individual rdfs:label ?individual_process_label .
}
"""

df = query(endpoint, q)

df.head()

Unnamed: 0,performance_measure_label,result,date,benchmark_dataset_label,benchmark_label,individual_process_label
0,Mean Error Rate,5.09,2019-10-15,COFW dataset,Benchmarking,CHR2C (Inter-pupils Norm) model in \'Cascade o...
1,Mean Error Rate,5.27,2018-09-01,COFW dataset,Benchmarking,DCFE model in \'A Deeply-initialized Coarse-to...
2,Mean Error Rate,5.04,2022-02-04,COFW dataset,Benchmarking,MNN+OR (Inter-pupils Norm) model in \'Multi-ta...
3,Mean Error Rate,5.11,2019-02-05,COFW dataset,Benchmarking,3DDE (Inter-pupil Norm) model in \'Face Alignm...
4,Mean Error Rate,3.02,2021-11-03,COFW dataset,Benchmarking,SH-FAN model in \'Subpixel Heatmap Regression ...


In [65]:
# Save for future reference; query is somewhat slow
df.to_csv('benchmark_dataset_raw.csv')

In [99]:
df = pd.read_csv('benchmark_dataset_raw.csv')

# Any duplicated entries?

print(df[['date','result','individual_process_label']].duplicated().sum())

1134511


In [100]:
# Remove future benchmarks; dataset was last updated in September 2022

df = df[df['date']<= '2022-10-01']

# Remove "Benchmarking"

df = df[df['benchmark_label'] != 'Benchmarking']

# Keep only most common performance measure labels

df['pm_counts'] = df.groupby('performance_measure_label')['performance_measure_label'].transform(lambda x: x.count())
df = df.sort_values(by='pm_counts',ascending=False).groupby(['date','result','individual_process_label']).nth(0).reset_index().drop(columns='Unnamed: 0')
df.to_csv('benchmark_dataset_deduplicated.csv')

In [101]:
# Any duplicated entries?

print(df[['date','result','individual_process_label']].duplicated().sum())

0


In [117]:
# What metrics are in the dataset?

df['performance_measure_label'].value_counts().head(50)

Accuracy              5058
Score                 2282
F1                    1488
PSNR                  1350
Top 1 Accuracy        1079
mIoU                   993
SSIM                   886
mAP                    880
Number of params       748
FID                    695
Top-1 Accuracy         671
AP                     637
Percentage correct     525
AUC                    520
Rank-1                 459
Top 5 Accuracy         457
Mean IoU               457
MAE                    455
MAP                    442
AP50                   425
box AP                 421
AP75                   391
MRR                    360
Precision              320
GFLOPs                 319
ROUGE-L                315
Average MPJPE (mm)     315
APS                    308
BLEU                   301
APL                    298
Recall                 297
APM                    296
ROUGE-1                288
EM                     280
BLEU score             273
RMSE                   259
Hits-at-10             256
P

In [None]:
theoretical_maxima = {
    'accuracy' : 1,
    'AUC' : 1,
    'Precision' : 1,
    'Recall' : 1,
    
}

In [115]:
# Anotate with max metrics if appliccable

df.loc[df['performance_measure_label'].str.contains("ccuracy"),'peformance_measure_label'] = 'accuracy'



df.filter(~(df['performance_measure_label'].str.contains("ccuracy") & (df['result'].astype(float) > 100)))

0
1
2
3
4
...
57515
57516
57517
57518
57519


Accuracy              5058
Score                 2282
F1                    1488
PSNR                  1350
Top 1 Accuracy        1079
mIoU                   993
SSIM                   886
mAP                    880
Number of params       748
FID                    695
Top-1 Accuracy         671
AP                     637
Percentage correct     525
AUC                    520
Rank-1                 459
Top 5 Accuracy         457
Mean IoU               457
MAE                    455
MAP                    442
AP50                   425
box AP                 421
AP75                   391
MRR                    360
Precision              320
GFLOPs                 319
ROUGE-L                315
Average MPJPE (mm)     315
APS                    308
BLEU                   301
APL                    298
Recall                 297
APM                    296
ROUGE-1                288
EM                     280
BLEU score             273
RMSE                   259
Hits-at-10             256
P

In [57]:
df['result']=df['result'].astype(float)
# Convert % accuracy to 0-1 accuracy
inds = (df['performance_measure_label']=='Accuracy') & (df['result'].astype(float) > 1)

df.loc[inds,'result']/=100


In [None]:
indstop1 = (df['performance_measure_label'].isin('Top 1 Accuracy','Top-1 Accuracy')