# Not Recommended for Annotation (NRA) terms

In [16]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

In [17]:
# Namespace prefixes shared by every query in this notebook, keyed by the
# short name used inside the SPARQL text.
_PREFIX_IRIS = {
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "edam": "http://edamontology.org/",
    "sc": "http://schema.org/",
}

# SPARQL prologue: a leading blank line followed by one `PREFIX` declaration
# per line, ready to be concatenated in front of a query body.
PREFIXES = "\n" + "".join(
    f"PREFIX {short}: <{iri}>\n" for short, iri in _PREFIX_IRIS.items()
)

In [18]:
# SPARQL endpoint of the locally hosted "Project25" repository
# (presumably the bio.tools + EDAM dump queried below; confirm before sharing).
BIOTOOLS_ENDPOINT_URL = "http://localhost:7200/repositories/Project25"

# Ask for JSON so the converted result can be indexed as
# result['results']['bindings'].
ep_biotools = SPARQLWrapper(BIOTOOLS_ENDPOINT_URL)
ep_biotools.setReturnFormat(JSON)

In [22]:
def remote_query(query, endpoint=ep_biotools):
    """Run a SPARQL SELECT query and return its solutions as a DataFrame.

    ``PREFIXES`` is prepended to ``query`` before it is sent, so query bodies
    can use the ``rdfs:``, ``edam:`` and ``sc:`` prefixes without declaring
    them.

    Parameters
    ----------
    query : str
        SPARQL query text, without prefix declarations.
    endpoint : SPARQLWrapper, optional
        Endpoint wrapper the query is set on and executed through. Defaults
        to ``ep_biotools``.

    Returns
    -------
    pandas.DataFrame or None
        One row per solution and one column per projected variable; each cell
        holds the ``'value'`` field of the corresponding binding. A variable
        left unbound in a solution (e.g. under ``OPTIONAL``) is NaN in that
        row. ``None`` is returned when the query or its conversion fails; the
        error is printed instead of raised so the notebook keeps running.

    Notes
    -----
    On success this also widens pandas' global display options so complete
    result sets and long IRIs are rendered untruncated.
    """
    query = PREFIXES + query
    endpoint.setQuery(query)
    try:
        result = endpoint.queryAndConvert()
        pd.set_option('display.max_rows', None, 'display.max_colwidth', 6000, 'display.width', 6000)
        # Flatten every solution to {variable: value} before building the
        # frame. Doing it per binding (instead of element-wise on the finished
        # frame) means variables missing from a solution simply become NaN
        # rather than raising on `NaN['value']`.
        records = [
            {name: cell['value'] for name, cell in solution.items()}
            for solution in result['results']['bindings']
        ]
        # Use the projected variables as columns so an empty result still has
        # the expected header; fall back to inferred columns if absent.
        columns = result.get('head', {}).get('vars')
        return pd.DataFrame(records, columns=columns)
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back None.
        print(e)
        return None

## What terms in EDAM are not recommended for annotation?

In [43]:
# List every resource flagged `edam:notRecommendedForAnnotation true`
# together with its rdfs:label. The prefixes are not declared here because
# remote_query() prepends PREFIXES to the query text.
q = '''
SELECT ?feature ?label WHERE {
   ?feature edam:notRecommendedForAnnotation true. 
   ?feature rdfs:label ?label.
}
'''
# Last expression of the cell: the returned DataFrame is rendered as output.
remote_query(q)

Unnamed: 0,feature,label
0,http://edamontology.org/data_0006,Data
1,http://edamontology.org/format_1915,Format
2,http://edamontology.org/operation_0004,Operation
3,http://edamontology.org/data_0842,Identifier
4,http://edamontology.org/topic_0003,Topic
5,http://edamontology.org/data_1916,Alignment
6,http://edamontology.org/data_0976,Identifier (by type of entity)
7,http://edamontology.org/data_0977,Tool identifier
8,http://edamontology.org/data_0982,Molecule identifier
9,http://edamontology.org/data_0983,Atom ID


## EDAM: Topic

### What are the most common NRA topics?

In [42]:
# For each sc:SoftwareApplication, follow sc:applicationSubCategory to its
# topic and keep only topics flagged notRecommendedForAnnotation. Matches are
# grouped by the topic's label and counted, most frequent label first.
q = '''
SELECT ?label (COUNT(?topic) as ?count) WHERE {
?x a sc:SoftwareApplication;
   sc:applicationSubCategory ?topic.
   ?topic rdfs:label ?label.
   ?topic edam:notRecommendedForAnnotation true. 
} GROUP BY ?label
ORDER BY DESC(?count)
'''
# Last expression of the cell: the returned DataFrame is rendered as output.
remote_query(q)

Unnamed: 0,label,count
0,Topic,1


### Which tools have NRA topics?

In [37]:
# Same graph pattern as the topic count, but ungrouped: one row per
# (tool, topic label) pair where the topic reached through
# sc:applicationSubCategory is flagged notRecommendedForAnnotation.
# No ORDER BY is given, so row order is whatever the endpoint returns.
q = '''
SELECT ?biotools_id ?label WHERE {
?biotools_id a sc:SoftwareApplication;
   sc:applicationSubCategory ?topic.
   ?topic rdfs:label ?label.
   ?topic edam:notRecommendedForAnnotation true. 
}
'''
# Last expression of the cell: the returned DataFrame is rendered as output.
remote_query(q)

Unnamed: 0,biotools_id,label
0,https://bio.tools/minirmd,Topic


## EDAM: Operation

### What are the most common NRA operations?

In [40]:
# For each sc:SoftwareApplication, follow sc:featureList to its operation and
# keep only operations flagged notRecommendedForAnnotation. Matches are
# grouped by the operation's label and counted, most frequent label first.
q = '''
SELECT ?label (COUNT(?operation) as ?count) WHERE {
?x a sc:SoftwareApplication;
   sc:featureList ?operation.
   ?operation rdfs:label ?label.
   ?operation edam:notRecommendedForAnnotation true. 
} GROUP BY ?label
ORDER BY DESC(?count)
'''
# Last expression of the cell: the returned DataFrame is rendered as output.
remote_query(q)

Unnamed: 0,label,count
0,Visualisation,2022
1,Quantification,879
2,Clustering,794
3,Modelling and simulation,511
4,Data handling,468
5,Validation,424
6,Mapping,422
7,Prediction and recognition,397
8,Analysis,381
9,Annotation,142


### Which tools have NRA operations?

In [41]:
# Same graph pattern as the operation count, but ungrouped: one row per
# (tool, operation label) pair where the operation reached through
# sc:featureList is flagged notRecommendedForAnnotation.
# No ORDER BY is given, so row order is whatever the endpoint returns.
q = '''
SELECT ?biotools_id ?label WHERE {
?biotools_id a sc:SoftwareApplication;
   sc:featureList ?operation.
   ?operation rdfs:label ?label.
   ?operation edam:notRecommendedForAnnotation true. 
}
'''
# Last expression of the cell: the returned DataFrame is rendered as output.
remote_query(q)

Unnamed: 0,biotools_id,label
0,https://bio.tools/LeishInDB,Operation
1,https://bio.tools/MDPBiome,Operation
2,https://bio.tools/MeroX,Operation
3,https://bio.tools/augustus,Operation
4,https://bio.tools/checkm,Operation
5,https://bio.tools/long-read-tools,Operation
6,https://bio.tools/pharmcat,Operation
7,https://bio.tools/phaseme,Operation
8,https://bio.tools/webaugustus,Operation
9,https://bio.tools/VISOR,Alignment
