## Load plant names

In [40]:
import pandas as pd
import inflect

# Accumulators that later cells fill in (taxon names, GloBI interaction
# records and GloBI URIs); three independent empty lists.
taxon_names, interactions, globi_uris = [], [], []

# Companion-planting table; the preview below shows `name`, `id` and `latin`
# columns.
csv = '../datasets/companion-planting-wikidata-latin.csv'
df = pd.read_csv(filepath_or_buffer=csv)

In [2]:
# Preview the first five rows of the loaded table.
df.head(n=5)

Unnamed: 0,name,id,latin
0,Shallots,Q193498,Allium ascalonicum
1,Cabbages,Q146212,Brassica oleracea
2,Garlic,Q23400,Allium sativum
3,Basil,Q38859,Ocimum basilicum
4,Grape Vine,Q30046,Vitis vinifera


## Retrieve Wikidata info (if necessary)

In [3]:
from SPARQLWrapper import SPARQLWrapper, JSON

# inflect engine used to turn plural common names ("Cabbages") into singular
# search terms ("cabbage").
p = inflect.engine()

# The endpoint client does not change between names, so build and configure
# it once instead of on every iteration.
sparql = SPARQLWrapper(
    "https://query.wikidata.org/sparql"
)
sparql.setReturnFormat(JSON)

# Only the first two rows of the table are queried here.
for name in df[0:2].name:

    # singular_noun() returns False when the word is already singular
    # (e.g. "Garlic"); fall back to the original name in that case instead of
    # calling .lower() on a bool.
    singular = p.singular_noun(name, count=None) or name

    # Escape backslashes and single quotes so a name containing an apostrophe
    # cannot terminate the SPARQL string literal early.
    search_term = singular.lower().replace("\\", "\\\\").replace("'", "\\'")

    # Query for the Latin name; order on length of the matched label
    # (shorter = better) and keep the three best candidates.
    sparql.setQuery("""
    SELECT distinct ?taxon ?taxonName ?commonName WHERE {
      ?taxon wdt:P225 ?taxonName.
      #?taxon wdt:P1843 ?commonName .
      ?taxon rdfs:label ?commonName .
      FILTER(CONTAINS(STR(?commonName),'""" + search_term + """')) .
      FILTER(lang(?commonName)='en')
      } order by STRLEN(STR(?commonName))
      limit 3
      """
    )

    try:
        ret = sparql.queryAndConvert()

        for r in ret["results"]["bindings"]:
            print(r)
            taxon_names.append(r['taxonName']['value'])

    except Exception as e:
        # Best effort: report the failure (e.g. an endpoint timeout) and
        # carry on with the next name.
        print(e)

EndPointInternalError: The endpoint returned the HTTP status code 500. 

Response:
b"SPARQL-QUERY: queryStr=\n    SELECT distinct ?taxon ?taxonName ?commonName WHERE {\n      ?taxon wdt:P225 ?taxonName.\n      #?taxon wdt:P1843 ?commonName .\n      ?taxon rdfs:label ?commonName .\n      FILTER(CONTAINS(STR(?commonName),'shallot')) . \n      FILTER(lang(?commonName)='en')\n      }  \n      limit 3 \n      \njava.util.concurrent.TimeoutException\n\tat java.util.concurrent.FutureTask.get(FutureTask.java:205)\n\tat com.bigdata.rdf.sail.webapp.BigdataServlet.submitApiTask(BigdataServlet.java:292)\n\tat com.bigdata.rdf.sail.webapp.QueryServlet.doSparqlQuery(QueryServlet.java:678)\n\tat com.bigdata.rdf.sail.webapp.QueryServlet.doGet(QueryServlet.java:290)\n\tat com.bigdata.rdf.sail.webapp.RESTServlet.doGet(RESTServlet.java:240)\n\tat com.bigdata.rdf.sail.webapp.MultiTenancyServlet.doGet(MultiTenancyServlet.java:273)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:687)\n\tat java

## Or use Latin names from the dataframe

In [43]:
# Take the Latin names straight from the `latin` column as a plain list.
taxon_names = df['latin'].tolist()

## Retrieve GloBI IRIs for species (only necessary for the SPARQL endpoint)

In [None]:
import requests
from urllib.parse import quote

# Look up the external URL GloBI knows for each taxon. One entry is appended
# to `globi_uris` per taxon (None when the lookup fails), so the list stays
# index-aligned with `taxon_names`.
for taxon_name in taxon_names:
    try:
        # Percent-encode the whole name (not just spaces) so any reserved
        # character is safe inside the URL path. A non-string name (e.g. a
        # NaN from an empty cell) raises TypeError here and is handled below.
        URL = (
            "https://api.globalbioticinteractions.org/findExternalUrlForTaxon/"
            + quote(taxon_name, safe="")
        )
        # The timeout keeps one unresponsive request from hanging the cell.
        globi_uris.append(requests.get(URL, timeout=30).json()['url'])
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, missing 'url' key or unusable name:
        # record a placeholder rather than aborting the whole loop.
        globi_uris.append(None)


## Retrieve GloBI interactions as a nested list

In [44]:
import requests
from urllib.parse import quote

# Fetch the interaction records GloBI holds for each taxon and append the
# response's 'data' payload to `interactions`. Taxa whose lookup fails are
# skipped, so `interactions` may be shorter than `taxon_names`.
for taxon_name in taxon_names:
    try:
        # Percent-encode the whole name (not just spaces) so any reserved
        # character is safe inside the URL path.
        URL = (
            "https://api.globalbioticinteractions.org/taxon/"
            + quote(taxon_name, safe="")
            + "/interactsWith"
        )
        # The timeout keeps one unresponsive request from hanging the cell.
        interactions.append(requests.get(URL, timeout=30).json()['data'])
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        # Still best effort, but say which taxon was dropped and why instead
        # of discarding it silently.
        print(f"Skipping {taxon_name!r}: {err}")

In [55]:
# Display the first entry of `interactions` (the records fetched for the first
# taxon whose lookup succeeded).
interactions[0]

[['Allium ascalonicum',
  'eatenBy',
  ['Acrolepiopsis assectella',
   'Spodoptera exigua',
   'Spodoptera litura',
   'Blastobasis ochromorpha',
   'Utetheisa pulchella']],
 ['Allium ascalonicum',
  'hasPathogen',
  ['Pyrenochaeta terrestris', 'Ditylenchus dipsaci']],
 ['Allium ascalonicum',
  'hostOf',
  ['Puccinia sessilis',
   'Alternaria solani-nigri',
   'Cladosporium allii',
   'Colletotrichum circinans',
   'Peronospora destructor',
   'Blastobasis',
   'Blastobasis ochromorpha',
   'Utetheisa pulchella',
   'Spodoptera exigua',
   'Spodoptera litura',
   'Cadra cautella',
   'Dasyses rugosella',
   'Acrolepiopsis assectella',
   'Botrytis allii',
   'Pyrenochaeta terrestris',
   'Sclerotium cepivorum',
   'Urocystis cepulae',
   'Sclerotium rolfsii',
   'Alternaria porri',
   'Fusarium solani',
   'Puccinia porri',
   'Phytophthora parasitica',
   'Rhizoctonia',
   'Cylindrocladium parvum',
   'Botrytis',
   'Fusarium',
   'Urocystis colchici',
   'Acari',
   'Colletotrichum g

## TODO add as OWL axioms