# Join French Crop Usage (FCU) - EPPO - TAXREF-LD - V2

EPPO Global Database: https://gd.eppo.int/

Changeswrt. V1:
- only consider FCU leaves or their direct parents
- only consider FCU crops under "usage des plantes cultivées", ignore "multi-usage" which is too ambiguous
- keep all alignements to TAXREF, do not try to keep only one


### Initializations

In [2]:
import sys
import json
import os
from string import Template
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON, POST
from time import sleep
from math import isnan, nan
import numpy as np
import requests
from http import HTTPStatus

In [3]:
sys.path.append('../..')
from utils import *

In [39]:
fcu_endpoint = "http://ontology.inrae.fr/frenchcropusage/sparql"
#fcu_endpoint = "http://localhost:8080/sparql"
taxref_endpoint = "https://taxref.mnhn.fr/sparql"

In [40]:
# EPPO API endpoint and private key
eppo_url = 'https://data.eppo.int/api/rest/1.0/'
eppo_key = 'f09732fd7fad725157edd884c825e46a'

___
# Get all varieties from FCU

In [41]:
query =  prefixes + '''
select distinct ?fcu_concept ?fcu_name ?fcu_name_type where {

    # Select only cultivated crops
    <http://ontology.inrae.fr/frenchcropusage/Usages_plantes_cultivees> skos:narrower+ ?fcu_concept.

    # Keep only leaves or their direct parent but not above
    FILTER (NOT EXISTS { ?fcu_concept skos:narrower/skos:narrower ?child. })

    # Get prefered and alternate labels and keep track of the type of label
    ?fcu_concept a skos:Concept.
    { ?fcu_concept skos:prefLabel ?lb. bind("pref" as ?fcu_name_type) }
    UNION
    { ?fcu_concept skos:altLabel  ?lb. bind("alt" as ?fcu_name_type) }

    bind(str(lcase(?lb)) as ?fcu_name)

} order by ?fcu_concept
'''

In [42]:
%time df_fcu = exec_sparql(fcu_endpoint, query)

Wall time: 410 ms


In [8]:
# Add empty columns for the next steps
df_fcu["eppo_code"] = None
df_fcu["scientific_name"] = None

In [9]:
dataframe_preview(df_fcu, end=5)

== Number of lines: 1515
== Number of unique values:
fcu_concept         446
fcu_name           1481
fcu_name_type         2
eppo_code             0
scientific_name       0
dtype: int64


Unnamed: 0,fcu_concept,fcu_name,fcu_name_type,eppo_code,scientific_name
0,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier,pref,,
1,http://ontology.inrae.fr/frenchcropusage/Abric...,abricot,alt,,
2,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier pays,pref,,
3,http://ontology.inrae.fr/frenchcropusage/Abric...,abricot pays,alt,,
4,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier des antilles,alt,,


____
# Get EPPO codes and scientific names corresponding to FCU names

EPPO API documentation: https://data.eppo.int/documentation/rest

## Get EPPO codes of FCU names

EPPO API names2codes takes a pipe-separated list of names and returns a pipe-separated list of pairs 'name;code'.

In [12]:
# Build the pipe-separated list of FCU names
concat_fcu_concepts = ''
for _index, row in df_fcu.iterrows():
    if concat_fcu_concepts != '':
        concat_fcu_concepts += '|'
    concat_fcu_concepts += row['fcu_name'].strip()

# Query EPPO for codes corresponding to the list of names
eppo_query = eppo_url + 'tools/names2codes'
query = {'intext': concat_fcu_concepts, 'authtoken': eppo_key}
api_response = requests.post(eppo_query, data = query)

response = {}
if (api_response.status_code != HTTPStatus.OK):
    print('Cannot query PDDO API. Reaons: ' + api_response.reason + '. Content: ' + api_response.text)
    exit()

# The API response is formatted as 'name;code|name;code|...'
_response = json.loads(api_response.content)["response"]
_df_index = 0
for _entry in _response.split('|'):
    _y = _entry.split(';')
    #print('EEPO answer: ' + _entry)
    if (_y[1] != "****NOT FOUND*****"):
        if df_fcu.at[_df_index, 'fcu_name'].strip() != _y[0]:
            print('Lines mismatch error. FCU name: ' + df_fcu.at[_df_index, 'fcu_name'].strip() + ', EPPO result name: ' + _y[0])
        else:
            df_fcu.at[_df_index, 'eppo_code'] = _y[1]
    _df_index = _df_index + 1

In [13]:
dataframe_preview(df_fcu, end=10)

== Number of lines: 1515
== Number of unique values:
fcu_concept         446
fcu_name           1481
fcu_name_type         2
eppo_code           334
scientific_name       0
dtype: int64


Unnamed: 0,fcu_concept,fcu_name,fcu_name_type,eppo_code,scientific_name
0,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier,pref,PRNAR,
1,http://ontology.inrae.fr/frenchcropusage/Abric...,abricot,alt,,
2,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier pays,pref,,
3,http://ontology.inrae.fr/frenchcropusage/Abric...,abricot pays,alt,,
4,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier des antilles,alt,MAFAM,
5,http://ontology.inrae.fr/frenchcropusage/Abric...,mamey,alt,POJSA,
6,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier-pays,alt,,
7,http://ontology.inrae.fr/frenchcropusage/Actin...,actinidia,pref,ATICH,
8,http://ontology.inrae.fr/frenchcropusage/Actin...,groseille de chine,alt,,
9,http://ontology.inrae.fr/frenchcropusage/Actin...,kiwi,alt,ATICH,


## Get the scientific name of each EPPO code

Query the EPPO API again to get the scientific name corresponding to each of the EPPO codes retrieved.

In [14]:
# Set a max number of queries to submit. 0 = unlimited.
MAX_QUERIES = 0

eppo_query_tpl =  Template(eppo_url + 'taxon/$EPPOCODE/taxonomy?authtoken=' + eppo_key)

for _index, row in df_fcu.iterrows():
    if row['eppo_code'] is not None:
        print(f"---- Running query {_index + 1}/{len(df_fcu)} - fcu_name = {row['fcu_name']}, EPPO code: {row['eppo_code']}")
        eppo_query = eppo_query_tpl.substitute(EPPOCODE = row['eppo_code'].strip())
        #print(eppo_query)

        api_response = requests.get(eppo_query)
        if (api_response.status_code != HTTPStatus.OK):
            print('Cannot query EPPO API. Reaons: ' + api_response.reason + '. Content: ' + api_response.text)
        else:
            # Parse the result and extract only the name with taxonomic rank 'species' = highest value of 'level', 8 or 9 in general
            _response = json.loads(api_response.content)
            # The response is a JSON array where each element is a document like:
            # { "codeid": 41521,
            #   "eppocode": "PRNAR",
            #   "prefname": "Prunus armeniaca",
            #   "level": 9 }
            #species = list(filter(lambda r: r['level'] == 9, _response))
            species = {'level': 0}
            for r in _response:
                if r['level'] >= species['level']:
                    species = r

            # Get the scientific name of the name with highest rank
            if 'prefname' in species:
                df_fcu.at[_index, 'scientific_name'] = species['prefname']

    if MAX_QUERIES > 0 and (_index + 1) >= MAX_QUERIES:
        break

---- Running query 1/1515 - fcu_name = abricotier, EPPO code: PRNAR
---- Running query 5/1515 - fcu_name = abricotier des antilles, EPPO code: MAFAM
---- Running query 6/1515 - fcu_name = mamey, EPPO code: POJSA
---- Running query 8/1515 - fcu_name = actinidia, EPPO code: ATICH
---- Running query 10/1515 - fcu_name = kiwi, EPPO code: ATICH
---- Running query 14/1515 - fcu_name = ail, EPPO code: ALLSA
---- Running query 15/1515 - fcu_name = ail commun, EPPO code: ALLSA
---- Running query 16/1515 - fcu_name = ail cultivé, EPPO code: ALLSA
---- Running query 17/1515 - fcu_name = aillet, EPPO code: ALLVI
---- Running query 18/1515 - fcu_name = alpiste, EPPO code: PHABR
---- Running query 19/1515 - fcu_name = alpiste des canaries, EPPO code: PHACA
---- Running query 22/1515 - fcu_name = amandier, EPPO code: PRNDU
---- Running query 25/1515 - fcu_name = amande douce, EPPO code: QPHOV
---- Running query 27/1515 - fcu_name = ananas, EPPO code: ANHCO
---- Running query 29/1515 - fcu_name = anet

In [15]:
dataframe_preview(df_fcu, end=10)

== Number of lines: 1515
== Number of unique values:
fcu_concept         446
fcu_name           1481
fcu_name_type         2
eppo_code           334
scientific_name     334
dtype: int64


Unnamed: 0,fcu_concept,fcu_name,fcu_name_type,eppo_code,scientific_name
0,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier,pref,PRNAR,Prunus armeniaca
1,http://ontology.inrae.fr/frenchcropusage/Abric...,abricot,alt,,
2,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier pays,pref,,
3,http://ontology.inrae.fr/frenchcropusage/Abric...,abricot pays,alt,,
4,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier des antilles,alt,MAFAM,Mammea americana
5,http://ontology.inrae.fr/frenchcropusage/Abric...,mamey,alt,POJSA,Pouteria sapota
6,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier-pays,alt,,
7,http://ontology.inrae.fr/frenchcropusage/Actin...,actinidia,pref,ATICH,Actinidia chinensis
8,http://ontology.inrae.fr/frenchcropusage/Actin...,groseille de chine,alt,,
9,http://ontology.inrae.fr/frenchcropusage/Actin...,kiwi,alt,ATICH,Actinidia chinensis


In [16]:
df_fcu.to_excel("result1_fcu_eppo.xlsx")

#### Count only matches

In [17]:
dataframe_preview(df_fcu.dropna(), end=10)

== Number of lines: 526
== Number of unique values:
fcu_concept        283
fcu_name           509
fcu_name_type        2
eppo_code          334
scientific_name    334
dtype: int64


Unnamed: 0,fcu_concept,fcu_name,fcu_name_type,eppo_code,scientific_name
0,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier,pref,PRNAR,Prunus armeniaca
4,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier des antilles,alt,MAFAM,Mammea americana
5,http://ontology.inrae.fr/frenchcropusage/Abric...,mamey,alt,POJSA,Pouteria sapota
7,http://ontology.inrae.fr/frenchcropusage/Actin...,actinidia,pref,ATICH,Actinidia chinensis
9,http://ontology.inrae.fr/frenchcropusage/Actin...,kiwi,alt,ATICH,Actinidia chinensis
13,http://ontology.inrae.fr/frenchcropusage/Ails,ail,pref,ALLSA,Allium sativum
14,http://ontology.inrae.fr/frenchcropusage/Ails,ail commun,alt,ALLSA,Allium sativum
15,http://ontology.inrae.fr/frenchcropusage/Ails,ail cultivé,alt,ALLSA,Allium sativum
16,http://ontology.inrae.fr/frenchcropusage/Ails,aillet,alt,ALLVI,Allium vineale
17,http://ontology.inrae.fr/frenchcropusage/Alpistes,alpiste,pref,PHABR,Phalaris brachystachys


____
# Query TAXREF-LD with EPPO scientific names


EPPO provides short scientific names of species: genus + epythet, which may correspond to several full scientific names (with authority and date) in TAXREF-LD, e.g.: *Prunus armeniaca L., 1753* and *Prunus armeniaca Chevall., 1827*.

Besides, each name in TAXREF-LD could be either a reference name (the name used to denote the taxon) or a synonym of the reference name. The query below keeps track of all the matching names and wether these are reference or synonym names.

The SPARQL endpoint of TAXREF-LD does not support passing all scientific names at once in a VALUES clause (ends up with a timeout).
To avoid this, we make a SPARQL query for each FCU variety.


In [18]:
queryTpl = Template(prefixes + '''
select distinct ("$fcu_name" as ?fcu_name) ("$eppo_code" as ?eppo_code) ("$scientific_name" as ?scientific_name) ?taxref_name_type ?taxref_full_name ?taxref_ref_full_name ?taxon ?rank 
from <http://taxref.mnhn.fr/lod/graph/classes/15.0>
from <http://taxref.mnhn.fr/lod/graph/vernacular/15.0>
from <http://taxref.mnhn.fr/lod/graph/concepts>
where {
    ?name
       a                      skos:Concept;
       skos:prefLabel         "$scientific_name";
       rdfs:label             ?taxref_full_name. # scientific name + authority
    
    { ?name taxrefp:isReferenceNameOf ?taxon. bind("pref" as ?taxref_name_type) }
    union
    { ?name taxrefp:isSynonymOf       ?taxon. bind("alt" as ?taxref_name_type) }
    
    ?taxon
       taxrefp:hasRank        ?rank;
       taxrefp:hasReferenceName [ rdfs:label ?taxref_ref_full_name ].

    # All ranks up to spcecies but not above
    filter (?rank in (
        taxrefrk:Species,  taxrefrk:SemiSpecies, taxrefrk:MicroSpecies, taxrefrk:SubSpecies, taxrefrk:Natio, 
        taxrefrk:Varietas, taxrefrk:SubVarietas, taxrefrk:Forma,        taxrefrk:SubForma,   taxrefrk:FormaSpecies,
        taxrefrk:Linea,    taxrefrk:Clone,       taxrefrk:Race,         taxrefrk:Cultivar,   taxrefrk:Morpha,
        taxrefrk:Abberatio ))
}''')

In [19]:
# Set a max number of queries to submit. 0 = unlimited.
MAX_QUERIES = 0

# Result DataFrame
df_fcu_taxref = pd.DataFrame()

for _index, row in df_fcu.iterrows():
    if row['scientific_name'] is not None:
        query = queryTpl.substitute(
            scientific_name = row['scientific_name'].strip(), 
            fcu_name = row['fcu_name'].strip().lower(),
            eppo_code = row['eppo_code']
        )
        #print(query)

        print(f"---- Running query {_index + 1}/{len(df_fcu)} - fcu_name = {row['fcu_name']}, scientific name: {row['scientific_name']}")
        %time _df = exec_sparql(taxref_endpoint, query)
        print(f'Number of results: {_df.shape[0]}')
        df_fcu_taxref = df_fcu_taxref.append(_df)

        # Keep track of the names not matched with TAXREF
        if _df.shape[0] == 0:
            nomatch_row = {'fcu_name': row['fcu_name'].strip(), 'eppo_code': row['eppo_code'], 'scientific_name': row['scientific_name'] }
            df_fcu_taxref = df_fcu_taxref.append(nomatch_row, ignore_index=True)

    if MAX_QUERIES > 0 and (_index + 1) >= MAX_QUERIES:
        break

---- Running query 1/1515 - fcu_name = abricotier, scientific name: Prunus armeniaca
Wall time: 286 ms
Number of results: 2
---- Running query 5/1515 - fcu_name = abricotier des antilles, scientific name: Mammea americana
Wall time: 245 ms
Number of results: 1
---- Running query 6/1515 - fcu_name = mamey, scientific name: Pouteria sapota
Wall time: 285 ms
Number of results: 1
---- Running query 8/1515 - fcu_name = actinidia, scientific name: Actinidia chinensis
Wall time: 204 ms
Number of results: 2
---- Running query 10/1515 - fcu_name = kiwi, scientific name: Actinidia chinensis
Wall time: 270 ms
Number of results: 2
---- Running query 14/1515 - fcu_name = ail, scientific name: Allium sativum
Wall time: 244 ms
Number of results: 1
---- Running query 15/1515 - fcu_name = ail commun, scientific name: Allium sativum
Wall time: 246 ms
Number of results: 1
---- Running query 16/1515 - fcu_name = ail cultivé, scientific name: Allium sativum
Wall time: 234 ms
Number of results: 1
---- Runni

### Result:

- taxref_full_name: the full scientific name with authority and date, that matches the EPPO (short) scientific name
- taxref_name_type: wether this full scientific name is a reference name (pref) or synonym (alt) of a taxon in TAXREF-LD
- taxref_ref_full_name: if the full scientific name, this column gives the reference name that is the taxon name.
    

In [20]:
dataframe_preview(df_fcu_taxref)

== Number of lines: 595
== Number of unique values:
fcu_name                509
eppo_code               334
scientific_name         334
taxref_name_type          2
taxref_full_name        344
taxref_ref_full_name    315
taxon                   315
rank                      4
dtype: int64


Unnamed: 0,fcu_name,eppo_code,scientific_name,taxref_name_type,taxref_full_name,taxref_ref_full_name,taxon,rank
0,abricotier,PRNAR,Prunus armeniaca,pref,"Prunus armeniaca L., 1753","Prunus armeniaca L., 1753",http://taxref.mnhn.fr/lod/taxon/116041,http://taxref.mnhn.fr/lod/taxrank/Species
1,abricotier,PRNAR,Prunus armeniaca,alt,"Prunus armeniaca Chevall., 1827",Prunus domestica var. insititia (L.) Fiori & P...,http://taxref.mnhn.fr/lod/taxon/965160,http://taxref.mnhn.fr/lod/taxrank/Varietas
2,abricotier des antilles,MAFAM,Mammea americana,pref,"Mammea americana L., 1753","Mammea americana L., 1753",http://taxref.mnhn.fr/lod/taxon/447085,http://taxref.mnhn.fr/lod/taxrank/Species
3,mamey,POJSA,Pouteria sapota,pref,"Pouteria sapota (Jacq.) H.E.Moore & Stearn, 1967","Pouteria sapota (Jacq.) H.E.Moore & Stearn, 1967",http://taxref.mnhn.fr/lod/taxon/630482,http://taxref.mnhn.fr/lod/taxrank/Species
4,actinidia,ATICH,Actinidia chinensis,pref,"Actinidia chinensis Planch., 1847","Actinidia chinensis Planch., 1847",http://taxref.mnhn.fr/lod/taxon/80138,http://taxref.mnhn.fr/lod/taxrank/Species
5,actinidia,ATICH,Actinidia chinensis,alt,"Actinidia chinensis auct. non Planch., 1847",Actinidia deliciosa (A.Chev.) C.F.Liang & A.R....,http://taxref.mnhn.fr/lod/taxon/80140,http://taxref.mnhn.fr/lod/taxrank/Species
6,kiwi,ATICH,Actinidia chinensis,pref,"Actinidia chinensis Planch., 1847","Actinidia chinensis Planch., 1847",http://taxref.mnhn.fr/lod/taxon/80138,http://taxref.mnhn.fr/lod/taxrank/Species
7,kiwi,ATICH,Actinidia chinensis,alt,"Actinidia chinensis auct. non Planch., 1847",Actinidia deliciosa (A.Chev.) C.F.Liang & A.R....,http://taxref.mnhn.fr/lod/taxon/80140,http://taxref.mnhn.fr/lod/taxrank/Species
8,ail,ALLSA,Allium sativum,pref,"Allium sativum L., 1753","Allium sativum L., 1753",http://taxref.mnhn.fr/lod/taxon/81505,http://taxref.mnhn.fr/lod/taxrank/Species
9,ail commun,ALLSA,Allium sativum,pref,"Allium sativum L., 1753","Allium sativum L., 1753",http://taxref.mnhn.fr/lod/taxon/81505,http://taxref.mnhn.fr/lod/taxrank/Species


In [21]:
df_fcu_taxref.to_excel("result2_eppo_taxref.xlsx")

### Count only matches

In [23]:
dataframe_preview(df_fcu_taxref.dropna())

== Number of lines: 558
== Number of unique values:
fcu_name                474
eppo_code               306
scientific_name         306
taxref_name_type          2
taxref_full_name        344
taxref_ref_full_name    315
taxon                   315
rank                      4
dtype: int64


Unnamed: 0,fcu_name,eppo_code,scientific_name,taxref_name_type,taxref_full_name,taxref_ref_full_name,taxon,rank
0,abricotier,PRNAR,Prunus armeniaca,pref,"Prunus armeniaca L., 1753","Prunus armeniaca L., 1753",http://taxref.mnhn.fr/lod/taxon/116041,http://taxref.mnhn.fr/lod/taxrank/Species
1,abricotier,PRNAR,Prunus armeniaca,alt,"Prunus armeniaca Chevall., 1827",Prunus domestica var. insititia (L.) Fiori & P...,http://taxref.mnhn.fr/lod/taxon/965160,http://taxref.mnhn.fr/lod/taxrank/Varietas
2,abricotier des antilles,MAFAM,Mammea americana,pref,"Mammea americana L., 1753","Mammea americana L., 1753",http://taxref.mnhn.fr/lod/taxon/447085,http://taxref.mnhn.fr/lod/taxrank/Species
3,mamey,POJSA,Pouteria sapota,pref,"Pouteria sapota (Jacq.) H.E.Moore & Stearn, 1967","Pouteria sapota (Jacq.) H.E.Moore & Stearn, 1967",http://taxref.mnhn.fr/lod/taxon/630482,http://taxref.mnhn.fr/lod/taxrank/Species
4,actinidia,ATICH,Actinidia chinensis,pref,"Actinidia chinensis Planch., 1847","Actinidia chinensis Planch., 1847",http://taxref.mnhn.fr/lod/taxon/80138,http://taxref.mnhn.fr/lod/taxrank/Species
5,actinidia,ATICH,Actinidia chinensis,alt,"Actinidia chinensis auct. non Planch., 1847",Actinidia deliciosa (A.Chev.) C.F.Liang & A.R....,http://taxref.mnhn.fr/lod/taxon/80140,http://taxref.mnhn.fr/lod/taxrank/Species
6,kiwi,ATICH,Actinidia chinensis,pref,"Actinidia chinensis Planch., 1847","Actinidia chinensis Planch., 1847",http://taxref.mnhn.fr/lod/taxon/80138,http://taxref.mnhn.fr/lod/taxrank/Species
7,kiwi,ATICH,Actinidia chinensis,alt,"Actinidia chinensis auct. non Planch., 1847",Actinidia deliciosa (A.Chev.) C.F.Liang & A.R....,http://taxref.mnhn.fr/lod/taxon/80140,http://taxref.mnhn.fr/lod/taxrank/Species
8,ail,ALLSA,Allium sativum,pref,"Allium sativum L., 1753","Allium sativum L., 1753",http://taxref.mnhn.fr/lod/taxon/81505,http://taxref.mnhn.fr/lod/taxrank/Species
9,ail commun,ALLSA,Allium sativum,pref,"Allium sativum L., 1753","Allium sativum L., 1753",http://taxref.mnhn.fr/lod/taxon/81505,http://taxref.mnhn.fr/lod/taxrank/Species


---
# Merge of FCU names list and TAXREF-LD names

In [33]:
df_merge = pd.merge(df_fcu, df_fcu_taxref, on="scientific_name", how='left')
df_merge.drop(columns=['fcu_name_y', 'eppo_code_y'], inplace=True)
df_merge.rename(columns={"fcu_name_x": "fcu_name", "eppo_code_x": "eppo_code"}, inplace=True)
df_merge.drop_duplicates(inplace=True)

In [25]:
dataframe_preview(df_merge)

== Number of lines: 1584
== Number of unique values:
fcu_concept              446
fcu_name                1481
fcu_name_type              2
eppo_code                334
scientific_name          334
taxref_name_type           2
taxref_full_name         344
taxref_ref_full_name     315
taxon                    315
rank                       4
dtype: int64


Unnamed: 0,fcu_concept,fcu_name,fcu_name_type,eppo_code,scientific_name,taxref_name_type,taxref_full_name,taxref_ref_full_name,taxon,rank
0,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier,pref,PRNAR,Prunus armeniaca,pref,"Prunus armeniaca L., 1753","Prunus armeniaca L., 1753",http://taxref.mnhn.fr/lod/taxon/116041,http://taxref.mnhn.fr/lod/taxrank/Species
1,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier,pref,PRNAR,Prunus armeniaca,alt,"Prunus armeniaca Chevall., 1827",Prunus domestica var. insititia (L.) Fiori & P...,http://taxref.mnhn.fr/lod/taxon/965160,http://taxref.mnhn.fr/lod/taxrank/Varietas
2,http://ontology.inrae.fr/frenchcropusage/Abric...,abricot,alt,,,,,,,
3,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier pays,pref,,,,,,,
4,http://ontology.inrae.fr/frenchcropusage/Abric...,abricot pays,alt,,,,,,,
5,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier des antilles,alt,MAFAM,Mammea americana,pref,"Mammea americana L., 1753","Mammea americana L., 1753",http://taxref.mnhn.fr/lod/taxon/447085,http://taxref.mnhn.fr/lod/taxrank/Species
6,http://ontology.inrae.fr/frenchcropusage/Abric...,mamey,alt,POJSA,Pouteria sapota,pref,"Pouteria sapota (Jacq.) H.E.Moore & Stearn, 1967","Pouteria sapota (Jacq.) H.E.Moore & Stearn, 1967",http://taxref.mnhn.fr/lod/taxon/630482,http://taxref.mnhn.fr/lod/taxrank/Species
7,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier-pays,alt,,,,,,,
8,http://ontology.inrae.fr/frenchcropusage/Actin...,actinidia,pref,ATICH,Actinidia chinensis,pref,"Actinidia chinensis Planch., 1847","Actinidia chinensis Planch., 1847",http://taxref.mnhn.fr/lod/taxon/80138,http://taxref.mnhn.fr/lod/taxrank/Species
9,http://ontology.inrae.fr/frenchcropusage/Actin...,actinidia,pref,ATICH,Actinidia chinensis,alt,"Actinidia chinensis auct. non Planch., 1847",Actinidia deliciosa (A.Chev.) C.F.Liang & A.R....,http://taxref.mnhn.fr/lod/taxon/80140,http://taxref.mnhn.fr/lod/taxrank/Species


#### Count only matches

In [26]:
dataframe_preview(df_merge.dropna())

== Number of lines: 558
== Number of unique values:
fcu_concept             266
fcu_name                474
fcu_name_type             2
eppo_code               306
scientific_name         306
taxref_name_type          2
taxref_full_name        344
taxref_ref_full_name    315
taxon                   315
rank                      4
dtype: int64


Unnamed: 0,fcu_concept,fcu_name,fcu_name_type,eppo_code,scientific_name,taxref_name_type,taxref_full_name,taxref_ref_full_name,taxon,rank
0,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier,pref,PRNAR,Prunus armeniaca,pref,"Prunus armeniaca L., 1753","Prunus armeniaca L., 1753",http://taxref.mnhn.fr/lod/taxon/116041,http://taxref.mnhn.fr/lod/taxrank/Species
1,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier,pref,PRNAR,Prunus armeniaca,alt,"Prunus armeniaca Chevall., 1827",Prunus domestica var. insititia (L.) Fiori & P...,http://taxref.mnhn.fr/lod/taxon/965160,http://taxref.mnhn.fr/lod/taxrank/Varietas
5,http://ontology.inrae.fr/frenchcropusage/Abric...,abricotier des antilles,alt,MAFAM,Mammea americana,pref,"Mammea americana L., 1753","Mammea americana L., 1753",http://taxref.mnhn.fr/lod/taxon/447085,http://taxref.mnhn.fr/lod/taxrank/Species
6,http://ontology.inrae.fr/frenchcropusage/Abric...,mamey,alt,POJSA,Pouteria sapota,pref,"Pouteria sapota (Jacq.) H.E.Moore & Stearn, 1967","Pouteria sapota (Jacq.) H.E.Moore & Stearn, 1967",http://taxref.mnhn.fr/lod/taxon/630482,http://taxref.mnhn.fr/lod/taxrank/Species
8,http://ontology.inrae.fr/frenchcropusage/Actin...,actinidia,pref,ATICH,Actinidia chinensis,pref,"Actinidia chinensis Planch., 1847","Actinidia chinensis Planch., 1847",http://taxref.mnhn.fr/lod/taxon/80138,http://taxref.mnhn.fr/lod/taxrank/Species
9,http://ontology.inrae.fr/frenchcropusage/Actin...,actinidia,pref,ATICH,Actinidia chinensis,alt,"Actinidia chinensis auct. non Planch., 1847",Actinidia deliciosa (A.Chev.) C.F.Liang & A.R....,http://taxref.mnhn.fr/lod/taxon/80140,http://taxref.mnhn.fr/lod/taxrank/Species
13,http://ontology.inrae.fr/frenchcropusage/Actin...,kiwi,alt,ATICH,Actinidia chinensis,pref,"Actinidia chinensis Planch., 1847","Actinidia chinensis Planch., 1847",http://taxref.mnhn.fr/lod/taxon/80138,http://taxref.mnhn.fr/lod/taxrank/Species
14,http://ontology.inrae.fr/frenchcropusage/Actin...,kiwi,alt,ATICH,Actinidia chinensis,alt,"Actinidia chinensis auct. non Planch., 1847",Actinidia deliciosa (A.Chev.) C.F.Liang & A.R....,http://taxref.mnhn.fr/lod/taxon/80140,http://taxref.mnhn.fr/lod/taxrank/Species
20,http://ontology.inrae.fr/frenchcropusage/Ails,ail,pref,ALLSA,Allium sativum,pref,"Allium sativum L., 1753","Allium sativum L., 1753",http://taxref.mnhn.fr/lod/taxon/81505,http://taxref.mnhn.fr/lod/taxrank/Species
23,http://ontology.inrae.fr/frenchcropusage/Ails,ail commun,alt,ALLSA,Allium sativum,pref,"Allium sativum L., 1753","Allium sativum L., 1753",http://taxref.mnhn.fr/lod/taxon/81505,http://taxref.mnhn.fr/lod/taxrank/Species


### Matches:
- FCU: 266 unique concepts, 474 unique labels
- EPPO: 306 unique codes and scientific names
- TAXREF-LD: 315 unique taxa from 5 ranks

### Exports

In [27]:
df_merge.to_excel("result3_fcu_eppo_taxref.xlsx")

In [34]:
# Reshape the DataFrame for later merging with other methods
df_merge.dropna(inplace=True)
df_merge.drop(columns=['fcu_name_type', 'fcu_name', 'eppo_code', 'taxref_name_type', 'taxref_full_name'], inplace=True)
df_merge.insert(1, 'method', 'eppo')
df_merge.insert(2, 'geves_name', '')
df_merge.rename(columns={'scientific_name': 'eppo_scientific_name'}, inplace=True)

In [36]:
dataframe_preview(df_merge)
df_merge.to_excel("result3_fcu_eppo_taxref_merge.xlsx", index=False)
df_merge.to_csv("result3_fcu_eppo_taxref_merge.csv", index=False)

== Number of lines: 558
== Number of unique values:
fcu_concept             266
method                    1
geves_name                1
eppo_scientific_name    306
taxref_ref_full_name    315
taxon                   315
rank                      4
dtype: int64


Unnamed: 0,fcu_concept,method,geves_name,eppo_scientific_name,taxref_ref_full_name,taxon,rank
0,http://ontology.inrae.fr/frenchcropusage/Abric...,eppo,,Prunus armeniaca,"Prunus armeniaca L., 1753",http://taxref.mnhn.fr/lod/taxon/116041,http://taxref.mnhn.fr/lod/taxrank/Species
1,http://ontology.inrae.fr/frenchcropusage/Abric...,eppo,,Prunus armeniaca,Prunus domestica var. insititia (L.) Fiori & P...,http://taxref.mnhn.fr/lod/taxon/965160,http://taxref.mnhn.fr/lod/taxrank/Varietas
5,http://ontology.inrae.fr/frenchcropusage/Abric...,eppo,,Mammea americana,"Mammea americana L., 1753",http://taxref.mnhn.fr/lod/taxon/447085,http://taxref.mnhn.fr/lod/taxrank/Species
6,http://ontology.inrae.fr/frenchcropusage/Abric...,eppo,,Pouteria sapota,"Pouteria sapota (Jacq.) H.E.Moore & Stearn, 1967",http://taxref.mnhn.fr/lod/taxon/630482,http://taxref.mnhn.fr/lod/taxrank/Species
8,http://ontology.inrae.fr/frenchcropusage/Actin...,eppo,,Actinidia chinensis,"Actinidia chinensis Planch., 1847",http://taxref.mnhn.fr/lod/taxon/80138,http://taxref.mnhn.fr/lod/taxrank/Species
9,http://ontology.inrae.fr/frenchcropusage/Actin...,eppo,,Actinidia chinensis,Actinidia deliciosa (A.Chev.) C.F.Liang & A.R....,http://taxref.mnhn.fr/lod/taxon/80140,http://taxref.mnhn.fr/lod/taxrank/Species
13,http://ontology.inrae.fr/frenchcropusage/Actin...,eppo,,Actinidia chinensis,"Actinidia chinensis Planch., 1847",http://taxref.mnhn.fr/lod/taxon/80138,http://taxref.mnhn.fr/lod/taxrank/Species
14,http://ontology.inrae.fr/frenchcropusage/Actin...,eppo,,Actinidia chinensis,Actinidia deliciosa (A.Chev.) C.F.Liang & A.R....,http://taxref.mnhn.fr/lod/taxon/80140,http://taxref.mnhn.fr/lod/taxrank/Species
20,http://ontology.inrae.fr/frenchcropusage/Ails,eppo,,Allium sativum,"Allium sativum L., 1753",http://taxref.mnhn.fr/lod/taxon/81505,http://taxref.mnhn.fr/lod/taxrank/Species
23,http://ontology.inrae.fr/frenchcropusage/Ails,eppo,,Allium sativum,"Allium sativum L., 1753",http://taxref.mnhn.fr/lod/taxon/81505,http://taxref.mnhn.fr/lod/taxrank/Species
