In [1]:
import pandas as pd
import requests
import getpass
import numpy as np

In [2]:
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not just the
# last one (several cells below rely on this to show more than one table).
InteractiveShell.ast_node_interactivity = "all"

# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [3]:
# Countries covered by the analysis. The spellings matter: the list is used
# with `.isin(...)` against the country-name columns of the IUCN data, so a
# name that does not match exactly is silently left out.
countries = [
    "Belgium", "Germany", "Denmark", "Estonia", "Spain",
    "Finland", "France", "Ireland", "Lithuania", "Latvia",
    "Netherlands", "Poland", "Portugal", "Sweden", "United Kingdom",
]

The data come from the [IUCN Red List](https://www.iucnredlist.org/search?searchType=species) species search, filtered by Systems = Marine.


In [5]:
# Score each country by the share of its marine fish species that are NOT
# threatened, using the CSV exports of the IUCN Red List search.

# Species occurrences by country, restricted to the countries analysed here.
countriesMarine = pd.read_csv("../data/marineIUCN/countries.csv")
countriesMarine = countriesMarine[countriesMarine.name.isin(countries)]

# The taxonomy so we can filter the marine fishes
# Read here for more information https://portals.iucn.org/library/node/45188
taxonomy = pd.read_csv("../data/marineIUCN/taxonomy.csv")
fishesTaxo = taxonomy[
    taxonomy.className.isin(["CHONDRICHTHYES", "ACTINOPTERYGII", "MYXINI"])
]
countriesMarine = fishesTaxo.merge(countriesMarine, on="scientificName")

# Then get the species classification (Vulnerable, Endangered, etc.)
classification = pd.read_csv("../data/marineIUCN/simple_summary.csv")
countriesMarineClass = countriesMarine.merge(
    classification, on="scientificName", how="inner", suffixes=("", "_y")
)
# Columns present in both frames come back twice; keep only the left-hand copy.
countriesMarineClass = countriesMarineClass.drop(
    columns=countriesMarineClass.filter(regex="_y$").columns
)

# We calculate the percentage of threatened species by country
threatenedList = ["Critically Endangered", "Endangered", "Vulnerable"]

# Denominator: species per country that carry a Red List category
# (rows without a category are left out of both counts).
totalCountry = (
    countriesMarineClass.dropna(subset=["redlistCategory"])
    .groupby("name")
    .size()
    .to_frame("counts")
)
# Numerator: threatened species per country.
threatenedCountry = (
    countriesMarineClass[countriesMarineClass.redlistCategory.isin(threatenedList)]
    .groupby("name")
    .size()
    # A country without any threatened species must count as 0 rather than be
    # absent, otherwise the index-aligned division below yields NaN for it.
    .reindex(totalCountry.index, fill_value=0)
    .to_frame("counts")
)

# Inverse to make a score 0-100 with 0 being the worst
threatenedPct = (100 - threatenedCountry / totalCountry * 100).rename(
    columns={"counts": "2023"}
)
threatenedPct

Unnamed: 0_level_0,2023
name,Unnamed: 1_level_1
Belgium,84.96732
Denmark,86.138614
Estonia,90.243902
Finland,90.0
France,91.332611
Germany,82.894737
Ireland,91.885965
Latvia,88.636364
Lithuania,88.372093
Netherlands,89.047619


## Historical data

In [6]:
# Prompt for the IUCN Red List API token at run time so that it is never
# written into the notebook source.
tokenIUCN = getpass.getpass(prompt="IUCN Red List Token: ")

In [7]:
# Get historical assessment classification from IUCN Red List and turn it into
# a per-country, per-year threatened-species score.
#
# The download below TAKES 26 minutes, so it is kept commented out and the CSV
# it produced is read instead. Uncomment it to refresh the cached file.

# speciesName = countriesMarineClass.scientificName.unique().tolist()
# speciesHistory = pd.DataFrame()
# for species in speciesName:
#     response = requests.get(
#         "https://apiv3.iucnredlist.org/api/v3/species/history/name/{0}?token={1}".format(
#             species, tokenIUCN
#         )
#     )
#     tempDf = pd.DataFrame.from_dict(response.json()["result"])
#     tempDf["scientific_name"] = species
#     speciesHistory = pd.concat([speciesHistory, tempDf], axis=0)
# speciesHistory.to_csv("../data/marineFishEUhistoryIUCN.csv", index=False)

speciesHistory = pd.read_csv("../data/marineFishEUhistoryIUCN.csv")

# Create a dataframe with all the years and all the species using a cross
# join, then attach the known assessments; (species, year) pairs without an
# assessment get NaN and are filled below.
speciesHistory = (
    speciesHistory["year"]
    .drop_duplicates()
    .to_frame()
    .merge(speciesHistory["scientific_name"].drop_duplicates(), how="cross")
    .merge(speciesHistory, how="left")
)

# Carry each species' classification forward in time.
speciesHistory = speciesHistory.sort_values(["scientific_name", "year"])
speciesHistory[["category", "assess_year"]] = (
    speciesHistory.groupby("scientific_name")[["category", "assess_year"]]
    .ffill()
    # sometimes, the assess_year is not available until years later,
    # but the category is still valid since it was assessed in the past.
    # The back-fill is grouped as well: `ffill()` returns a plain DataFrame,
    # so an ungrouped `bfill()` would copy values from the next species into
    # a species that has no value of its own.
    .groupby(speciesHistory["scientific_name"])
    .bfill()
)

# A classification cannot apply to years before the assessment was made.
speciesHistory.loc[
    speciesHistory.year < speciesHistory.assess_year, ["assess_year", "category"]
] = np.nan

# Merge to get species by country
speciesHistory = speciesHistory.rename(columns={"scientific_name": "scientificName"})
speciesHistoryCountry = countriesMarineClass.merge(
    speciesHistory, on="scientificName", how="inner", suffixes=("", "_y")
)
# Columns present in both frames come back twice; keep only the left-hand copy.
speciesHistoryCountry = speciesHistoryCountry.drop(
    columns=speciesHistoryCountry.filter(regex="_y$").columns
)

speciesHistoryCountry = speciesHistoryCountry[
    [
        "scientificName",
        "className",
        "name",
        "populationTrend",
        "year",
        "assess_year",
        "category",
    ]
]

# Number of species per country, year and category
# (rows whose category is NaN are dropped by the group-by).
speciesHistoryCountry = (
    speciesHistoryCountry.groupby(["name", "year", "category"])
    .size()
    .reset_index(name="count")
)
# Only these three years are exported.
speciesHistoryCountry = speciesHistoryCountry[
    speciesHistoryCountry.year.isin([2012, 2016, 2022])
]

threatenedList = ["Critically Endangered", "Endangered", "Vulnerable"]

total = (
    speciesHistoryCountry.groupby(["name", "year"])
    .sum(numeric_only=True)
    .rename(columns={"count": "total"})
)

threatened = (
    speciesHistoryCountry[speciesHistoryCountry.category.isin(threatenedList)]
    .groupby(["name", "year"])
    .sum(numeric_only=True)
    .rename(columns={"count": "threatened"})
    # Keep country/years that have species but none threatened (count 0);
    # without this the inner merge below silently drops them.
    .reindex(total.index, fill_value=0)
)
threatenedFish = total.merge(threatened, left_index=True, right_index=True)
# Inverse to make a score 0-100 with 0 being the worst
threatenedFish['threatenedScore'] = 100 - threatenedFish['threatened'] / threatenedFish['total'] * 100
threatenedFish.to_csv("../data/threatenedFishIUCN.csv")

## Using API

This approach also includes 201 freshwater species; as far as I know, the API offers no way to filter them out.

In [372]:
# Get the list of ISO codes as used in the IUCN Red List API

response = requests.get(
    "https://apiv3.iucnredlist.org/api/v3/country/list",
    params={"token": tokenIUCN},
    # Without a timeout a stalled connection would hang the cell indefinitely.
    timeout=60,
)
# Stop here on an HTTP error (e.g. a rejected token) instead of failing later
# with an unrelated KeyError while reading the payload.
response.raise_for_status()
countryList = pd.DataFrame.from_dict(response.json()["results"])

# Normalise any country name containing both "United" and "Kingdom" so that it
# matches the spelling used in `countries`. `na=False` keeps the mask strictly
# boolean even if a name is missing.
countryList.loc[
    countryList["country"].str.contains(r"^(?=.*United)(?=.*Kingdom)", na=False),
    "country",
] = "United Kingdom"
countriesISO = countryList[countryList.country.isin(countries)]["isocode"].to_list()

In [373]:
# Get species per country: one API request per ISO code, each result tagged
# with the code it was requested for.
countryFrames = []
for country in countriesISO:
    response = requests.get(
        "https://apiv3.iucnredlist.org/api/v3/country/getspecies/{0}".format(country),
        params={"token": tokenIUCN},
        # Without a timeout a stalled connection would hang the cell indefinitely.
        timeout=120,
    )
    # Stop on an HTTP error instead of failing with a KeyError on the payload.
    response.raise_for_status()
    tempDf = pd.DataFrame.from_dict(response.json()["result"])
    tempDf["country"] = country
    countryFrames.append(tempDf)

# Concatenate once at the end: growing a DataFrame inside the loop re-copies
# all previously collected rows on every iteration.
countrySpecies = pd.concat(countryFrames, axis=0) if countryFrames else pd.DataFrame()

In [374]:
# Get the list of species with classification (Vulnerable, Endangered, etc.),
# one page per request.
# NOTE(review): the request carries no region parameter, so this presumably
# returns the global species list rather than a European one - confirm.
# NOTE(review): the page count (16) is hard-coded - confirm it still covers
# every page the API serves.
pageNum = list(range(0, 16))
speciesPages = []
# The loop variable has its own name so that it no longer overwrites the
# `pageNum` list it iterates over.
for page in pageNum:
    response = requests.get(
        "https://apiv3.iucnredlist.org/api/v3/species/page/{0}".format(page),
        params={"token": tokenIUCN},
        # Without a timeout a stalled connection would hang the cell indefinitely.
        timeout=120,
    )
    # Stop on an HTTP error instead of failing with a KeyError on the payload.
    response.raise_for_status()
    tempDf = pd.DataFrame.from_dict(response.json()["result"])
    speciesPages.append(tempDf)

# Concatenate once at the end: growing a DataFrame inside the loop re-copies
# all previously collected rows on every iteration.
species = pd.concat(speciesPages, axis=0)

In [380]:
# Filter fishes as per https://portals.iucn.org/library/node/45188,
# BUT these also include freshwater species
speciesMarine = species[
    species.class_name.isin(["CHONDRICHTHYES", "ACTINOPTERYGII", "MYXINI"])
]
countriesMarine = countrySpecies.merge(
    speciesMarine, on="scientific_name", how="inner", suffixes=("", "_y")
)
# Columns present in both frames come back twice; keep only the left-hand copy.
countriesMarine = countriesMarine.drop(
    columns=countriesMarine.filter(regex="_y$").columns
)

# We calculate the percentage of threatened species by country
categories = ["VU", "CR", "LC", "EN", "DD", "NT", "NA", "EX", "EW"]
# NOTE(review): this list counts EX and EW as threatened, whereas
# `threatenedList` in the CSV-based cells holds only Critically Endangered,
# Endangered and Vulnerable - confirm which definition is intended.
threatened = ["VU", "CR", "EN", "EX", "EW"]

# Denominator: species per country whose category is one of `categories`.
totalCountry = (
    countriesMarine[countriesMarine.category.isin(categories)]
    .groupby("country")
    .size()
    .to_frame("count")
)
# Numerator: species per country whose category is one of `threatened`.
threatenedCountry = (
    countriesMarine[countriesMarine.category.isin(threatened)]
    .groupby("country")
    .size()
    # A country without any threatened species must count as 0 rather than be
    # absent, otherwise the index-aligned division below yields NaN for it.
    .reindex(totalCountry.index, fill_value=0)
    .to_frame("count")
)
totalCountry
threatenedCountry

# Inverse to make a score 0-100 with 0 being the worst
threatenedPct = (100 - threatenedCountry / totalCountry * 100).rename(
    columns={"count": "2023"}
)
threatenedPct

Unnamed: 0_level_0,count
country,Unnamed: 1_level_1
BE,195
DE,248
DK,243
EE,70
ES,1159
FI,79
FR,1008
GB,593
IE,491
LT,87


Unnamed: 0_level_0,count
country,Unnamed: 1_level_1
BE,29
DE,55
DK,34
EE,4
ES,116
FI,5
FR,99
GB,74
IE,53
LT,13


Unnamed: 0_level_0,2023
country,Unnamed: 1_level_1
BE,85.128205
DE,77.822581
DK,86.00823
EE,94.285714
ES,89.991372
FI,93.670886
FR,90.178571
GB,87.521079
IE,89.205703
LT,85.057471
