In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from shapely.geometry import Point
import geopandas as gpd
import matplotlib.colors as mcolors
import contextily as ctx 


In [4]:
# Load the input tables.
# Paths use forward slashes: the earlier backslash form relied on "\g", "\s"
# and "\e" being *invalid* escape sequences (a warning on current Python) and
# only resolved on Windows. Forward slashes work on every platform.
geo_regions = pd.read_csv("Data/geographical_regions.csv")
animals = pd.read_csv("Data/species_info_annie.csv")
# NOTE(review): this file is read from the working directory, not from Data/ — confirm.
taxonomy = pd.read_csv("traits (1).csv")
env = pd.read_csv("Data/species/environmental_df.csv")

In [34]:
def _as_pair(value):
    """Return a range cell as an indexable (low, high) sequence.

    A range column read straight from CSV holds text such as "(1.5, 2.5)";
    indexing that text would yield single characters instead of the bounds.
    Text cells are therefore parsed as Python literals, while cells that are
    already sequences are passed through unchanged.
    """
    import ast  # local import: keeps this cell runnable without touching the import cell

    return ast.literal_eval(value) if isinstance(value, str) else value


# Split each "<axis> Range" column into separate "<axis> Min" / "<axis> Max" columns.
for _axis in ('Latitude', 'Longitude'):
    _pairs = geo_regions[f'{_axis} Range'].map(_as_pair)
    geo_regions[f'{_axis} Min'] = _pairs.map(lambda pair: pair[0])
    geo_regions[f'{_axis} Max'] = _pairs.map(lambda pair: pair[1])

# Look up the region ("leaf") whose bounding box contains a single observation.
def get_leaf_label(row, regions=None):
    """Return the 'Leaf Number' of the first region containing ``row``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Latitude' and 'Longitude'.
    regions : pandas.DataFrame, optional
        Table with 'Latitude Min', 'Latitude Max', 'Longitude Min',
        'Longitude Max' and 'Leaf Number' columns. Defaults to the
        notebook-level ``geo_regions`` frame.

    Returns
    -------
    The matching 'Leaf Number', or None when no region contains the point.

    Notes
    -----
    All four bounds are inclusive, so a point lying on an edge shared by two
    regions matches both; the first one in table order is returned.
    The comparison is vectorized across *regions* only; the function itself
    is still evaluated once per row by the caller.
    """
    if regions is None:
        regions = geo_regions

    lat = row['Latitude']
    lon = row['Longitude']

    inside = (
        (regions['Latitude Min'] <= lat)
        & (regions['Latitude Max'] >= lat)
        & (regions['Longitude Min'] <= lon)
        & (regions['Longitude Max'] >= lon)
    )
    hits = regions.loc[inside, 'Leaf Number']

    if hits.empty:
        return None
    return hits.values[0]

# Tag every env record with the leaf whose bounding box contains it
# (get_leaf_label is evaluated once per row).
env['Leaf Label'] = env.apply(get_leaf_label, axis="columns")

In [41]:
def _unique_label(x):
    """Join a row's 'Leaf Label' and 'Cluster' values with an underscore."""
    return f"{x['Leaf Label']}_{x['Cluster']}"


# Built row by row so each value is formatted exactly as it appears in the row.
env['Unique Label'] = env.apply(_unique_label, axis="columns")

In [44]:
# Keep only the record identifier and its combined leaf/cluster label.
df = env.loc[:, ["ID", "Unique Label"]]
df

Unnamed: 0,ID,Unique Label
0,31529,168.0_0.0
1,31529,168.0_0.0
2,31529,168.0_0.0
3,31529,168.0_0.0
4,31529,168.0_0.0
...,...,...
271265,145031,145.0_3.0
271266,145031,145.0_3.0
271267,145031,145.0_3.0
271268,145031,145.0_3.0


In [9]:
# Missing-value tally per taxonomy column
nan_counts = taxonomy.isnull().sum()

# Flatten the tally into (column, count) pairs
nan_counts_list = list(nan_counts.items())

# Show the pairs
print("List of columns with their NaN counts:")
print(nan_counts_list)


List of columns with their NaN counts:
[('Species ID', 0), ('Body symmetry', 6), ('actual evapotranspiration rate in geographic range (millimeters per month)', 425), ('adult yearly survival (percent)', 493), ('age at eye opening (days)', 477), ('age at maturity (days)', 456), ('amino acid composition of milk', 493), ('animal population density (individuals per square kilometer)', 455), ('are commensal with', 493), ('are eaten by', 338), ('are host of', 291), ('are killed by', 482), ('are mutualistic with', 479), ('are parasitized by', 310), ('are preyed upon by', 317), ('are vectors for', 486), ('ash content (percent)', 488), ('auditory system', 3), ('basal metabolic rate (watts)', 448), ('behavioral circadian rhythm', 316), ('bill length (mm)', 488), ('body length (mm)', 422), ('body mass (g)', 152), ('body shape', 391), ('body temperature (kelvin)', 458), ('breeding habitat', 488), ('carbohydrate composition of milk', 484), ('cellularity', 3), ('clutch/brood/litter size', 374), ('co-

In [12]:
# Frequency of each distinct "population trend" value (missing values are excluded).
taxonomy.loc[:, "population trend"].value_counts()

population trend
('Stable',)                     220
('Decreasing',)                 162
('Unknown',)                     35
('Increasing',)                  25
('Decreasing', 'Decreasing')      1
Name: count, dtype: int64

In [14]:
# Trait columns to inspect side by side.
cols = [
    "habitat",
    "conservation status",
    "ecoregion",
    "body temperature (kelvin)",
    "locomotion",
    "motility",
    "population trend",
]

taxonomy.loc[:, cols]

Unnamed: 0,habitat,conservation status,ecoregion,body temperature (kelvin),locomotion,motility,population trend
0,('terrestrialURI:http://purl.obolibrary.org/ob...,('least concernURI:http://eol.org/schema/terms...,('Mississippi Lowland ForestsURI:http://eol.or...,,('quadrupedal movementURI:https://www.wikidata...,('actively mobileURI:http://eol.org/schema/ter...,"('Stable',)"
1,('terrestrialURI:http://purl.obolibrary.org/ob...,('least concernURI:http://eol.org/schema/terms...,('Hispaniolan Moist ForestsURI:http://eol.org/...,,('saltationURI:http://purl.obolibrary.org/obo/...,,
2,('freshwater habitatURI:http://purl.obolibrary...,('least concernURI:http://eol.org/schema/terms...,('East Sudanian SavannaURI:http://eol.org/sche...,,('saltationURI:http://purl.obolibrary.org/obo/...,,"('Unknown',)"
3,('coastalURI:http://purl.obolibrary.org/obo/EN...,('near threatenedURI:http://eol.org/schema/ter...,,,('quadrupedal movementURI:https://www.wikidata...,,"('Stable',)"
4,('temperateURI:http://purl.obolibrary.org/obo/...,('least concernURI:http://eol.org/schema/terms...,('Allegheny Highlands ForestsURI:http://eol.or...,,('quadrupedal movementURI:https://www.wikidata...,('actively mobileURI:http://eol.org/schema/ter...,"('Stable',)"
...,...,...,...,...,...,...,...
490,('terrestrialURI:http://purl.obolibrary.org/ob...,('least concernURI:http://eol.org/schema/terms...,('Central Zambezian Miombo WoodlandsURI:http:/...,,('concertinaURI:https://www.wikidata.org/entit...,,"('Unknown',)"
491,('terrestrialURI:http://purl.obolibrary.org/ob...,('least concernURI:http://eol.org/schema/terms...,('Eastern Highlands Moist Deciduous ForestsURI...,,('concertinaURI:https://www.wikidata.org/entit...,,"('Unknown',)"
492,('terrestrial habitatURI:http://purl.obolibrar...,,('Western Australian Mulga ShrublandsURI:http:...,,('runningURI:http://purl.obolibrary.org/obo/NB...,,
493,('woodland canopyURI:http://purl.obolibrary.or...,('least concernURI:http://eol.org/schema/terms...,('Eastern Java-Bali Rain ForestsURI:http://eol...,,('runningURI:http://purl.obolibrary.org/obo/NB...,,"('Stable',)"
