# Data Preperation

## 1. Import libraries

In [684]:
import pandas as pd

In [685]:
data = pd.read_csv('observation.csv')
data

Unnamed: 0,id,image_url,scientific_name,common_name,iconic_taxon_name
0,39,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia
1,40,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia
2,80,https://inaturalist-open-data.s3.amazonaws.com...,Callisaurus draconoides,Zebra-tailed Lizard,Reptilia
3,203,http://static.inaturalist.org/photos/132/mediu...,Arctia virginalis,Ranchman's Tiger Moth,Insecta
4,523,https://inaturalist-open-data.s3.amazonaws.com...,Malacosoma disstria,Forest Tent Caterpillar Moth,Insecta
...,...,...,...,...,...
303675,156384671,https://static.inaturalist.org/photos/27041880...,Duttaphrynus melanostictus,Asian Common Toad,Amphibia
303676,156387550,https://inaturalist-open-data.s3.amazonaws.com...,Bufo bufo,Gewone Pad,Amphibia
303677,156391245,https://static.inaturalist.org/photos/27043003...,Crotalus adamanteus,Eastern Diamondback Rattlesnake,Reptilia
303678,156392005,https://inaturalist-open-data.s3.amazonaws.com...,Hyla arborea,Boomkikker,Amphibia


## 2. Check empty values.

###  Check empty values in image_url
I am going to check whether there are any rows with the same scientific_name, where one of the rows has an image URL and the other does not. This will allow me to transfer the URL from the row containing the URL to the row with the empty URL.

In [686]:
# Count the number of empty 'image_url' values
num_empty_image_urls = data['image_url'].isna().sum()

# Print the number of empty 'image_url' values
print(f"Number of empty 'image_url' values: {num_empty_image_urls}")


Number of empty 'image_url' values: 1032


In [687]:
# Find the rows with empty 'image_url' values
empty_image_url_rows = data.loc[data['image_url'].isna()]

# Iterate through the empty 'image_url' rows
for index, row in empty_image_url_rows.iterrows():
    # Find a row with the same 'scientific_name' and non-empty 'image_url'
    matching_row = data.loc[
        (data['scientific_name'] == row['scientific_name']) &
        (~data['image_url'].isna())
    ].head(1)

    # If a matching row is found
    if not matching_row.empty:
        # Copy the 'image_url' value from the matching row to the current row
        data.loc[index, 'image_url'] = matching_row['image_url'].values[0]

# Print the updated dataset
data

Unnamed: 0,id,image_url,scientific_name,common_name,iconic_taxon_name
0,39,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia
1,40,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia
2,80,https://inaturalist-open-data.s3.amazonaws.com...,Callisaurus draconoides,Zebra-tailed Lizard,Reptilia
3,203,http://static.inaturalist.org/photos/132/mediu...,Arctia virginalis,Ranchman's Tiger Moth,Insecta
4,523,https://inaturalist-open-data.s3.amazonaws.com...,Malacosoma disstria,Forest Tent Caterpillar Moth,Insecta
...,...,...,...,...,...
303675,156384671,https://static.inaturalist.org/photos/27041880...,Duttaphrynus melanostictus,Asian Common Toad,Amphibia
303676,156387550,https://inaturalist-open-data.s3.amazonaws.com...,Bufo bufo,Gewone Pad,Amphibia
303677,156391245,https://static.inaturalist.org/photos/27043003...,Crotalus adamanteus,Eastern Diamondback Rattlesnake,Reptilia
303678,156392005,https://inaturalist-open-data.s3.amazonaws.com...,Hyla arborea,Boomkikker,Amphibia


#### Check if there are still empty values in the Image_url.

I will verify whether there are any remaining empty values in the Image_url column. If there are, I will exclude those rows as they will not be useful for image classification.

In [688]:
# Count the number of empty 'image_url' values
num_empty_image_urls = data['image_url'].isna().sum()

# Print the number of empty 'image_url' values
print(f"Number of empty 'image_url' values: {num_empty_image_urls}")


Number of empty 'image_url' values: 14


As there are still 14 rows that lack an image URL, my next step is to remove those rows

In [689]:
# Remove rows with empty 'image_url' values
data = data.dropna(subset=['image_url'])

# Count the number of empty 'image_url' values
num_empty_image_urls = data['image_url'].isna().sum()

# Print the number of empty 'image_url' values
print(f"Number of empty 'image_url' values: {num_empty_image_urls}")


Number of empty 'image_url' values: 0


### Check empty values in common name.

In [690]:
# Check for missing values in the common_name column
missing_common_names = data['common_name'].isnull()

# Count the number of missing common names
num_missing_common_names = missing_common_names.sum()

# Print the number of missing common names
print(f"There are {num_missing_common_names} missing common names in the data.")


There are 27766 missing common names in the data.


I want to verify if it is possible to copy the missing `common_name` values when the `common_name` column is empty (NaN) and the `scientific_name` column matches a row where the `common_name` is not empty.

In [691]:
def check_common_name_variations(group):
    has_nan = group['common_name'].isna().any()
    has_value = group['common_name'].notna().any()
    return has_nan and has_value

groups = data.groupby('scientific_name')
duplicates_with_varied_common_names = groups.apply(check_common_name_variations)
duplicates_with_varied_common_names = duplicates_with_varied_common_names[duplicates_with_varied_common_names]

print(f"There is {len(duplicates_with_varied_common_names)} scientific name where some common name values are missing.")
print(duplicates_with_varied_common_names)


There is 1 scientific name where some common name values are missing.
scientific_name
Leptidea sinapis    True
dtype: bool


In [692]:
# Select the rows with the matching scientific name
matching_rows = data.loc[data['scientific_name'] == 'Leptidea sinapis']

# Output the matching rows
matching_rows


Unnamed: 0,id,image_url,scientific_name,common_name,iconic_taxon_name
40298,15382084,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
44516,16896561,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
50741,19247806,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
58898,22762965,https://static.inaturalist.org/photos/35275688...,Leptidea sinapis,Wood White,Insecta
59027,22831485,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
64060,25905213,https://static.inaturalist.org/photos/40140062...,Leptidea sinapis,Wood White,Insecta
79322,31351142,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
101936,41908484,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
103205,42480138,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
107365,45149613,https://static.inaturalist.org/photos/71584977...,Leptidea sinapis,Wood White,Insecta


As there are some instances where the `common_name` column is missing, but the `scientific_name` column is the same as other rows that have a non-missing `common_name`, we can copy the values of the non-missing `common_name` to fill in the missing values.

In [693]:
# Fill missing common names with 'Wood White' for Leptidea sinapis
data.loc[data['scientific_name'] == 'Leptidea sinapis', 'common_name'] = data.loc[data['scientific_name'] == 'Leptidea sinapis', 'common_name'].fillna('Wood White')

# Print the updated rows
data.loc[data['scientific_name'] == 'Leptidea sinapis']

Unnamed: 0,id,image_url,scientific_name,common_name,iconic_taxon_name
40298,15382084,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
44516,16896561,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
50741,19247806,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
58898,22762965,https://static.inaturalist.org/photos/35275688...,Leptidea sinapis,Wood White,Insecta
59027,22831485,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
64060,25905213,https://static.inaturalist.org/photos/40140062...,Leptidea sinapis,Wood White,Insecta
79322,31351142,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
101936,41908484,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
103205,42480138,https://inaturalist-open-data.s3.amazonaws.com...,Leptidea sinapis,Wood White,Insecta
107365,45149613,https://static.inaturalist.org/photos/71584977...,Leptidea sinapis,Wood White,Insecta


Now I'll check if the problem is solved.

In [694]:
def check_common_name_variations(group):
    has_nan = group['common_name'].isna().any()
    has_value = group['common_name'].notna().any()
    return has_nan and has_value

groups = data.groupby('scientific_name')
duplicates_with_varied_common_names = groups.apply(check_common_name_variations)
duplicates_with_varied_common_names = duplicates_with_varied_common_names[duplicates_with_varied_common_names]

print(f"There is {len(duplicates_with_varied_common_names)} scientific name where some common name values are missing.")
print(duplicates_with_varied_common_names)

There is 0 scientific name where some common name values are missing.
Series([], dtype: bool)


In [695]:
# Check for missing values in the common_name column
missing_common_names = data['common_name'].isnull()

# Count the number of missing common names
num_missing_common_names = missing_common_names.sum()

# Print the number of missing common names
print(f"There are {num_missing_common_names} missing common names in the data.")


There are 27760 missing common names in the data.


Although some of the missing values have been filled in, there are still many values missing in the `common_name` column. To investigate this further, I am going to check which `common_name` values are missing for each unique `scientific_name`.

In [696]:
# Count the number of missing values per scientific name
missing_values_by_name = data.groupby('scientific_name')['common_name'].apply(lambda x: x.isnull().sum())

# Sort the rows by the number of missing values in descending order
missing_values_by_name = missing_values_by_name.sort_values(ascending=False)

# Print the resulting DataFrame
missing_values_by_name.head(10)


scientific_name
Acanthocephala terminalis    158
Carabus coriaceus             87
Acleris                       79
Laphria thoracica             74
Issus coleoptratus            63
Citheronia laocoon            61
Phoneutria depilata           59
Leptoglossus zonatus          55
Pantherodes pardalaria        52
Ernolatia moorei              49
Name: common_name, dtype: int64

In [697]:
# Fill missing common names with 'eurema salome' for Abaeis salome jamapa
data.loc[data['scientific_name'] == 'Abaeis salome jamapa', 'common_name'] = 'Eurema Salome'
data.loc[data['scientific_name'] == 'Acanthocephala terminalis', 'common_name'] = 'Leaf Footed Bug'
data.loc[data['scientific_name'] == 'Carabus coriaceus', 'common_name'] = 'Leather Beetle'
data.loc[data['scientific_name'] == 'Laphria thoracica', 'common_name'] = 'Pebble Bee Robberfly'
data.loc[data['scientific_name'] == 'Citheronia laocoon', 'common_name'] = 'Laocoon Sphinx Moth'


Since there are too many missing `common_name` values to check every one of them individually, I am going to fill the remaining empty values in the `common_name` column with the corresponding value in the `scientific_name` column.

In [698]:
# Fill missing common names with the scientific name
data['common_name'] = data['common_name'].fillna(data['scientific_name'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['common_name'] = data['common_name'].fillna(data['scientific_name'])


In [699]:
# Check for missing values in the common_name column
missing_common_names = data['common_name'].isnull()

# Count the number of missing common names
num_missing_common_names = missing_common_names.sum()

# Print the number of missing common names
print(f"There are {num_missing_common_names} missing common names in the data.")

There are 0 missing common names in the data.


In [700]:
data.isnull().sum()

id                   0
image_url            0
scientific_name      0
common_name          0
iconic_taxon_name    0
dtype: int64

I have removed all the empty values from the dataset

## 3. Check for duplicated names.

### Check for duplicate common names in the dataset
I will verify if there are any instances of different common name values for the same scientific name.

In [701]:
# Find scientific names with more than one unique common name
duplicates = data.groupby('scientific_name')['common_name'].nunique()
duplicates = duplicates[duplicates > 1]

# Print the duplicates (if any)
if len(duplicates) > 0:
    for name in duplicates.index:
        print(f"Scientific Name: {name}")
        subset = data[data['scientific_name'] == name]
        counts = subset.groupby('common_name').size()
        print(counts)
else:
    print("No scientific names have multiple unique common names.")


Scientific Name: Enoplognatha ovata
common_name
Candy-striped Spider complex    3
Common candy-striped spider     9
dtype: int64


In [702]:
# Find scientific names with more than one unique common name
duplicates = data.groupby('scientific_name')['common_name'].nunique()
duplicates = duplicates[duplicates > 1]

# Replace different common names with the same name for each scientific name
for name in duplicates.index:
    subset = data[data['scientific_name'] == name]
    new_common_name = subset['common_name'].mode().values[0]
    data.loc[data['scientific_name'] == name, 'common_name'] = new_common_name

# Print the duplicates (if any)
if len(duplicates) > 0:
    for name in duplicates.index:
        print(f"Scientific Name: {name}")
        subset = data[data['scientific_name'] == name]
        counts = subset.groupby('common_name').size()
        print(counts)
else:
    print("No scientific names have multiple unique common names.")


Scientific Name: Enoplognatha ovata
common_name
Common candy-striped spider    12
dtype: int64


### Change dutch names to English

I cannot check the entire dataset for Dutch names, but I will make changes to the ones I can find

In [703]:
data.head(20)

Unnamed: 0,id,image_url,scientific_name,common_name,iconic_taxon_name
0,39,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia
1,40,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia
2,80,https://inaturalist-open-data.s3.amazonaws.com...,Callisaurus draconoides,Zebra-tailed Lizard,Reptilia
3,203,http://static.inaturalist.org/photos/132/mediu...,Arctia virginalis,Ranchman's Tiger Moth,Insecta
4,523,https://inaturalist-open-data.s3.amazonaws.com...,Malacosoma disstria,Forest Tent Caterpillar Moth,Insecta
5,531,http://static.inaturalist.org/photos/606/mediu...,Halysidota harrisii,Sycamore Tussock Moth,Insecta
6,675,https://inaturalist-open-data.s3.amazonaws.com...,Apatelodes torrefacta,Spotted Apatelodes Moth,Insecta
7,1227,https://inaturalist-open-data.s3.amazonaws.com...,Thamnophis elegans terrestris,Coast Garter Snake,Reptilia
8,1233,https://inaturalist-open-data.s3.amazonaws.com...,Hypsiglena ochrorhynchus nuchalata,California Nightsnake,Reptilia
9,1749,https://inaturalist-open-data.s3.amazonaws.com...,Pseudacris sierra,Sierran Tree Frog,Amphibia


In [704]:
data.tail(20)

Unnamed: 0,id,image_url,scientific_name,common_name,iconic_taxon_name
303660,156331043,https://inaturalist-open-data.s3.amazonaws.com...,Crotaphytus collaris,Eastern Collared Lizard,Reptilia
303661,156331051,https://inaturalist-open-data.s3.amazonaws.com...,Crotaphytus collaris,Eastern Collared Lizard,Reptilia
303662,156333234,https://inaturalist-open-data.s3.amazonaws.com...,Alligator mississippiensis,Mississippialligator,Reptilia
303663,156334904,https://inaturalist-open-data.s3.amazonaws.com...,Crotalus,Rattlesnakes,Reptilia
303664,156339954,https://static.inaturalist.org/photos/27033005...,Crotaphytus collaris,Eastern Collared Lizard,Reptilia
303665,156341197,https://inaturalist-open-data.s3.amazonaws.com...,Sceloporus occidentalis,Western Fence Lizard,Reptilia
303666,156346675,https://inaturalist-open-data.s3.amazonaws.com...,Hyla squirella,Squirrel Treefrog,Amphibia
303667,156349666,https://static.inaturalist.org/photos/27034925...,Hylorina sylvatica,Emerald Forest Frog,Amphibia
303668,156351589,https://static.inaturalist.org/photos/27035364...,Hypsiglena ochrorhynchus,Coast Night Snake,Reptilia
303669,156355036,https://static.inaturalist.org/photos/27036014...,Testudo graeca,Moorse Landschildpad,Reptilia


In [705]:
data['common_name'] = data['common_name'].replace({
    'Boomkikker': 'European Tree Frog',
    'Gewone Pad': 'Common Toad',
    # Add more replacements as needed
})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['common_name'] = data['common_name'].replace({


## Add a column indicating whether the species is dangerous.


In [706]:
data

Unnamed: 0,id,image_url,scientific_name,common_name,iconic_taxon_name
0,39,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia
1,40,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia
2,80,https://inaturalist-open-data.s3.amazonaws.com...,Callisaurus draconoides,Zebra-tailed Lizard,Reptilia
3,203,http://static.inaturalist.org/photos/132/mediu...,Arctia virginalis,Ranchman's Tiger Moth,Insecta
4,523,https://inaturalist-open-data.s3.amazonaws.com...,Malacosoma disstria,Forest Tent Caterpillar Moth,Insecta
...,...,...,...,...,...
303675,156384671,https://static.inaturalist.org/photos/27041880...,Duttaphrynus melanostictus,Asian Common Toad,Amphibia
303676,156387550,https://inaturalist-open-data.s3.amazonaws.com...,Bufo bufo,Common Toad,Amphibia
303677,156391245,https://static.inaturalist.org/photos/27043003...,Crotalus adamanteus,Eastern Diamondback Rattlesnake,Reptilia
303678,156392005,https://inaturalist-open-data.s3.amazonaws.com...,Hyla arborea,European Tree Frog,Amphibia


In [751]:
unique_species = data.drop_duplicates(subset='scientific_name')
unique_species.iloc[1150:1200]['scientific_name']


1830           Mantidactylus cowanii
1832             Pseudoleon superbus
1834    Lycognathophis seychellensis
1835                 Kaloula pulchra
1836                 Anolis ricordii
1841     Lycodryas pseudogranuliceps
1842             Astrochelys radiata
1844       Madagascarophis ocellatus
1847              Furcifer lateralis
1848                   Hyles lineata
1850              Varanus marmoratus
1854                Ballus armadillo
1857          Schistocerca damnifica
1858              Phidippus princeps
1860                  Thereva comata
1861                Testudo hermanni
1865              Ambystoma rosaceum
1867                      Pelophylax
1871               Tantilla gracilis
1874                 Hamadryas arete
1875                Vanessa atalanta
1879                 Ischnura posita
1880                Dolomedes triton
1883            Amphiuma tridactylum
1885           Anolis microlepidotus
1886            Boreocanthon simplex
1891                 Sauromalus ater
1

In [757]:
#Number of unique species in dataset
num_rows = unique_species.shape[0]
print("Number of rows in unique species dataframe:", num_rows)


Number of rows in unique species dataframe: 21183


In [752]:
import ast

# Load the dangerous_species dictionary from a text file
with open('dangerous_species.txt', 'r') as f:
    dangerous_species_str = f.read()
    dangerous_species = ast.literal_eval(dangerous_species_str)

# Update the dangerous and reason columns in the DataFrame based on the dangerous_species dictionary
for species, reason in dangerous_species.items():
    data.loc[data["scientific_name"] == species, "dangerous"] = True
    data.loc[data["scientific_name"] == species, "reason"] = reason

# Not dangerous
data.loc[data['scientific_name'] == 'Actias luna', 'dangerous'] = False
data.loc[data['scientific_name'] == 'Antheraea polyphemus', 'dangerous'] = False
data.loc[data['scientific_name'] == 'Thamnophis sirtalis', 'dangerous'] = False
data.loc[data['scientific_name'] == 'Sceloporus occidentalis', 'dangerous'] = False
data.loc[data['scientific_name'] == 'Cicada orni', 'dangerous'] = False
data.loc[data['scientific_name'] == 'Sympetrum', 'dangerous'] = False
data.loc[data['scientific_name'] == 'Bombus', 'dangerous'] = False


In [753]:
# Fill all other rows with False
data.loc[0:1961, 'dangerous'] = data.loc[0:1961, 'dangerous'].fillna(False)

In [739]:
data

Unnamed: 0,id,image_url,scientific_name,common_name,iconic_taxon_name,dangerous,reason
0,39,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia,True,"Contains tetrodotoxin (TTX), a potent neurotox..."
1,40,https://inaturalist-open-data.s3.amazonaws.com...,Taricha torosa,California Newt,Amphibia,True,"Contains tetrodotoxin (TTX), a potent neurotox..."
2,80,https://inaturalist-open-data.s3.amazonaws.com...,Callisaurus draconoides,Zebra-tailed Lizard,Reptilia,False,
3,203,http://static.inaturalist.org/photos/132/mediu...,Arctia virginalis,Ranchman's Tiger Moth,Insecta,False,
4,523,https://inaturalist-open-data.s3.amazonaws.com...,Malacosoma disstria,Forest Tent Caterpillar Moth,Insecta,False,
...,...,...,...,...,...,...,...
303675,156384671,https://static.inaturalist.org/photos/27041880...,Duttaphrynus melanostictus,Asian Common Toad,Amphibia,True,May secrete a toxic skin secretion if threaten...
303676,156387550,https://inaturalist-open-data.s3.amazonaws.com...,Bufo bufo,Common Toad,Amphibia,True,"Secretes toxic skin secretions if threatened, ..."
303677,156391245,https://static.inaturalist.org/photos/27043003...,Crotalus adamanteus,Eastern Diamondback Rattlesnake,Reptilia,,
303678,156392005,https://inaturalist-open-data.s3.amazonaws.com...,Hyla arborea,European Tree Frog,Amphibia,,


In [758]:
empty_values = data['dangerous'].isna().sum()
print(f"Number of empty values in 'dangerous' column: {empty_values}")



Number of empty values in 'dangerous' column: 240990


In [759]:
unique_species_data = data.drop_duplicates(subset=['scientific_name'], keep='first')
empty_values = unique_species_data['dangerous'].isna().sum()
print(f"Number of empty values in 'dangerous' column for unique species: {empty_values}")


Number of empty values in 'dangerous' column for unique species: 19975


Sort data on most populair

In [717]:
sorted_data = data.sort_values(by='scientific_name', key=lambda x: x.map(data['scientific_name'].value_counts()), ascending=False)


In [723]:
sorted_data.head(40000)

Unnamed: 0,id,image_url,scientific_name,common_name,iconic_taxon_name,dangerous,reason
145137,61894454,https://inaturalist-open-data.s3.amazonaws.com...,Danaus plexippus,Monarch,Insecta,True,Not toxic but may cause skin irritation due to...
285908,142932173,https://static.inaturalist.org/photos/24511799...,Danaus plexippus,Monarch,Insecta,True,Not toxic but may cause skin irritation due to...
196083,89991856,https://inaturalist-open-data.s3.amazonaws.com...,Danaus plexippus,Monarch,Insecta,True,Not toxic but may cause skin irritation due to...
277441,137149742,https://inaturalist-open-data.s3.amazonaws.com...,Danaus plexippus,Monarch,Insecta,True,Not toxic but may cause skin irritation due to...
46274,17542504,https://static.inaturalist.org/photos/26705436...,Danaus plexippus,Monarch,Insecta,True,Not toxic but may cause skin irritation due to...
...,...,...,...,...,...,...,...
88062,34914789,https://inaturalist-open-data.s3.amazonaws.com...,Xylocopa virginica,Eastern Carpenter Bee,Insecta,,
134280,57057035,https://static.inaturalist.org/photos/90979168...,Xylocopa virginica,Eastern Carpenter Bee,Insecta,,
44963,17069869,https://static.inaturalist.org/photos/25854905...,Xylocopa virginica,Eastern Carpenter Bee,Insecta,,
136057,57755439,https://static.inaturalist.org/photos/92154234...,Xylocopa virginica,Eastern Carpenter Bee,Insecta,,
