In [2]:
import requests
import os
import pandas as pd
from bs4 import BeautifulSoup
from urllib.parse import urlencode

In [2]:

# Send a GET request to the Audubon bird guide website
url = "https://www.audubon.org/bird-guide"
response = requests.get(url)

# Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')

# Find the specific section containing the bird names
# For example, if the bird names are in a div with class "bird-names-section", you would do:
bird_section = soup.find('div', class_='birdguide-search-results')

# Extract bird names from the section
bird_names = [name.text.strip() for name in bird_section.find_all('span', class_='bird-card-title')]

# Create a dataframe
df = pd.DataFrame({'Bird Names': bird_names})

# Display the dataframe
print(df)

                    Bird Names
0            Northern Cardinal
1                   Barred Owl
2                     Blue Jay
3              Red-tailed Hawk
4             Great Horned Owl
..                         ...
828  Wedge-rumped Storm-petrel
829            Murphy's Petrel
830              Thayer's Gull
831    Iceland Gull (Thayer's)
832          Western Scrub-Jay

[833 rows x 1 columns]


In [3]:
df.tail(10)

Unnamed: 0,Bird Names
823,Siberian Rubythroat
824,Olive-backed Pipit
825,Piratic Flycatcher
826,Mottled Petrel
827,Pechora Pipit
828,Wedge-rumped Storm-petrel
829,Murphy's Petrel
830,Thayer's Gull
831,Iceland Gull (Thayer's)
832,Western Scrub-Jay


In [4]:
def format_bird_name(bird_name):
    # Remove parenthesis and content within them
    if "(" in bird_name:
        bird_name = bird_name.split("(")[0]
    
    # Split on slash and take the name to the right
    if "/" in bird_name:
        bird_name = bird_name.split("/")[-1]
    
    # Add hyphen between words
    formatted_name = "-".join(bird_name.split())
    
    # Convert to lowercase
    formatted_name = formatted_name.lower()
    
    # Remove apostrophes
    formatted_name = formatted_name.replace("'", "")
    
    return formatted_name.strip()  # Strip any leading/trailing spaces

# Apply the formatting function to each row of the dataframe
df_formatted_names = df.apply(lambda row: format_bird_name(row['Bird Names']), axis=1)

# Create a new dataframe with the formatted names
df_formatted = pd.DataFrame({'Formatted Names': df_formatted_names})

print(df_formatted)

               Formatted Names
0            northern-cardinal
1                   barred-owl
2                     blue-jay
3              red-tailed-hawk
4             great-horned-owl
..                         ...
828  wedge-rumped-storm-petrel
829             murphys-petrel
830               thayers-gull
831               iceland-gull
832          western-scrub-jay

[833 rows x 1 columns]


In [5]:
df_formatted.head(10)

Unnamed: 0,Formatted Names
0,northern-cardinal
1,barred-owl
2,blue-jay
3,red-tailed-hawk
4,great-horned-owl
5,house-finch
6,gray-catbird
7,mourning-dove
8,american-crow
9,american-robin


In [6]:
rawbirdata= pd.DataFrame(index = range(839), columns=["name", "sciname", "webname", "at_glance", "category", "conservation", "habitat", "region", "behavior", "population", "range_image", "description", "size", "color", "wing_shape", "tail_shape", "call_pattern", "call_type", "calls_audio", "habitat_ext", "eggs", "young", "feeding_behavior", "diet", "nesting", "images"])

base_url = "https://www.audubon.org/field-guide/bird/"

In [7]:
rawbirdata["webname"] = df_formatted["Formatted Names"]
for index, row in rawbirdata.iterrows():
    rawbirdata.at[index, 'images'] = []
    rawbirdata.at[index, 'calls_audio'] = {}
rawbirdata.head(5)

Unnamed: 0,name,sciname,webname,at_glance,category,conservation,habitat,region,behavior,population,...,call_pattern,call_type,calls_audio,habitat_ext,eggs,young,feeding_behavior,diet,nesting,images
0,,,northern-cardinal,,,,,,,,...,,,{},,,,,,,[]
1,,,barred-owl,,,,,,,,...,,,{},,,,,,,[]
2,,,blue-jay,,,,,,,,...,,,{},,,,,,,[]
3,,,red-tailed-hawk,,,,,,,,...,,,{},,,,,,,[]
4,,,great-horned-owl,,,,,,,,...,,,{},,,,,,,[]


In [8]:

from selenium import webdriver
from selenium.webdriver.common.keys import Keys  
from selenium.webdriver.chrome.options import Options  
import time

In [9]:

def getRange (urly):
    chrome_options = Options()
    driver = webdriver.Chrome(options=chrome_options)

    # Specify the URL
    URL = urly

    # Open the URL in the browser
    driver.get(URL)

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    time.sleep(5)

    # Get the page source
    HTML = driver.page_source

    # Close the browser
    driver.quit()

    # Parse the HTML using Beautiful Soup
    soup = BeautifulSoup(HTML, 'html.parser')

    # Find the <div> element with class "bird-rangemap"
    rangemap_div = soup.find('div', class_='bird_info_item info_migration').find('div', class_='bird-rangemap').find('source')['data-srcset']

    # Print the found element
    return rangemap_div


In [10]:
def getSounds (urly):
    chrome_options = Options()
    driver = webdriver.Chrome(options=chrome_options)


    # Specify the URL
    URL = urly

    # Open the URL in the browser
    driver.get(URL)

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    time.sleep(5)

    # Get the page source
    HTML = driver.page_source

    # Close the browser
    driver.quit()

    # Parse the HTML using Beautiful Soup
    soup = BeautifulSoup(HTML, 'html.parser')

    soundivs = soup.find('div', id='info_sounds').find('div', class_='bird-songs-calls').find_all('figure', class_='track')

    dicBird = {}

    for soundiv in soundivs:
        keyRaw = soundiv.find('figcaption')
        key = keyRaw.get_text(strip=True)
        value = soundiv.find('audio')['data-src']
        dicBird[key] = value


    return dicBird

In [11]:
for index, bird in rawbirdata.iterrows():

    bird_url = base_url + bird["webname"]
    response = requests.get(bird_url)
    soup = BeautifulSoup(response.content, "html.parser")
  
    #CALL AUDIOS
    rawbirdata.at[index, 'calls_audio'] = getSounds(bird_url)

    #RANGE IMAGE
    rawbirdata.at[index, 'range_image'] = getRange(bird_url)

    #BIRD IMAGES
    medias = soup.find_all("div", class_="media-data")
    for media in medias:
        sources = media.find_all("source")
        if sources and sources[0].has_attr('srcset'):
            link_name = sources[0]['srcset']  
        elif sources:
            link_name = sources[0]['data-srcset']  
        else:
            link_name = None 
        current_list = rawbirdata.at[index, 'images']
        current_list.append(link_name)
        rawbirdata.at[index, 'images'] = current_list

  
    #COMMON NAME
    cont_name = soup.find("div", class_="bird-navigation-rail related_arts_present").find("div", class_="bird-meta").find("div", class_="bird-title")
    para_name = cont_name.get_text(strip=True)
    rawbirdata.at[index, "name"] = para_name

    #SCIENTIFIC NAME        
    cont_sciname = soup.find("div", class_="bird-navigation-rail related_arts_present").find("div", class_="bird-meta").find("div", class_="subtitle")
    para_sciname = cont_sciname.get_text(strip=True)
    rawbirdata.at[index, "sciname"] = para_sciname

    #AT A GLANCE
    cont_glance = soup.find("div", class_="content_part").find("div", class_="intro_part").find("div", class_="intro_grid bird_page_grid_4x5").find("div", class_="intro_text")
    para_glance = cont_glance.get_text(strip=True)
    rawbirdata.at[index, "at_glance"] = para_glance

    #CATEGORY
    if soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before layers_icon") != None:
        cont_cat = soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before layers_icon").find("div", class_="tax-value")
        para_cat = cont_cat.get_text(strip=True)
        rawbirdata.at[index, "category"] = para_cat

    #CONSERVATION
    if soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before shield_icon") != None:
        cont_cons = soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before shield_icon").find("div", class_="tax-value")
        para_cons = cont_cons.get_text(strip=True)
        rawbirdata.at[index, "conservation"] = para_cons

    #HABITAT (SMALL)
    if soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before home_icon") != None:    
        cont_hab = soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before home_icon").find("div", class_="tax-value")
        para_hab = cont_hab.get_text(strip=True)
        rawbirdata.at[index, "habitat"] = para_hab

    #REGION
    if soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before map_icon") != None:
        cont_reg = soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before map_icon").find("div", class_="tax-value")
        para_reg = cont_reg.get_text(strip=True)
        rawbirdata.at[index, "region"] = para_reg

    #BEHAVIOR
    if soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before bird_icon") != None:
        cont_beh = soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before bird_icon").find("div", class_="tax-value")
        para_beh = cont_beh.get_text(strip=True)
        rawbirdata.at[index, "behavior"] = para_beh

    #POPULATION
    if soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before flock_icon") != None:
        cont_pop = soup.find("div", class_="bird-taxonomy grid_type").find("div", class_="tax-item icons_dictionary_before flock_icon").find("div", class_="tax-value")
        para_pop = cont_pop.get_text(strip=True)
        rawbirdata.at[index, "population"] = para_pop


    



    #CALL PATTERN, CALL TYPE, SIZE, COLOR, WING SHAPE, TAIL SHAPE
    tax_inlines = soup.find_all("div", class_="bird-taxonomy inline_type")
    for type in tax_inlines:
        if soup.find("div", class_="tax-item icons_dictionary_before advocacy_icon") != None:
            cont_callpat = soup.find("div", class_="tax-item icons_dictionary_before advocacy_icon").find("div", class_="tax-value")
            para_callpat = cont_callpat.get_text(strip=True)
            rawbirdata.at[index, "call_pattern"] = para_callpat

        if soup.find("div", class_="tax-item icons_dictionary_before loudspeaker_icon") != None:
            cont_calltyp = soup.find("div", class_="tax-item icons_dictionary_before loudspeaker_icon").find("div", class_="tax-value")
            para_calltyp = cont_calltyp.get_text(strip=True)
            rawbirdata.at[index, "call_type"] = para_calltyp
        
        if soup.find("div", class_="tax-item icons_dictionary_before size_icon") != None:
            cont_size = soup.find("div", class_="tax-item icons_dictionary_before size_icon").find("div", class_="tax-value")
            para_size = cont_size.get_text(strip=True)
            rawbirdata.at[index, "size"] = para_size

        if soup.find("div", class_="tax-item icons_dictionary_before color_icon") != None:
            cont_color = soup.find("div", class_="tax-item icons_dictionary_before color_icon").find("div", class_="tax-value")
            para_color = cont_color.get_text(strip=True)
            rawbirdata.at[index, "color"] = para_color

        binocs = soup.find_all("div", class_="tax-item icons_dictionary_before binoculars_icon")
        for binoc in binocs:
            if binoc.find("div", class_="tax-title").get_text(strip=True) == "Wing Shape":
                cont_wingshape = binoc.find("div", class_="tax-value")
                para_wingshape = cont_wingshape.get_text(strip=True)
                rawbirdata.at[index, "wing_shape"] = para_wingshape
            else:
                cont_tailshape = binoc.find("div", class_="tax-value")
                para_tailshape = cont_tailshape.get_text(strip=True)
                rawbirdata.at[index, "tail_shape"] = para_tailshape  


        


    

    #DESCRIPTION
    cont_desc = soup.find("div", class_="bird_info_item info_description").find("div", class_="content")
    para_desc = cont_desc.get_text(strip=True)
    rawbirdata.at[index, "description"] = para_desc

    #HABITAT
    cont_habitat = soup.find("div", class_="bird_info_item info_habitat").find("div", class_="content")
    para_habitat = cont_habitat.get_text(strip=True)
    rawbirdata.at[index, "habitat_ext"] = para_habitat

    #EGGS
    cont_eggs = soup.find("div", class_="bird_info_item info_eggs").find("div", class_="content")
    para_eggs = cont_eggs.get_text(strip=True)
    rawbirdata.at[index, "eggs"] = para_eggs

    #FEEDING BEHAVIOR
    cont_feed = soup.find("div", class_="bird_info_item info_feeding").find("div", class_="content")
    para_feed = cont_feed.get_text(strip=True)
    rawbirdata.at[index, "feeding_behavior"] = para_feed

    #YOUNG
    cont_young = soup.find("div", class_="bird_info_item info_young").find("div", class_="content")
    para_young = cont_young.get_text(strip=True)
    rawbirdata.at[index, "young"] = para_young

    #NESTING
    cont_nest = soup.find("div", class_="bird_info_item info_nesting").find("div", class_="content")
    para_nest = cont_nest.get_text(strip=True)
    rawbirdata.at[index, "nesting"] = para_nest

    #DIET
    cont_diet = soup.find("div", class_="bird_info_item info_diet").find("div", class_="content")
    para_diet = cont_diet.get_text(strip=True)
    rawbirdata.at[index, "diet"] = para_diet
rawbirdata.head()



    



Exception ignored in: <function Service.__del__ at 0x000001F98E6199E0>
Traceback (most recent call last):
  File "c:\Users\evandenkooy\AppData\Local\anaconda3\Lib\site-packages\selenium\webdriver\common\service.py", line 189, in __del__
    self.stop()
  File "c:\Users\evandenkooy\AppData\Local\anaconda3\Lib\site-packages\selenium\webdriver\common\service.py", line 146, in stop
    self.send_remote_shutdown_command()
  File "c:\Users\evandenkooy\AppData\Local\anaconda3\Lib\site-packages\selenium\webdriver\common\service.py", line 126, in send_remote_shutdown_command
    request.urlopen(f"{self.service_url}/shutdown")
  File "c:\Users\evandenkooy\AppData\Local\anaconda3\Lib\urllib\request.py", line 216, in urlopen
    return opener.open(url, data, timeout)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\evandenkooy\AppData\Local\anaconda3\Lib\urllib\request.py", line 519, in open
    response = self._open(req, data)
               ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\evand

AttributeError: 'NoneType' object has no attribute 'find_all'

In [13]:
print(rawbirdata["images"].iloc[0])
rawbirdata.head(21)


['https://media.audubon.org/nas_birdapi_hero/h_a1_3725_3_northern-cardinal_carole_wiley_kk_adult-male-and-adult-female_0.jpg?width=380&height=255&auto=webp&quality=90&fit=crop&enable=upscale', 'https://media.audubon.org/nas_birdapi/a1_4524_2_northern-cardinal_diane_wurzer_kk_adult-male.jpg?width=380&height=255&auto=webp&quality=90&fit=crop&enable=upscale', 'https://media.audubon.org/nas_birdapi/apa_2015_donaldbrown_279112_northern_cardinal_kk_adult-female.jpg?width=380&height=255&auto=webp&quality=90&fit=crop&enable=upscale', 'https://media.audubon.org/nas_birdapi/a1_4883_3_northern-cardinal_anthony_louviere_kk_adult-male.jpg?width=380&height=255&auto=webp&quality=90&fit=crop&enable=upscale', 'https://media.audubon.org/nas_birdapi/northern-cardinal_001_juvenile_summer_maryland_ehpien_flickrcc-by-nc-nd-2.0_molting-juvenile.jpg?width=380&height=255&auto=webp&quality=90&fit=crop&enable=upscale', 'https://media.audubon.org/nas_birdapi/a1_6762_4_northern-cardinal-female_rosemary_gillan_adul

Unnamed: 0,name,sciname,webname,at_glance,category,conservation,habitat,region,behavior,population,...,call_pattern,call_type,calls_audio,habitat_ext,eggs,young,feeding_behavior,diet,nesting,images
0,Northern Cardinal,Cardinalis cardinalis,northern-cardinal,"One of our most popular birds, the Cardinal is...","Cardinals, Perching Birds",Low Concern,"Arroyos and Canyons, Desert and Arid Habitats,...","Eastern Canada, Florida, Great Lakes, Mid Atla...","Flitter, Formation, Undulating",130.000.000,...,Falling,"Chirp/Chip, Whistle",{'Songs #1': 'https://media.audubon.org/nas_bi...,"Woodland edges, thickets, suburban gardens, to...","3-4, sometimes 2-5. Whitish to pale bluish or ...",Both parents feed nestlings. Young leave nest ...,Forages mostly while hopping on ground or in l...,"Mostly seeds, insects, berries. Diet is quite ...","Male sings to defend nesting territory, active...",[https://media.audubon.org/nas_birdapi_hero/h_...
1,Barred Owl,Strix varia,barred-owl,The rich baritone hooting of the Barred Owl is...,Owls,Low Concern,"Forests and Woodlands, Freshwater Wetlands, La...","California, Eastern Canada, Florida, Great Lak...","Flap/Glide, Hovering",3.500.000,...,"Complex, Falling, Undulating",Hoot,{'Classic hoots #1': 'https://media.audubon.or...,"Woodlands, wooded river bottoms, wooded swamps...","2-3, rarely 4. White. Incubation is mostly or ...",Female may remain with young much of time at f...,"Hunts by night or day, perhaps most at dawn an...",Mostly small mammals. Eats many mice and other...,Courtship involves both male and female bobbin...,[https://media.audubon.org/nas_birdapi_hero/we...
2,Blue Jay,Cyanocitta cristata,blue-jay,One of the loudest and most colorful birds of ...,"Crows, Magpies, Jays, Perching Birds",Low Concern,"Coasts and Shorelines, Fields, Meadows, and Gr...","California, Eastern Canada, Florida, Great Lak...","Direct Flight, Flap/Glide, Hovering, Undulating",17.000.000,...,"Falling, Flat","Chirp/Chip, Rattle, Raucous, Scream, Whistle",{'Jay calls & clicks': 'https://media.audubon....,"Oak and pine woods, suburban gardens, groves, ...","4-5, sometimes 3-7. Greenish or buff, sometime...",Both parents bring food for nestlings. Young l...,Forages in trees and shrubs and on ground. Com...,Omnivorous. Most of diet is vegetable matter (...,Courtship may involve aerial chases; male may ...,[https://media.audubon.org/nas_birdapi_hero/sf...
3,Red-tailed Hawk,Buteo jamaicensis,red-tailed-hawk,This is the most widespread and familiar large...,"Hawk-like Birds, Hawks and Eagles",Low Concern,"Coasts and Shorelines, Desert and Arid Habitat...","Alaska and The North, California, Eastern Cana...","Direct Flight, Soaring",3.100.000,...,"Falling, Simple",Scream,{'Typical calls #1': 'https://media.audubon.or...,"Open country, woodlands, prairie groves, mount...","2-3, sometimes 4, rarely 1-5. Whitish, blotche...",Female remains with young most of the time dur...,Does most hunting by watching from a high perc...,"Varied, includes small mammals, birds, reptile...","In courtship, male and female soar in high cir...",[https://media.audubon.org/nas_birdapi_hero/h_...
4,Great Horned Owl,Bubo virginianus,great-horned-owl,Found almost throughout North America and much...,Owls,Low Concern,"Coasts and Shorelines, Desert and Arid Habitat...","Alaska and The North, California, Eastern Cana...",,5.700.000,...,,,{'Hoots of pair': 'https://media.audubon.org/n...,"Forests, woodlots, streamsides, open country. ...","2-3, sometimes 1-5, rarely 6. Dull whitish. In...",Both parents take part in providing food for y...,"Hunts mostly at night, sometimes at dusk. Watc...","Varied, mostly mammals and birds. Mammals make...",May begin nesting very early in north (late wi...,[https://media.audubon.org/nas_birdapi_hero/we...
5,House Finch,Haemorhous mexicanus,house-finch,"Adaptable, colorful, and cheery-voiced, House ...","Finches, Perching Birds",Low Concern,"Arroyos and Canyons, Desert and Arid Habitats,...","California, Eastern Canada, Florida, Great Lak...","Flitter, Rapid Wingbeats, Undulating",40.000.000,...,"Complex, Falling, Rising, Undulating","Chirp/Chip, Hi, Trill, Whistle",{'Song #1': 'https://media.audubon.org/nas_bir...,"Cities, suburbs, farms, canyons. Original habi...","4-5, sometimes 2-6. Pale blue, with black and ...",Both parents feed nestlings. Young leave the n...,"Forages on ground, while perching in weeds, or...","Mostly seeds, buds, berries. Almost all of die...",Pairs may begin to form within flocks in winte...,[https://media.audubon.org/nas_birdapi_hero/h_...
6,Gray Catbird,Dumetella carolinensis,gray-catbird,"Rather plain but with lots of personality, the...","Mockingbirds and Thrashers, Perching Birds",Low Concern,"Fields, Meadows, and Grasslands, Forests and W...","California, Eastern Canada, Florida, Great Lak...","Direct Flight, Flitter",29.000.000,...,"Falling, Flat, Undulating","Chatter, Chirp/Chip, Hi, Scream, Whistle",{'Song #1': 'https://media.audubon.org/nas_bir...,"Undergrowth, brush, thorn scrub, suburban gard...","4, sometimes 3-5, rarely 2-6. Greenish blue, r...",Both parents feed the nestlings. Young leave t...,"Does much foraging on ground, flipping leaves ...",Mostly insects and berries. Especially in earl...,"Early in breeding season, male sings constantl...",[https://media.audubon.org/nas_birdapi_hero/we...
7,Mourning Dove,Zenaida macroura,mourning-dove,The mournful cooing of the Mourning Dove is on...,"Pigeon-like Birds, Pigeons and Doves",Low Concern,"Coasts and Shorelines, Desert and Arid Habitat...","Alaska and The North, California, Eastern Cana...",Direct Flight,150.000.000,...,"Falling, Flat, Undulating",Hoot,{'Coo & partial coo': 'https://media.audubon.o...,"Farms, towns, open woods, roadsides, grassland...","2. White. Incubation is by both parents, about...","Both parents feed young ""pigeon milk."" Young l...",Forages mostly on ground; sometimes will perch...,Seeds. Feeds almost entirely on seeds (99% of ...,"In courtship, male flies up with noisy wingbea...",[https://media.audubon.org/nas_birdapi_hero/h_...
8,American Crow,Corvus brachyrhynchos,american-crow,Crows are thought to be among our most intelli...,,Low Concern,,"Alaska and The North, California, Eastern Cana...",Direct Flight,28.000.000,...,"Falling, Flat, Simple","Rattle, Raucous",{'Caws #1': 'https://media.audubon.org/nas_bir...,"Woodlands, farms, fields, river groves, shores...","4-6, sometimes 3-9. Dull blue-green to gray-gr...","Fed by both parents and sometimes by ""helpers....","Opportunistic, quickly taking advantage of new...",Omnivorous. Seems to feed on practically anyth...,"In courtship on ground or in tree, male faces ...",[https://media.audubon.org/nas_birdapi_hero/we...
9,American Robin,Turdus migratorius,american-robin,A very familiar bird over most of North Americ...,"Perching Birds, Thrushes",Low Concern,"Arroyos and Canyons, Coasts and Shorelines, De...","Alaska and The North, California, Eastern Cana...","Direct Flight, Running",370.000.000,...,"Flat, Undulating","Chirp/Chip, Flute, Whistle",{'Song #1': 'https://media.audubon.org/nas_bir...,"Cities, towns, lawns, farmland, forests; in wi...","Usually 4, sometimes 3-7. Pale blue or ""robin'...","Both parents feed young, though female does mo...","Does much foraging on the ground, running and ...","Mostly insects, berries, earthworms. In early ...",Males arrive before females on nesting grounds...,[https://media.audubon.org/nas_birdapi_hero/we...


In [12]:
dummyRows = pd.read_csv("test4birds - test4birds.csv (1).csv")
main_folder = 'bird_images'
if not os.path.exists(main_folder):
    os.makedirs(main_folder)

# Iterate over each row in the DataFrame
for index, row in dummyRows.iterrows():
    # Extract the bird name and the list of image URLs
    bird_name = row['name']
    image_urls = row['images']
    if image_urls != None:
    
        # Create a folder for the current bird within the main folder
        bird_folder = os.path.join(main_folder, bird_name)
        if not os.path.exists(bird_folder):
            os.makedirs(bird_folder)
        
        # Download images for the current bird
        for i, url in enumerate(image_urls):
            try:
                response = requests.get(url)
                if response.status_code == 200:
                    # Save the image with a unique name within the bird's folder
                    with open(os.path.join(bird_folder, f'{bird_name}_{i}.jpg'), 'wb') as f:
                        f.write(response.content)
            except Exception as e:
                print(f"Error downloading image {i+1} for {bird_name}: {e}")

TypeError: type() takes 1 or 3 arguments

In [8]:
dummyRows.head()

Unnamed: 0.1,Unnamed: 0,name,sciname,webname,at_glance,category,conservation,habitat,region,behavior,...,call_pattern,call_type,calls_audio,habitat_ext,eggs,young,feeding_behavior,diet,nesting,images
0,0,Northern Cardinal,Cardinalis cardinalis,northern-cardinal,"One of our most popular birds, the Cardinal is...","Cardinals, Perching Birds",Low Concern,"Arroyos and Canyons, Desert and Arid Habitats,...","Eastern Canada, Florida, Great Lakes, Mid Atla...","Flitter, Formation, Undulating",...,Falling,"Chirp/Chip, Whistle",{'Songs #1': 'https://media.audubon.org/nas_bi...,"Woodland edges, thickets, suburban gardens, to...","3-4, sometimes 2-5. Whitish to pale bluish or ...",Both parents feed nestlings. Young leave nest ...,Forages mostly while hopping on ground or in l...,"Mostly seeds, insects, berries. Diet is quite ...","Male sings to defend nesting territory, active...",['https://media.audubon.org/nas_birdapi_hero/h...
1,1,Barred Owl,Strix varia,barred-owl,The rich baritone hooting of the Barred Owl is...,Owls,Low Concern,"Forests and Woodlands, Freshwater Wetlands, La...","California, Eastern Canada, Florida, Great Lak...","Flap/Glide, Hovering",...,"Complex, Falling, Undulating",Hoot,{'Classic hoots #1': 'https://media.audubon.or...,"Woodlands, wooded river bottoms, wooded swamps...","2-3, rarely 4. White. Incubation is mostly or ...",Female may remain with young much of time at f...,"Hunts by night or day, perhaps most at dawn an...",Mostly small mammals. Eats many mice and other...,Courtship involves both male and female bobbin...,['https://media.audubon.org/nas_birdapi_hero/w...
2,2,Blue Jay,Cyanocitta cristata,blue-jay,One of the loudest and most colorful birds of ...,"Crows, Magpies, Jays, Perching Birds",Low Concern,"Coasts and Shorelines, Fields, Meadows, and Gr...","California, Eastern Canada, Florida, Great Lak...","Direct Flight, Flap/Glide, Hovering, Undulating",...,"Falling, Flat","Chirp/Chip, Rattle, Raucous, Scream, Whistle",{'Jay calls & clicks': 'https://media.audubon....,"Oak and pine woods, suburban gardens, groves, ...","4-5, sometimes 3-7. Greenish or buff, sometime...",Both parents bring food for nestlings. Young l...,Forages in trees and shrubs and on ground. Com...,Omnivorous. Most of diet is vegetable matter (...,Courtship may involve aerial chases; male may ...,['https://media.audubon.org/nas_birdapi_hero/s...
3,3,Red-tailed Hawk,Buteo jamaicensis,red-tailed-hawk,This is the most widespread and familiar large...,"Hawk-like Birds, Hawks and Eagles",Low Concern,"Coasts and Shorelines, Desert and Arid Habitat...","Alaska and The North, California, Eastern Cana...","Direct Flight, Soaring",...,"Falling, Simple",Scream,{'Typical calls #1': 'https://media.audubon.or...,"Open country, woodlands, prairie groves, mount...","2-3, sometimes 4, rarely 1-5. Whitish, blotche...",Female remains with young most of the time dur...,Does most hunting by watching from a high perc...,"Varied, includes small mammals, birds, reptile...","In courtship, male and female soar in high cir...",['https://media.audubon.org/nas_birdapi_hero/h...
4,4,Great Horned Owl,Bubo virginianus,great-horned-owl,Found almost throughout North America and much...,Owls,Low Concern,"Coasts and Shorelines, Desert and Arid Habitat...","Alaska and The North, California, Eastern Cana...",,...,,,{'Hoots of pair': 'https://media.audubon.org/n...,"Forests, woodlots, streamsides, open country. ...","2-3, sometimes 1-5, rarely 6. Dull whitish. In...",Both parents take part in providing food for y...,"Hunts mostly at night, sometimes at dusk. Watc...","Varied, mostly mammals and birds. Mammals make...",May begin nesting very early in north (late wi...,['https://media.audubon.org/nas_birdapi_hero/w...


In [7]:
main_audio_folder = 'bird_audios'
if not os.path.exists(main_audio_folder):
    os.makedirs(main_audio_folder)

# Iterate over each row in the DataFrame
for index, row in rawbirdata.iterrows():
    # Extract the bird name and the list of image URLs
    bird_audio_name = row['name']
    audio_urls = row['calls_audio']
    print(audio_urls)
    if audio_urls != None:
    
        # Create a folder for the current bird within the main folder
        bird_audio_folder = os.path.join(main_audio_folder, bird_audio_name)
        if not os.path.exists(bird_audio_folder):
            os.makedirs(bird_audio_folder)
        
        # Download images for the current bird
        for name, url in audio_urls.items():
            print(url)
            try:
                response = requests.get(url)
                if response.status_code == 200:
                    # Save the image with a unique name within the bird's folder
                    with open(os.path.join(bird_audio_folder, f'{bird_audio_name}_{name}.mp3'), 'wb') as f:
                        f.write(response.content)
            except Exception as e:
                print(f"Error downloading audio {name} for {bird_audio_name}: {e}")

NameError: name 'rawbirdata' is not defined

In [None]:
if not os.path.exists('bird_ranges'):
    os.makedirs('bird_ranges')

# Sample DataFrame
# Assuming df is your DataFrame with 'name' and 'link_source' columns
# df = pd.read_csv('your_dataframe.csv')  # Load your DataFrame from a CSV file if needed

# Iterate over each row of the DataFrame
for index, row in rawbirdata.iterrows():
    bird_name = row['name']
    image_url = row['range_image']
    
    # Download the image
    response = requests.get(image_url)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Save the image to the 'bird_ranges' folder
        with open(f'bird_ranges/{bird_name}.jpg', 'wb') as f:
            f.write(response.content)
        print(f"Image downloaded for {bird_name}")
    else:
        print(f"Failed to download image for {bird_name}")

Image downloaded for Northern Cardinal
Image downloaded for Barred Owl
Image downloaded for Blue Jay
Image downloaded for Red-tailed Hawk


MissingSchema: Invalid URL 'nan': No scheme supplied. Perhaps you meant https://nan?

In [14]:
subset_toHeron = rawbirdata.head(16)
subset_toHeron.head(20)

Unnamed: 0,name,sciname,webname,at_glance,category,conservation,habitat,region,behavior,population,...,call_pattern,call_type,calls_audio,habitat_ext,eggs,young,feeding_behavior,diet,nesting,images
0,Northern Cardinal,Cardinalis cardinalis,northern-cardinal,"One of our most popular birds, the Cardinal is...","Cardinals, Perching Birds",Low Concern,"Arroyos and Canyons, Desert and Arid Habitats,...","Eastern Canada, Florida, Great Lakes, Mid Atla...","Flitter, Formation, Undulating",130.000.000,...,Falling,"Chirp/Chip, Whistle",{'Songs #1': 'https://media.audubon.org/nas_bi...,"Woodland edges, thickets, suburban gardens, to...","3-4, sometimes 2-5. Whitish to pale bluish or ...",Both parents feed nestlings. Young leave nest ...,Forages mostly while hopping on ground or in l...,"Mostly seeds, insects, berries. Diet is quite ...","Male sings to defend nesting territory, active...",[https://media.audubon.org/nas_birdapi_hero/h_...
1,Barred Owl,Strix varia,barred-owl,The rich baritone hooting of the Barred Owl is...,Owls,Low Concern,"Forests and Woodlands, Freshwater Wetlands, La...","California, Eastern Canada, Florida, Great Lak...","Flap/Glide, Hovering",3.500.000,...,"Complex, Falling, Undulating",Hoot,{'Classic hoots #1': 'https://media.audubon.or...,"Woodlands, wooded river bottoms, wooded swamps...","2-3, rarely 4. White. Incubation is mostly or ...",Female may remain with young much of time at f...,"Hunts by night or day, perhaps most at dawn an...",Mostly small mammals. Eats many mice and other...,Courtship involves both male and female bobbin...,[https://media.audubon.org/nas_birdapi_hero/we...
2,Blue Jay,Cyanocitta cristata,blue-jay,One of the loudest and most colorful birds of ...,"Crows, Magpies, Jays, Perching Birds",Low Concern,"Coasts and Shorelines, Fields, Meadows, and Gr...","California, Eastern Canada, Florida, Great Lak...","Direct Flight, Flap/Glide, Hovering, Undulating",17.000.000,...,"Falling, Flat","Chirp/Chip, Rattle, Raucous, Scream, Whistle",{'Jay calls & clicks': 'https://media.audubon....,"Oak and pine woods, suburban gardens, groves, ...","4-5, sometimes 3-7. Greenish or buff, sometime...",Both parents bring food for nestlings. Young l...,Forages in trees and shrubs and on ground. Com...,Omnivorous. Most of diet is vegetable matter (...,Courtship may involve aerial chases; male may ...,[https://media.audubon.org/nas_birdapi_hero/sf...
3,Red-tailed Hawk,Buteo jamaicensis,red-tailed-hawk,This is the most widespread and familiar large...,"Hawk-like Birds, Hawks and Eagles",Low Concern,"Coasts and Shorelines, Desert and Arid Habitat...","Alaska and The North, California, Eastern Cana...","Direct Flight, Soaring",3.100.000,...,"Falling, Simple",Scream,{'Typical calls #1': 'https://media.audubon.or...,"Open country, woodlands, prairie groves, mount...","2-3, sometimes 4, rarely 1-5. Whitish, blotche...",Female remains with young most of the time dur...,Does most hunting by watching from a high perc...,"Varied, includes small mammals, birds, reptile...","In courtship, male and female soar in high cir...",[https://media.audubon.org/nas_birdapi_hero/h_...
4,Great Horned Owl,Bubo virginianus,great-horned-owl,Found almost throughout North America and much...,Owls,Low Concern,"Coasts and Shorelines, Desert and Arid Habitat...","Alaska and The North, California, Eastern Cana...",,5.700.000,...,,,{'Hoots of pair': 'https://media.audubon.org/n...,"Forests, woodlots, streamsides, open country. ...","2-3, sometimes 1-5, rarely 6. Dull whitish. In...",Both parents take part in providing food for y...,"Hunts mostly at night, sometimes at dusk. Watc...","Varied, mostly mammals and birds. Mammals make...",May begin nesting very early in north (late wi...,[https://media.audubon.org/nas_birdapi_hero/we...
5,House Finch,Haemorhous mexicanus,house-finch,"Adaptable, colorful, and cheery-voiced, House ...","Finches, Perching Birds",Low Concern,"Arroyos and Canyons, Desert and Arid Habitats,...","California, Eastern Canada, Florida, Great Lak...","Flitter, Rapid Wingbeats, Undulating",40.000.000,...,"Complex, Falling, Rising, Undulating","Chirp/Chip, Hi, Trill, Whistle",{'Song #1': 'https://media.audubon.org/nas_bir...,"Cities, suburbs, farms, canyons. Original habi...","4-5, sometimes 2-6. Pale blue, with black and ...",Both parents feed nestlings. Young leave the n...,"Forages on ground, while perching in weeds, or...","Mostly seeds, buds, berries. Almost all of die...",Pairs may begin to form within flocks in winte...,[https://media.audubon.org/nas_birdapi_hero/h_...
6,Gray Catbird,Dumetella carolinensis,gray-catbird,"Rather plain but with lots of personality, the...","Mockingbirds and Thrashers, Perching Birds",Low Concern,"Fields, Meadows, and Grasslands, Forests and W...","California, Eastern Canada, Florida, Great Lak...","Direct Flight, Flitter",29.000.000,...,"Falling, Flat, Undulating","Chatter, Chirp/Chip, Hi, Scream, Whistle",{'Song #1': 'https://media.audubon.org/nas_bir...,"Undergrowth, brush, thorn scrub, suburban gard...","4, sometimes 3-5, rarely 2-6. Greenish blue, r...",Both parents feed the nestlings. Young leave t...,"Does much foraging on ground, flipping leaves ...",Mostly insects and berries. Especially in earl...,"Early in breeding season, male sings constantl...",[https://media.audubon.org/nas_birdapi_hero/we...
7,Mourning Dove,Zenaida macroura,mourning-dove,The mournful cooing of the Mourning Dove is on...,"Pigeon-like Birds, Pigeons and Doves",Low Concern,"Coasts and Shorelines, Desert and Arid Habitat...","Alaska and The North, California, Eastern Cana...",Direct Flight,150.000.000,...,"Falling, Flat, Undulating",Hoot,{'Coo & partial coo': 'https://media.audubon.o...,"Farms, towns, open woods, roadsides, grassland...","2. White. Incubation is by both parents, about...","Both parents feed young ""pigeon milk."" Young l...",Forages mostly on ground; sometimes will perch...,Seeds. Feeds almost entirely on seeds (99% of ...,"In courtship, male flies up with noisy wingbea...",[https://media.audubon.org/nas_birdapi_hero/h_...
8,American Crow,Corvus brachyrhynchos,american-crow,Crows are thought to be among our most intelli...,,Low Concern,,"Alaska and The North, California, Eastern Cana...",Direct Flight,28.000.000,...,"Falling, Flat, Simple","Rattle, Raucous",{'Caws #1': 'https://media.audubon.org/nas_bir...,"Woodlands, farms, fields, river groves, shores...","4-6, sometimes 3-9. Dull blue-green to gray-gr...","Fed by both parents and sometimes by ""helpers....","Opportunistic, quickly taking advantage of new...",Omnivorous. Seems to feed on practically anyth...,"In courtship on ground or in tree, male faces ...",[https://media.audubon.org/nas_birdapi_hero/we...
9,American Robin,Turdus migratorius,american-robin,A very familiar bird over most of North Americ...,"Perching Birds, Thrushes",Low Concern,"Arroyos and Canyons, Coasts and Shorelines, De...","Alaska and The North, California, Eastern Cana...","Direct Flight, Running",370.000.000,...,"Flat, Undulating","Chirp/Chip, Flute, Whistle",{'Song #1': 'https://media.audubon.org/nas_bir...,"Cities, towns, lawns, farmland, forests; in wi...","Usually 4, sometimes 3-7. Pale blue or ""robin'...","Both parents feed young, though female does mo...","Does much foraging on the ground, running and ...","Mostly insects, berries, earthworms. In early ...",Males arrive before females on nesting grounds...,[https://media.audubon.org/nas_birdapi_hero/we...


In [None]:
subset_toHeron.to_csv('subset_birds.csv', index=True)