In [1]:
import csv
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [2]:
# Smoke test: download the xeno-canto landing page and show its <title> tag
# to confirm the site is reachable and the HTML parses.
url = 'https://xeno-canto.org'
response = requests.get(url)

# Parse the raw bytes, then look up the first <title> element
# (``find`` returns None when the page has no such tag).
soup = BeautifulSoup(response.content, 'html.parser')
title = soup.find(name='title')
print(title)

<title>xeno-canto :: Sharing wildlife sounds from around the world</title>


In [3]:
pip install requests

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [4]:
import csv

In [5]:
def _select_text(node, selector):
    """Return the stripped text of the first match for ``selector`` under ``node``.

    Returns an empty string when nothing matches, so a single missing field
    does not abort the whole scrape with an ``AttributeError``.
    """
    match = node.select_one(selector)
    return match.text.strip() if match is not None else ""


def extract_data(timeout=30):
    """Scrape the species browse pages and return one dict per species row.

    Starts at ``https://xeno-canto.org/species/browse.php`` and follows the
    ``li.next a`` link from page to page until there is no next link, a page
    is answered with a non-200 status, or a link points back to a page that
    was already fetched.

    Args:
        timeout: Seconds to wait for each HTTP response before ``requests``
            raises; without a timeout a stalled connection blocks forever.

    Returns:
        A list of dicts with the keys ``Species``, ``Family``, ``Subspecies``,
        ``Latest Species``, ``Latest Articles``, ``Location``, ``Date``,
        ``Recordists``, ``Recording Time`` and ``Taxonomy``. A field whose
        element is absent from a row is stored as an empty string.

    NOTE(review): the recorded run of this cell produced an empty list, which
    suggests the CSS selectors below do not match the live page markup --
    confirm them against the current site.
    """
    base_url = "https://xeno-canto.org"
    url = f"{base_url}/species/browse.php"

    bird_data = []
    visited = set()  # guards against a "next" link that loops back

    while url and url not in visited:
        visited.add(url)
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            # Same policy as extract_bird_songs: stop paging on an error
            # response instead of parsing an error page.
            break
        soup = BeautifulSoup(response.text, "html.parser")

        for row in soup.select(".species-list .species"):
            # Taxonomy links (order, family, genus, species) flattened to a
            # comma-separated string so the value fits in one CSV cell.
            taxonomy = [item.text.strip() for item in row.select(".taxonomy a")]

            bird_data.append({
                "Species": _select_text(row, "a"),
                "Family": _select_text(row, ".family"),
                "Subspecies": _select_text(row, ".subspecies"),
                "Latest Species": _select_text(row, ".latest_species"),
                "Latest Articles": _select_text(row, ".latest_articles"),
                "Location": _select_text(row, ".location"),
                "Date": _select_text(row, ".date"),
                "Recordists": _select_text(row, ".recordist"),
                "Recording Time": _select_text(row, ".time"),
                "Taxonomy": ", ".join(taxonomy),
            })

        next_page = soup.select_one("li.next a")
        next_href = next_page.get("href") if next_page is not None else None
        # urljoin resolves relative, root-relative and absolute hrefs alike;
        # plain string concatenation only works for root-relative ones.
        url = urljoin(url, next_href) if next_href else None

    print(bird_data)

    return bird_data

def save_to_csv(data, filename):
    """Write a list of dicts to ``filename`` as UTF-8 CSV with a header row.

    Args:
        data: Iterable of dicts, one per output row. May be empty or None, in
            which case only an (empty) header line is written.
        filename: Path of the CSV file to create or overwrite.

    The header is the union of the keys of all rows, in first-seen order.
    Taking the keys from the first row only would make ``csv.DictWriter``
    raise ``ValueError`` as soon as a later row carried an extra key. Keys a
    row lacks are written as empty cells (``DictWriter``'s default).
    """
    rows = list(data or [])
    # dict.fromkeys de-duplicates while preserving insertion order.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

if __name__ == "__main__":
    # Scrape once; the result stays in ``bird_data`` for the code that follows.
    bird_data = extract_data()

    # Echo every scraped record, one per line, under a heading.
    print("Bird Species Data:")
    for bird in bird_data:
        print(bird)

    # Persist the same records to disk, then confirm on stdout.
    save_to_csv(data=bird_data, filename="bird_data.csv")
    print("CSV file generated successfully.")

[]
Bird Species Data:
CSV file generated successfully.


In [6]:
# Extract bird song recordings from the xeno-canto API

def extract_bird_songs(query="q", timeout=30):
    """Download every page of recording metadata for a search query.

    Requests ``https://xeno-canto.org/api/2/recordings`` page by page,
    starting at page 1, and concatenates each page's ``recordings`` list.

    Args:
        query: Search string sent as the ``query`` parameter. The default
            ``"q"`` is the value this function has always sent.
            NOTE(review): it looks like a placeholder search term -- confirm
            what the intended query is.
        timeout: Seconds to wait for each HTTP response before ``requests``
            raises; without a timeout a stalled connection blocks forever.

    Returns:
        A list of recording dicts exactly as returned by the API. Paging stops
        at the first non-200 response, non-JSON body, payload without a
        non-empty ``recordings`` list, or -- when the payload reports
        ``numPages`` -- once that last page has been read.
    """
    base_url = "https://xeno-canto.org/api/2/recordings"
    bird_songs = []

    page = 1
    while True:
        params = {"query": query, "page": page}
        response = requests.get(base_url, params=params, timeout=timeout)

        if response.status_code != 200:
            break

        try:
            data = response.json()
        except ValueError:
            # Body was not JSON (e.g. an HTML error page): treat as the end.
            break

        # An error payload may lack "recordings"; .get avoids a KeyError.
        recordings = data.get("recordings") if isinstance(data, dict) else None
        if not recordings:
            break

        bird_songs.extend(recordings)

        # Stop on the reported last page instead of issuing one extra request
        # just to discover there is nothing left.
        try:
            last_page = int(data.get("numPages"))
        except (TypeError, ValueError):
            last_page = None  # field absent or malformed: keep paging
        if last_page is not None and page >= last_page:
            break

        page += 1

    return bird_songs

if __name__ == "__main__":
    # Fetch everything up front, then report how many recordings came back.
    bird_songs_data = extract_bird_songs()
    print(f"Total bird songs extracted: {len(bird_songs_data)}")

    # Show only a leading sample so the record layout is visible without
    # dumping the whole list.
    print(bird_songs_data[0:10])


Total bird songs extracted: 9736
[{'id': '676311', 'gen': 'Grallaria', 'sp': 'saturata', 'ssp': '', 'group': 'birds', 'en': 'Equatorial Antpitta', 'rec': 'Galo Real', 'cnt': 'Ecuador', 'loc': 'Papallacta, Quijos, Napo', 'lat': '-0.3766', 'lng': '-78.1603', 'alt': '3400', 'type': '', 'sex': 'male', 'stage': 'adult', 'method': 'field recording', 'url': '//xeno-canto.org/676311', 'file': 'https://xeno-canto.org/676311/download', 'file-name': 'XC676311-Equatorial Antpitta .mp3', 'sono': {'small': '//xeno-canto.org/sounds/uploaded/GHVYOJSPRJ/ffts/XC676311-small.png', 'med': '//xeno-canto.org/sounds/uploaded/GHVYOJSPRJ/ffts/XC676311-med.png', 'large': '//xeno-canto.org/sounds/uploaded/GHVYOJSPRJ/ffts/XC676311-large.png', 'full': '//xeno-canto.org/sounds/uploaded/GHVYOJSPRJ/ffts/XC676311-full.png'}, 'osci': {'small': '//xeno-canto.org/sounds/uploaded/GHVYOJSPRJ/wave/XC676311-small.png', 'med': '//xeno-canto.org/sounds/uploaded/GHVYOJSPRJ/wave/XC676311-med.png', 'large': '//xeno-canto.org/soun

In [7]:
import pandas as pd

In [8]:
# Export the scraped species records with pandas, as CSV and as Excel.
# `bird_data` is the list of dicts returned by extract_data().
df = pd.DataFrame(bird_data)
# index=False: the default RangeIndex carries no information and would be
# written as an unnamed first column, unlike the layout of bird_data.csv.
df.to_csv('bird_species.csv', index=False)
# NOTE(review): writing .xlsx needs an Excel engine (presumably openpyxl)
# installed in the kernel's environment -- confirm.
df.to_excel('bird_species.xlsx', index=False)
print('Saved to file')

Saved to file


In [9]:
# Extract the location, date, and time of each recording and print them

def extract_location_data():
    """Print location, date and time for each species entry on the home page.

    Downloads ``https://xeno-canto.org``, selects every
    ``.species-list .species`` element and, for each one, looks up its
    ``.location``, ``.date`` and ``.time`` children. Entries that have all
    three are printed on one line; entries missing any of them are reported
    as incomplete. When no entries are found at all, a single notice is
    printed instead. Nothing is returned.
    """
    page = requests.get("https://xeno-canto.org")
    parsed = BeautifulSoup(page.content, 'html.parser')

    entries = parsed.select('.species-list .species')
    if not entries:
        print("No bird data found. Please check the website's structure or try again later.")
        return

    wanted = (".location", ".date", ".time")
    for entry in entries:
        # Look up all three elements first; any of them may be None.
        nodes = [entry.select_one(css) for css in wanted]

        if not all(nodes):
            print("Incomplete data for a bird recording. Skipping this entry.")
            continue

        location, date, recording_time = (node.text.strip() for node in nodes)
        print(f"Location: {location}, Date: {date}, Time: {recording_time}")

# Run the location/date/time report when this code executes as the main
# module; the function prints its findings and returns nothing.
if __name__ == "__main__":
    extract_location_data()



No bird data found. Please check the website's structure or try again later.
