Scraping the sound clips published on this National Park Service (Glacier Bay) page: https://www.nps.gov/glba/learn/nature/soundclips.htm

In [67]:
# Mount Google Drive into the Colab filesystem; the last cell of this notebook
# writes the scraped CSV to a path under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [68]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random
from tqdm import tqdm

In [69]:
# Download the sound-clip listing page and parse it for scraping.
url = 'https://www.nps.gov/glba/learn/nature/soundclips.htm'
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

In [70]:
# The <h3> elements inside this container hold the clip titles and section headings.
content_cell = soup.find(id='cs_idCell2x1x1')
# `.string` yields None for an <h3> that is not a single plain-text node,
# which is where the None entries in the output below come from.
titles = [heading.string for heading in content_cell.find_all('h3')]
titles

[None,
 'Glacier Bay humpback whale song November 2020',
 '2020 humpback whale slaps and vocals with harbor seal',
 'humpback whale feeding call',
 'Humpback whale vocals and loud snaps',
 'Humpback whale bubble-feeding and vocalizing',
 'Outboard Engine Whine and Humpback Whale Song',
 'Glacier Bay 60-second clip humpback whale song November 2020',
 None,
 'Resident Killer Whale Vocalizations',
 'Offshore Killer Whale Vocalizations',
 'Transient Killer Whale Vocalizations',
 'Killer Whale Echolocation Clicks',
 None,
 'Harbor Seal Vocalization - August 7, 2020',
 'Other Animal Sounds',
 'Seabirds Calling at Sea Surface',
 'Seabirds Diving at Sea Surface',
 'Unknown Animal Knock on Hydrophone - "Knockfish"',
 'Vessel Sounds',
 'Cruise Ship Underwater Recording',
 'State Ferry Hydrophone Recording',
 'Freighter Hydrophone Recording',
 'Small Diesel Engine Hydrophone Recording',
 'Outboard Engine Whine and Acceleration',
 'Outboard Engine (60hp) at 10 Knots',
 'Outboard Engine (60hp) at 

In [71]:
# Drop the falsy entries (the None values left by the previous cell, plus any empty strings).
titles = list(filter(None, titles))
titles

['Glacier Bay humpback whale song November 2020',
 '2020 humpback whale slaps and vocals with harbor seal',
 'humpback whale feeding call',
 'Humpback whale vocals and loud snaps',
 'Humpback whale bubble-feeding and vocalizing',
 'Outboard Engine Whine and Humpback Whale Song',
 'Glacier Bay 60-second clip humpback whale song November 2020',
 'Resident Killer Whale Vocalizations',
 'Offshore Killer Whale Vocalizations',
 'Transient Killer Whale Vocalizations',
 'Killer Whale Echolocation Clicks',
 'Harbor Seal Vocalization - August 7, 2020',
 'Other Animal Sounds',
 'Seabirds Calling at Sea Surface',
 'Seabirds Diving at Sea Surface',
 'Unknown Animal Knock on Hydrophone - "Knockfish"',
 'Vessel Sounds',
 'Cruise Ship Underwater Recording',
 'State Ferry Hydrophone Recording',
 'Freighter Hydrophone Recording',
 'Small Diesel Engine Hydrophone Recording',
 'Outboard Engine Whine and Acceleration',
 'Outboard Engine (60hp) at 10 Knots',
 'Outboard Engine (60hp) at 20 Knots',
 'Propelle

In [72]:
# 'Vessel Sounds', 'Ambient Noise' and 'Other Animal Sounds' are section headings,
# not clips, so they must not end up in the title list.
# Filtering into a new list (instead of calling list.remove) keeps this cell
# idempotent: running it a second time no longer raises ValueError.
section_headings = ('Vessel Sounds', 'Ambient Noise', 'Other Animal Sounds')
titles = [title for title in titles if title not in section_headings]

In [73]:
# Display the title list after the three section headings have been removed.
titles

['Glacier Bay humpback whale song November 2020',
 '2020 humpback whale slaps and vocals with harbor seal',
 'humpback whale feeding call',
 'Humpback whale vocals and loud snaps',
 'Humpback whale bubble-feeding and vocalizing',
 'Outboard Engine Whine and Humpback Whale Song',
 'Glacier Bay 60-second clip humpback whale song November 2020',
 'Resident Killer Whale Vocalizations',
 'Offshore Killer Whale Vocalizations',
 'Transient Killer Whale Vocalizations',
 'Killer Whale Echolocation Clicks',
 'Harbor Seal Vocalization - August 7, 2020',
 'Seabirds Calling at Sea Surface',
 'Seabirds Diving at Sea Surface',
 'Unknown Animal Knock on Hydrophone - "Knockfish"',
 'Cruise Ship Underwater Recording',
 'State Ferry Hydrophone Recording',
 'Freighter Hydrophone Recording',
 'Small Diesel Engine Hydrophone Recording',
 'Outboard Engine Whine and Acceleration',
 'Outboard Engine (60hp) at 10 Knots',
 'Outboard Engine (60hp) at 20 Knots',
 'Propeller Whine Hydrophone Recording',
 'Heavy Rai

In [74]:
# Collect the media URL from the <source> tag of each `video-js` element.
# The first `video-js` element is skipped - presumably it is not one of the
# titled clips (TODO confirm against the page).
media_players = soup.find_all(class_='video-js')[1:]
download_links = [player.find("source")['src'] for player in media_players]
download_links

['https://www.nps.gov/nps-audiovideo/audiovideo/65381587-aedd-449b-8f4c-7f8bb553be4f.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/b8fc6195-c16f-4508-ba6a-9cb527a85cb2.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/cde82ded-68b8-4c67-b6c3-9ab190908357.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/5635e3d6-1b3a-4c6b-8f90-28fdfbc4155c.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/f7af47fb-4601-4fa3-9274-41be41b93276.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/c1cda635-32ce-4917-ab9b-b09b12dacc5c.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/4e34e6c7-5267-4c0b-8ff9-bf506086aa56.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/e60488c2-30fb-414b-bc9f-ee16f886a897.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/76d2ae6f-616f-4ef1-91a8-d797cbf7cd36.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/d708efda-0bef-4399-b256-b5346836405c.mp3',
 'https://www.nps.gov/nps-audiovideo/audiovideo/2832079a-0115-4b94-b895-185317774abd.mp3',

In [75]:
# Pair each title with its download link, one row per clip.
# The pairing is purely positional, so both lists must be in the same order.
data = dict(title=titles, download_link=download_links)
df = pd.DataFrame.from_dict(data, orient='columns')
df

Unnamed: 0,title,download_link
0,Glacier Bay humpback whale song November 2020,https://www.nps.gov/nps-audiovideo/audiovideo/...
1,2020 humpback whale slaps and vocals with harb...,https://www.nps.gov/nps-audiovideo/audiovideo/...
2,humpback whale feeding call,https://www.nps.gov/nps-audiovideo/audiovideo/...
3,Humpback whale vocals and loud snaps,https://www.nps.gov/nps-audiovideo/audiovideo/...
4,Humpback whale bubble-feeding and vocalizing,https://www.nps.gov/nps-audiovideo/audiovideo/...
5,Outboard Engine Whine and Humpback Whale Song,https://www.nps.gov/nps-audiovideo/audiovideo/...
6,Glacier Bay 60-second clip humpback whale song...,https://www.nps.gov/nps-audiovideo/audiovideo/...
7,Resident Killer Whale Vocalizations,https://www.nps.gov/nps-audiovideo/audiovideo/...
8,Offshore Killer Whale Vocalizations,https://www.nps.gov/nps-audiovideo/audiovideo/...
9,Transient Killer Whale Vocalizations,https://www.nps.gov/nps-audiovideo/audiovideo/...


In [76]:
# Swap every ',' for ' -' in all string cells so the exported CSV has no embedded commas.
# NOTE(review): pandas' to_csv quotes fields containing the delimiter by default,
# so this lossy replacement may be unnecessary - confirm before removing it.
df = df.replace(to_replace=',', value=' -', regex=True)
df

Unnamed: 0,title,download_link
0,Glacier Bay humpback whale song November 2020,https://www.nps.gov/nps-audiovideo/audiovideo/...
1,2020 humpback whale slaps and vocals with harb...,https://www.nps.gov/nps-audiovideo/audiovideo/...
2,humpback whale feeding call,https://www.nps.gov/nps-audiovideo/audiovideo/...
3,Humpback whale vocals and loud snaps,https://www.nps.gov/nps-audiovideo/audiovideo/...
4,Humpback whale bubble-feeding and vocalizing,https://www.nps.gov/nps-audiovideo/audiovideo/...
5,Outboard Engine Whine and Humpback Whale Song,https://www.nps.gov/nps-audiovideo/audiovideo/...
6,Glacier Bay 60-second clip humpback whale song...,https://www.nps.gov/nps-audiovideo/audiovideo/...
7,Resident Killer Whale Vocalizations,https://www.nps.gov/nps-audiovideo/audiovideo/...
8,Offshore Killer Whale Vocalizations,https://www.nps.gov/nps-audiovideo/audiovideo/...
9,Transient Killer Whale Vocalizations,https://www.nps.gov/nps-audiovideo/audiovideo/...


In [77]:
# Write the title/link table to Google Drive, leaving out the DataFrame index column.
output_csv = '/content/drive/MyDrive/lewagon-deepdive/working_environment/01.getting_data/scraping Glacier Bay sounds/glacier_bay_sounds.csv'
df.to_csv(output_csv, index=False)