# Billion Dollar Disasters Data Scrape

### In this notebook, I scrape data from NOAA's Billion-Dollar Disasters website (disasters estimated to have cost more than $1 billion), using Playwright and BeautifulSoup in Python.

In [1]:
from bs4 import BeautifulSoup

In [2]:
from playwright.async_api import async_playwright

In [3]:
import os

playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless=False)

In [4]:
# Open a new page (tab) in the launched browser; all navigation below uses it.
page = await browser.new_page()

In [5]:
# Load NOAA's table of U.S. billion-dollar disaster events for 1980-2024.
# NOTE(review): the URL carries a `disasters[]=wildfire` filter, yet the rows
# scraped below include other disaster types (drought, severe storm, ...), some
# of them with `style="display: none;"`. The filter therefore seems to only
# hide rows in the browser rather than remove them from the HTML -- confirm.
await page.goto("https://www.ncei.noaa.gov/access/billions/events/US/1980-2024?disasters[]=wildfire")

<Response url='https://www.ncei.noaa.gov/access/billions/events/US/1980-2024?disasters[]=wildfire' request=<Request url='https://www.ncei.noaa.gov/access/billions/events/US/1980-2024?disasters[]=wildfire' method='GET'>>

In [6]:
html = await page.content()

In [7]:
# Name the parser explicitly: without it Beautiful Soup picks whichever parser
# happens to be installed (and emits a warning), so the parsed tree could
# differ from machine to machine. "html.parser" ships with Python itself.
soup_doc = BeautifulSoup(html, "html.parser")

In [8]:
# Locate the first element carrying the `hscrollbox` class; the events table
# is looked up inside it in the next cell.
all_disasters_html = soup_doc.find(attrs={'class': 'hscrollbox'})

In [9]:
# Every <tr> inside the table body is one row to parse.
table_body = all_disasters_html.find('tbody')
all_disasters = table_body.find_all('tr')

In [10]:
# Peek at a single raw <tr> to inspect the markup the parsing loop must handle.
all_disasters[24]

<tr class="all-disasters severe-storm TX SCR GCS SP TA adj" data-disaster="severe-storm" data-year="2024" id="event-20240528-20240527-severe-storm1" role="row" style="display: none;"><td class="event-name"><a href="/access/monitoring/monthly-report/national/202402"><div class="name">Texas Hail Storms</div><div class="dates">May 2024</div></a></td><td class="disaster-type" data-sortval="Severe Storm"><span class="ind severe-storm-ind" title="Severe Storm"></span>Severe Storm</td><td class="date beg-date right" data-sortval="20240527">May 27, 2024</td><td class="date end-date right primary" data-sortval="20240528">May 28, 2024</td><td class="details">Golfball to softball-sized hail caused extensive damage across north and east Texas. Some of these hail storms impacted major cities including Dallas and Houston where homes, vehicles, businesses and other infrastructure were damaged.</td><td class="cost" data-sortval="2.3">$2.3 <sup><a class="btn small ciac" data-bs-target="#ci-display" dat

In [11]:
def _date_cell_text(date_cell):
    """Return the display date stored in one begin/end date ``<td>``.

    Some date cells hold plain text (e.g. ``May 27, 2024``). Others wrap the
    month in two ``<span>`` elements followed by the day and year as loose
    text; taking ``.text`` of such a cell would repeat the month, so the first
    span's text is joined with the text that follows the second span.
    (Presumably the spans are an abbreviated and a long month name marked
    ``monthAbbr`` / ``monthLong`` -- falls back to span position otherwise.)

    Parameters
    ----------
    date_cell : bs4 Tag or None
        The ``<td>`` for the begin or end date, or None if the row lacks it.

    Returns
    -------
    str or None
        The date text, or None when ``date_cell`` is None.
    """
    if date_cell is None:
        return None

    month_abbr = date_cell.find(class_='monthAbbr')
    month_long = date_cell.find(class_='monthLong')
    if month_abbr is None or month_long is None:
        spans = date_cell.find_all('span')
        if len(spans) < 2:
            # No month markup: the cell text already is the date.
            return date_cell.text
        month_abbr, month_long = spans[0], spans[1]

    day_year = month_long.next_sibling
    return month_abbr.text + (day_year if day_year is not None else '')


# `count` ends up holding the 1-based number of the last row processed.
count = 0

disaster_list = []

# NOTE(review): the recorded output shows consecutive rows for the same event
# with slightly different costs, and rows carry an `adj` class -- possibly
# inflation-adjusted vs. unadjusted variants. Confirm and de-duplicate
# downstream if so.
for count, disaster_html in enumerate(all_disasters, start=1):
    # Each date is read from its OWN cell. Searching the whole row for the
    # month spans would pick up the end-date cell's spans whenever the
    # begin-date cell is plain text, recording the end date as the begin date.
    # Matching on the single `beg-date` / `end-date` class (rather than the
    # full class string) keeps the lookup working if the other presentational
    # classes on the cell change.
    begin_date = _date_cell_text(disaster_html.find('td', class_='beg-date'))
    end_date = _date_cell_text(disaster_html.find('td', class_='end-date'))
    if begin_date is None or end_date is None:
        # Flag the row instead of silently reusing the previous row's dates;
        # the missing value is stored as None.
        print("ERROR!!!")
        print(count)

    disaster_list.append({
        'event_name': disaster_html.find(class_='event-name').find(class_='name').text,
        # The cell holds an empty indicator <span> followed by the type label,
        # so the cell's own text is the label.
        'disaster_type': disaster_html.find(class_='disaster-type').get_text(strip=True),
        'begin_date': begin_date,
        'end_date': end_date,
        'details': disaster_html.find(class_='details').text,
        # Raw cell text; in the recorded output this looks like "$5.4 CI"
        # because the text of the link inside the cell is included.
        'cost': disaster_html.find(class_='cost').text,
        'deaths': disaster_html.find(class_='deaths').text,
    })

disaster_list[0:5]

[{'event_name': 'Southern/Eastern/Northwestern Drought and Heat Wave',
  'disaster_type': 'Drought',
  'begin_date': 'Jan 1, 2024',
  'end_date': 'Dec 31, 2024',
  'details': 'Drought conditions impacted many Southern, Eastern and Northwestern states. This drought was more transient in its impacts over numerous states throughout the year. The states of Texas, Oklahoma and Kansas had some of the highest losses to crops from the effects of drought and heat. As the drought changed in intensity and duration throughout the year across several regions of the country. Several Northwestern states also had costly impacted to agriculture including Montana, Idaho and Washington. Numerous southern and eastern states from Mississippi through Pennsylvania also experienced crop impacts that were most severe in the Summer months. The drought conditions also strengthen through the Fall and Winter months impacting Maryland, Delaware and New Jersey. It was also one of the hottest years on record for a nu

In [12]:
import pandas as pd

# The records are flat dicts, so the plain DataFrame constructor builds the
# same table: one row per record, one column per key.
df = pd.DataFrame(disaster_list)
df.head()

Unnamed: 0,event_name,disaster_type,begin_date,end_date,details,cost,deaths
0,Southern/Eastern/Northwestern Drought and Heat...,Drought,"Jan 1, 2024","Dec 31, 2024","Drought conditions impacted many Southern, Eas...",$5.4 CI,136
1,Southern/Eastern/Northwestern Drought and Heat...,Drought,"Jan 1, 2024","Dec 31, 2024","Drought conditions impacted many Southern, Eas...",$5.3 CI,136
2,Hurricane Milton,Tropical Cyclone,"Oct 9, 2024","Oct 10, 2024",Category 3 Hurricane Milton with 120 mph susta...,$34.3 CI,32
3,Hurricane Milton,Tropical Cyclone,"Oct 9, 2024","Oct 10, 2024",Category 3 Hurricane Milton with 120 mph susta...,$34.3 CI,32
4,Hurricane Helene,Tropical Cyclone,"Sep 24, 2024","Sep 29, 2024",Category 4 Hurricane Helene with 140 mph susta...,$78.7 CI,219


In [13]:
# Sanity check: (rows, columns) of the scraped table.
df.shape

(749, 7)

In [14]:
# Inspect column types. In the recorded output every column is `object`, i.e.
# dates, cost and deaths are still raw strings and are not parsed here.
df.dtypes

event_name       object
disaster_type    object
begin_date       object
end_date         object
details          object
cost             object
deaths           object
dtype: object

In [15]:
# Save the scraped table (path is relative to the working directory); the
# positional row index is left out of the file.
output_csv = 'billion_dollar_disasters_scrape.csv'
df.to_csv(output_csv, index=False)