# Download and Unzip the files from the website

In [3]:
import os
import requests
import gzip
import shutil
from urllib.parse import urljoin
from bs4 import BeautifulSoup

In [4]:
# Download every ``.csv.gz`` archive linked from the NOAA Storm Events index
# page into ``storm_data/``, decompress each one to a plain ``.csv`` next to
# it, and delete the archive afterwards.

# URL to fetch data from
url = "https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/"

# Send a request to the webpage. A timeout keeps a stalled connection from
# hanging the cell forever, and raise_for_status() stops early on an HTTP
# error instead of parsing an error page that contains no file links.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Parse the HTML page with BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Find all links to files
links = soup.find_all("a", href=True)

# Create 'storm_data' directory if it doesn't exist (no separate existence
# check needed; exist_ok makes the call a no-op when it is already there).
os.makedirs("storm_data", exist_ok=True)

# Iterate over each link to download the CSV.gz files
for link in links:
    href = link.get("href")
    if not href.endswith(".csv.gz"):
        continue

    file_url = urljoin(url, href)  # Full URL to the file
    base_name = href.split("/")[-1]
    filename = os.path.join("storm_data", base_name)  # Save path for the .csv.gz file
    # Drop only the trailing ".gz" so the decompressed file ends in ".csv".
    csv_filename = filename[: -len(".gz")]

    # Download the .csv.gz file in chunks so it never has to fit in memory.
    with requests.get(file_url, stream=True, timeout=60) as file_response:
        # Fail here rather than saving an HTML error body as a ".gz" file,
        # which would only surface later as a confusing gzip error.
        file_response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in file_response.iter_content(chunk_size=8192):
                file.write(chunk)

    # Unzip the .csv.gz file and save as .csv
    with gzip.open(filename, "rb") as f_in, open(csv_filename, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)

    # The compressed archive is no longer needed once the .csv exists.
    os.remove(filename)

print("All .csv.gz files have been downloaded and unzipped.")

All .csv.gz files have been downloaded and unzipped.
