# Download WIFIA Data

## Importing libraries

In [1]:
import logging
import os
import sys

from logging.handlers import TimedRotatingFileHandler

import pandas as pd

## Defining the parameters of the execution

In [2]:
# Pages to scrape: each entry pairs an EPA WIFIA page URL with the name of the
# CSV file its table will be saved under.
files_to_download = [
    {
        "url": "https://www.epa.gov/wifia/wifia-letters-interest-table",
        "file_name": "wifia_letters_submitted.csv",
    },
    {
        "url": "https://www.epa.gov/wifia/wifia-selected-projects-table",
        "file_name": "wifia_projects_selected.csv",
    },
    {
        "url": "https://www.epa.gov/wifia/wifia-closed-loans",
        "file_name": "wifia_loans_closed.csv",
    },
]

# Folder (relative to the working directory) that receives the CSV files.
unzipped_folder = "rawfiles"

# The file handler opens its log file as soon as it is constructed, so the
# parent directory has to exist first; otherwise a fresh checkout fails here
# with FileNotFoundError.
log_file = "logs/wifia_download.log"
os.makedirs(os.path.dirname(log_file), exist_ok=True)

# Rotate the log at midnight; rotated files get a YYYYMMDD suffix.
fhandler = TimedRotatingFileHandler(log_file, when="midnight", interval=1)
fhandler.suffix = "%Y%m%d"

# Send every record both to the rotating file and to the cell output (stdout).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[fhandler, logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)

## Downloading files

In [3]:
# Fetch every configured WIFIA page and save the first HTML table found on it
# as a CSV file inside `unzipped_folder`.
logger.info("Starting the process to download WIFIA letters...")

# DataFrame.to_csv does not create missing parent directories, so make sure the
# destination folder exists before the first write.
os.makedirs(unzipped_folder, exist_ok=True)

for item in files_to_download:
    logger.info(f"Download from url: {item['url']}...")
    # read_html returns a list with one DataFrame per <table> parsed from the
    # page; the name `df` is kept for any later cell that reads it.
    df = pd.read_html(item["url"])
    logger.info(f"Saving file: {item['file_name']}...")
    # Only the first table is written; the row index goes out as a "seq" column.
    csv_path = os.path.join(unzipped_folder, item["file_name"])
    df[0].to_csv(csv_path, index_label="seq")
  

2022-10-06 20:16:42,185 - INFO - Starting the process to download WIFIA letters...
2022-10-06 20:16:42,187 - INFO - Download from url: https://www.epa.gov/wifia/wifia-letters-interest-table...
2022-10-06 20:16:46,113 - INFO - Download from url: https://www.epa.gov/wifia/wifia-letters-interest-table...
2022-10-06 20:16:46,155 - INFO - Download from url: https://www.epa.gov/wifia/wifia-selected-projects-table...
2022-10-06 20:16:47,322 - INFO - Download from url: https://www.epa.gov/wifia/wifia-selected-projects-table...
2022-10-06 20:16:47,334 - INFO - Download from url: https://www.epa.gov/wifia/wifia-closed-loans...
2022-10-06 20:16:48,236 - INFO - Download from url: https://www.epa.gov/wifia/wifia-closed-loans...
