In [17]:
import requests 
from bs4 import BeautifulSoup
import pandas as pd

import sys
import os

sys.path.append(os.path.abspath("../../")) #vscode import 

from settings.settings import USDA_NASS_API_KEY

Export sales

In [None]:
# Download the USDA FAS export-sales page (h107.htm), turn its first HTML table
# into a DataFrame and store it as CSV.
response = requests.get('https://apps.fas.usda.gov/export-sales/h107.htm', timeout=60)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were the report.
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html, "html.parser")

table = soup.find('table')
if table is None:
    raise ValueError("No <table> element found in the export sales page")

# One list of stripped cell texts per <tr>; a row without <td> cells becomes [].
rows = [
    [td.get_text(strip=True) for td in tr.find_all("td")]
    for tr in table.find_all("tr")
]
if len(rows) < 3:
    raise ValueError(
        f"Export sales table has {len(rows)} rows; expected header cells in rows 1 and 2"
    )

# Column names are built pairwise from the cells of rows 1 and 2. A name that is
# already taken gets an "NMY" prefix so the repeated column stays distinguishable
# (NOTE(review): "NMY" presumably stands for "next marketing year" - confirm).
cols = []
for i, j in zip(rows[1], rows[2]):
    name = f"{i} {j}"
    cols.append(name if name not in cols else f"NMY {name}")

# Everything from row 4 onwards is treated as data.
data = rows[4:]
df = pd.DataFrame(data, columns=cols)
# Discard rows that contain missing values.
df = df.dropna()
df.to_csv('../../data/US_export_sales/export_sales.csv', index=False)

Crop progress / condition

In [48]:
def download_data(category, timeout=120):
    """Fetch national winter-wheat survey records from the USDA NASS Quick Stats API.

    The request is fixed to ``source_desc=SURVEY``, ``sector_desc=CROPS``,
    ``group_desc=FIELD CROPS``, ``commodity_desc=WHEAT``,
    ``agg_level_desc=NATIONAL`` and ``class_desc=WINTER``; only the statistic
    category varies.

    Parameters
    ----------
    category : str
        Value for the ``statisticcat_desc`` filter, e.g. ``'CONDITION'`` or
        ``'PROGRESS'``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 120).

    Returns
    -------
    pandas.DataFrame
        One row per entry of the ``data`` list in the JSON response.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    """
    endpoint = "https://quickstats.nass.usda.gov/api/api_GET/"
    # Handing the filters to requests via ``params`` URL-encodes every value
    # (spaces in "FIELD CROPS", or "&" / "=" inside ``category``), which the
    # previous hand-concatenated query string did not guarantee.
    request_params = {
        "key": USDA_NASS_API_KEY,
        "source_desc": "SURVEY",
        "sector_desc": "CROPS",
        "group_desc": "FIELD CROPS",
        "commodity_desc": "WHEAT",
        "statisticcat_desc": category,
        "agg_level_desc": "NATIONAL",
        "class_desc": "WINTER",
        "format": "JSON",
    }

    r = requests.get(endpoint, params=request_params, timeout=timeout)
    # Surface API errors as HTTPError rather than a KeyError on 'data' below.
    r.raise_for_status()
    df = pd.DataFrame(r.json()['data'])
    return df

In [53]:
# Fetch both statistic categories first; nothing is written until both
# downloads have succeeded.
conditions, progress = (
    download_data(category) for category in ('CONDITION', 'PROGRESS')
)

# Save each result as CSV, leaving out the DataFrame index.
for frame, target in (
    (conditions, '../../data/crop_progress/conditions.csv'),
    (progress, '../../data/crop_progress/progress.csv'),
):
    frame.to_csv(target, index=False)

River water levels

In [None]:
# River gauging stations to download: human-readable label -> USGS monitoring
# location id. The id is what download_rivers() sends as
# `monitoring_location_id`; the label ends up in the "location" column of the
# combined rivers table.
location_dict = {
    "Mississippi River at St. Louis, MO": "USGS-07010000",
    "Ohio River at Cincinnati, OH": "USGS-03255000",
    "Illinois River at Meredosia, IL": "USGS-05585500",
    "Ohio River at Louisville, KY": "USGS-03294500",
}

In [None]:
def download_rivers(location_id, timeout=120):
    """Download daily records for one monitoring location from the USGS water data API.

    Queries the ``daily`` collection for parameter code ``00065``
    (NOTE(review): presumably gage height, going by the output file name used
    elsewhere in this notebook - confirm).

    Parameters
    ----------
    location_id : str
        Monitoring location identifier, e.g. ``"USGS-07010000"``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 120).

    Returns
    -------
    pandas.DataFrame
        One row per returned feature, built from each feature's ``properties``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status code.
    """
    url = "https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily/items"
    # Let requests assemble and URL-encode the query string instead of splicing
    # ``location_id`` into it by hand.
    query = {
        "f": "json",
        "lang": "en-US",
        # A single request is made: at most 50000 items, no pagination is followed.
        "limit": 50000,
        "skipGeometry": "false",
        "sortby": "time",
        "offset": 0,
        "monitoring_location_id": location_id,
        "parameter_code": "00065",
    }
    r = requests.get(url, params=query, timeout=timeout)
    # Surface API errors as HTTPError rather than a KeyError on 'features' below.
    r.raise_for_status()
    data = r.json()['features']
    data_list = [d['properties'] for d in data]
    return pd.DataFrame(data_list)

In [100]:
# Download every station listed in location_dict, tag each frame with its
# human-readable label, and stack the results into one table.
data_list = [
    download_rivers(station_id).assign(location=label)
    for label, station_id in location_dict.items()
]
rivers = pd.concat(data_list)

rivers.to_csv('../../data/rivers/rivers_gage_height.csv', index=False)