In [28]:
import io
import csv
import pandas as pd
import datetime
import requests
import altair as alt
import altair_latimes as lat

In [29]:
# Turn off Altair's max-rows check so the chart further down can embed the
# full melted dataset instead of raising on a large frame.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [30]:
# Register the theme object from altair_latimes under a name, then switch
# Altair over to it. The enable() call stays last so its result is the cell output.
theme_name = 'latimes'
alt.themes.register(theme_name, lat.theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('latimes')

Download U.S. Drought Monitor's data

Documentation: https://droughtmonitor.unl.edu/DmData/DataDownload/ComprehensiveStatistics.aspx

### Build REST query

In [31]:
# First path segment after /api/ when the request URL is assembled below.
area = "StateStatistics"

In [32]:
# Second path segment of the request URL (follows `area`); names the API method called.
statistics_type = "GetDroughtSeverityStatisticsByAreaPercent"

In [33]:
# Sent as the `aoi` query parameter.
# NOTE(review): "06" is presumably California's state FIPS code; confirm against the API docs.
aoi = "06"  #california

In [34]:
# Sent as the `dx` query parameter of the request URL.
drought_level = "D0"

In [35]:
# Sent as the `DxLevelThresholdFrom` query parameter of the request URL.
min_threshold = 0

In [36]:
# Sent as the `DxLevelThresholdTo` query parameter of the request URL.
max_threshold = 100

In [37]:
# Sent as the `startdate` query parameter. Written as month/day/year without
# zero padding, the same layout `end_date` is formatted with.
start_date = "1/4/2000"

In [38]:
# Today's date as month/day/year with no zero padding, the same layout as
# start_date. The "%-m" / "%-d" strftime flags are a platform extension (glibc)
# and raise ValueError on Windows, so the string is built from the Timestamp's
# integer attributes instead, which works everywhere.
today = pd.to_datetime("today")
end_date = f"{today.month}/{today.day}/{today.year}"
end_date

'6/27/2024'

In [39]:
# Sent as the `statisticsType` query parameter of the request URL.
# NOTE(review): the meaning of "1" is defined by the API; check the documentation linked above.
statistics_format = "1"

In [40]:
# Assemble the request URL in two steps: the endpoint path, then the query
# string. Values are interpolated as-is (no percent-encoding), and the pairs
# are joined in this exact order.
endpoint = f"https://usdmdataservices.unl.edu/api/{area}/{statistics_type}"
query_pairs = [
    ("aoi", aoi),
    ("dx", drought_level),
    ("DxLevelThresholdFrom", min_threshold),
    ("DxLevelThresholdTo", max_threshold),
    ("startdate", start_date),
    ("enddate", end_date),
    ("statisticsType", statistics_format),
]
url = endpoint + "?" + "&".join(f"{key}={value}" for key, value in query_pairs)

### Download the data

**6/27/2024:** Drought Monitor has been changing its data format. Last week, the commented-out CSV parser below worked. Now, the JSON method that follows it is the cleanest approach.

In [41]:
# r = requests.get(url, headers={"Accept": "csv"})
# df = pd.read_csv( io.StringIO(r.text.replace('\r','')), sep=",") 

In [42]:
# Fetch the statistics as JSON and load the parsed payload into a DataFrame.
# - timeout: without one, requests waits indefinitely if the service never
#   answers, which would hang the notebook.
# - raise_for_status(): turns an HTTP 4xx/5xx into an immediate, clearly
#   labelled error instead of a confusing JSON/DataFrame failure downstream.
r = requests.get(url, headers={"Accept": "json"}, timeout=60)
r.raise_for_status()
json_object = r.json()
df = pd.DataFrame(json_object)

### Process

In [43]:
# Convert the ValidStart column to pandas datetimes, writing the result back
# onto the same column of df.
valid_start = pd.to_datetime(df["ValidStart"])
df["ValidStart"] = valid_start

In [58]:
# Cast the six category columns to float, in place on df.
severity_columns = ['None', 'D0', 'D1', 'D2', 'D3', 'D4']
df[severity_columns] = df[severity_columns].astype(float)

In [59]:
# Keep only the week start and the six category columns, in this order.
keep_columns = ['ValidStart', 'None', 'D0', 'D1', 'D2', 'D3', 'D4']
trim_df = df[keep_columns]

In [60]:
# Map the API's column names to the labels used in the exported CSV.
# 'None' is not in the mapping, so that column keeps its name.
readable_names = {
    "ValidStart": "week_of",
    'D0': 'Abnormally dry',
    'D1': 'Moderate drought',
    'D2': 'Severe drought',
    'D3': 'Extreme drought',
    'D4': 'Exceptional drought',
}
rename_df = trim_df.rename(columns=readable_names)

### Check

In [61]:
# Reshape wide -> long: one row per (ValidStart, category) pair, with the
# column's value in percent_of_area. Only D0-D4 are melted; 'None' is left out.
drought_categories = ["D0", "D1", "D2", "D3", "D4"]
melt = trim_df.melt(
    id_vars="ValidStart",
    value_vars=drought_categories,
    var_name="category",
    value_name="percent_of_area",
)

In [62]:
# Category order and the colour paired with each category.
domain = ['D0', 'D1', 'D2', 'D3', 'D4']
range_ = ['#f5df8b', '#deba6a', '#c48237', '#c26170', '#7b222c']

# Unstacked area chart (stack=None), one area per category, coloured by the scale above.
severity_scale = alt.Scale(domain=domain, range=range_)
area_chart = alt.Chart(melt).mark_area().encode(
    x="ValidStart:T",
    y=alt.Y("percent_of_area", stack=None),
    color=alt.Color('category', scale=severity_scale),
    order=alt.Order('category', sort='descending'),
)

# Left as the final expression so the notebook renders the chart.
area_chart.properties(title="Drought in California since 2000", width=600)

### Export

In [63]:
# Write the renamed table to CSV, leaving out the DataFrame index.
output_path = "data/drought-monitor-california.csv"
rename_df.to_csv(output_path, index=False)