In [1]:
import csv
import datetime
import io
from pathlib import Path

import altair as alt
import altair_latimes as lat
import pandas as pd
import requests



In [2]:
# Remove Altair's default cap on the number of rows a chart may embed.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# Register the theme shipped with altair_latimes under the name 'latimes',
# then make it the active theme for the charts rendered below.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

## Download U.S. Drought Monitor data

Documentation: https://droughtmonitor.unl.edu/DmData/DataDownload/ComprehensiveStatistics.aspx

### Build REST query

In [4]:
# First path segment after /api/ in the request URL.
area = "StateStatistics"

In [5]:
# Second path segment of the request URL: the statistics endpoint to call.
statistics_type = "GetDroughtSeverityStatisticsByAreaPercent"

In [6]:
# Area of interest, sent as the "aoi" query parameter.
aoi = "06"  # California

In [7]:
# Drought category sent as the "dx" query parameter.
drought_level = "D0"

In [8]:
# Lower bound sent as the "DxLevelThresholdFrom" query parameter.
min_threshold = 0

In [9]:
# Upper bound sent as the "DxLevelThresholdTo" query parameter.
max_threshold = 100

In [10]:
# First date of the requested range (M/D/YYYY), sent as the "startdate" query parameter.
start_date = "1/4/2000"

In [11]:
# Today's date as M/D/YYYY with no zero padding, matching the start_date format.
# The string is built from the integer date fields because the "%-m"/"%-d"
# strftime flags are a platform extension and raise ValueError on Windows.
today = pd.Timestamp.today()
end_date = f"{today.month}/{today.day}/{today.year}"
end_date

'7/25/2024'

In [12]:
# Sent as the "statisticsType" query parameter.
# NOTE(review): the meaning of "1" is not shown here — confirm against the API documentation.
statistics_format = "1"

In [13]:
# Assemble the request URL: endpoint path first, then the query string,
# one parameter per line so each value is easy to find.
endpoint = f"https://usdmdataservices.unl.edu/api/{area}/{statistics_type}"
query = "&".join(
    [
        f"aoi={aoi}",
        f"dx={drought_level}",
        f"DxLevelThresholdFrom={min_threshold}",
        f"DxLevelThresholdTo={max_threshold}",
        f"startdate={start_date}",
        f"enddate={end_date}",
        f"statisticsType={statistics_format}",
    ]
)
url = f"{endpoint}?{query}"

### Download the data

**6/27/2024:** Drought Monitor has been changing its data format. Last week the commented-out CSV parser below still worked; now the JSON method in the following cell is the cleanest.

In [14]:
# Earlier CSV-based download, left disabled. It stopped being the preferred
# route when the service changed its response format; the JSON request in the
# next cell is used instead.
# r = requests.get(url, headers={"Accept": "csv"})
# df = pd.read_csv( io.StringIO(r.text.replace('\r','')), sep=",")

In [47]:
# Request the statistics as JSON. The timeout keeps the cell from hanging
# indefinitely if the service never answers.
r = requests.get(url, headers={"Accept": "text/json"}, timeout=60)
# Stop here on an HTTP error status rather than failing later with an
# opaque JSON decode error on an error page.
r.raise_for_status()
json_object = r.json()
# Load the parsed JSON payload into a DataFrame.
df = pd.DataFrame(json_object)

In [51]:
# Title-case every column label (e.g. "Validstart", "Mapdate") so the
# names used in the cells below match.
df = df.rename(columns=str.title)

### Process

In [52]:
# Show the first record to confirm the column names after title-casing.
df.head(1)

Unnamed: 0,Mapdate,Stateabbreviation,None,D0,D1,D2,D3,D4,Validstart,Validend,Statisticformatid
0,2024-07-23T00:00:00,CA,78.8,21.2,4.44,0.0,0.0,0.0,2024-07-23T00:00:00,2024-07-29T23:59:59,1


In [59]:
# Parse the Validstart strings into datetime values.
df = df.assign(Validstart=pd.to_datetime(df["Validstart"]))

In [60]:
# Cast the no-drought column and the five drought-category columns to float.
severity_columns = ['None', 'D0', 'D1', 'D2', 'D3', 'D4']
df[severity_columns] = df[severity_columns].astype(float)

In [61]:
# Keep only the week start date and the six percentage columns.
kept_columns = ['Validstart', 'None', 'D0', 'D1', 'D2', 'D3', 'D4']
trim_df = df.loc[:, kept_columns]

In [62]:
# Reader-friendly column labels for the exported file. Columns not listed
# here (such as "None") keep their current names.
column_labels = {
    "Validstart": "week_of",
    "D0": "Abnormally dry",
    "D1": "Moderate drought",
    "D2": "Severe drought",
    "D3": "Extreme drought",
    "D4": "Exceptional drought",
}
rename_df = trim_df.rename(columns=column_labels)

### Check

In [63]:
# Spot-check the first rows of the trimmed frame (dates parsed, percentages numeric).
trim_df.head()

Unnamed: 0,Validstart,None,D0,D1,D2,D3,D4
0,2024-07-23,78.8,21.2,4.44,0.0,0.0,0.0
1,2024-07-16,78.8,21.2,1.62,0.0,0.0,0.0
2,2024-07-09,80.72,19.28,0.77,0.0,0.0,0.0
3,2024-07-02,94.25,5.75,0.0,0.0,0.0,0.0
4,2024-06-25,97.18,2.82,0.0,0.0,0.0,0.0


In [64]:
# Reshape wide -> long for charting: one row per (week, drought category)
# with the category's percentage in "percent_of_area". The "None" column
# is intentionally not among the melted value columns.
drought_categories = ['D0', 'D1', 'D2', 'D3', 'D4']
melt = trim_df.melt(
    id_vars='Validstart',
    value_vars=drought_categories,
    var_name="category",
    value_name="percent_of_area",
)

In [67]:
# Drought categories and the fill color assigned to each, in matching order.
domain = ['D0', 'D1', 'D2', 'D3', 'D4']
range_ = ['#f5df8b', '#deba6a', '#c48237', '#c26170', '#7b222c']
category_colors = alt.Scale(domain=domain, range=range_)

# stack=None turns off stacking, so every category's area is drawn from the
# same baseline and the areas overlap.
area_chart = alt.Chart(melt).mark_area().encode(
    x="Validstart:T",
    y=alt.Y("percent_of_area", stack=None),
    color=alt.Color('category', scale=category_colors),
    order=alt.Order('category', sort='descending'),
)

# Last expression of the cell, so the notebook renders the chart.
area_chart.properties(
    title="Drought in California since 2000",
    width=600,
    height=400,
)

### Export

In [66]:
# Write the relabeled table to CSV without the index column.
# The output directory is created first so the export also succeeds on a
# fresh checkout where "data/" does not exist yet.
output_path = Path("data/drought-monitor-california.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
rename_df.to_csv(output_path, index=False)