In [2]:
import pandas as pd
from matplotlib import pyplot as plt
import xarray as xr
import numpy as np
import urllib.request # module for downloading data from URLs
import chardet

## Data
- We're working with the raw data from: https://www.ncdc.noaa.gov/temp-and-precip/drought/recovery/projected/50-50/3
<br><br>
- Projected Palmer Hydrological Drought Index (3-month outlook) is based on projections of temperature and precipitation from NOAA's Climate Prediction Center
<br><br>
- *Definition*: "Palmer Hydrological Drought Index: measures hydrological impacts of drought (e.g., reservoir levels, groundwater levels, etc.) which take longer to develop and longer to recover from. This long-term drought index was developed to quantify these hydrological effects...." 
<br><br>
- Negative values = some level of drought
<br><br>
- Positive values = no drought

In [3]:
filename = 'projected-phdi-50-50-3.csv'
url = 'https://www.ncdc.noaa.gov/temp-and-precip/drought/recovery/projected-phdi-50-50-3.csv'
urllib.request.urlretrieve(url, filename)

('projected-phdi-50-50-3.csv', <http.client.HTTPMessage at 0x7f8144071e20>)

In [4]:
# Determine encoding, then go line by line through file 
# To see what other processing may be needed!
edata = open('projected-phdi-50-50-3.csv', 'rb').read()
result = chardet.detect(edata)
encode = result['encoding']

# Prints out each line, beginning with the first
for line in (open('projected-phdi-50-50-3.csv',encoding=encode).readlines()):
    print(line)

Normal Conditions Projected PHDI for 20211231 (3-Month Outlook)

The following projected PHDI is based on the most recent monthly precipitation and mean temperature data for NOAA/NCEI Climate Divisions extended by 3 months using NOAA/CPC Seasonal Forecasts of the 50th percentile of mean temperature and 50th percentile of precipitation.

Missing: -99.99

State,Climate Division,value

AL,01,5.01

AL,02,4.20

AL,03,4.62

AL,04,4.05

AL,05,3.51

AL,06,3.33

AL,07,3.78

AL,08,3.04

AZ,01,-2.90

AZ,02,-3.60

AZ,03,-2.90

AZ,04,-2.05

AZ,05,-1.88

AZ,06,-1.31

AZ,07,-1.72

AR,01,1.50

AR,02,1.46

AR,03,1.58

AR,04,2.57

AR,05,1.52

AR,06,2.06

AR,07,3.01

AR,08,2.92

AR,09,2.97

CA,01,-3.09

CA,02,-3.30

CA,03,-3.19

CA,04,-3.18

CA,05,-4.36

CA,06,-4.86

CA,07,-3.33

CO,01,-1.02

CO,02,-3.23

CO,03,0.00

CO,04,-1.42

CO,05,-1.70

CT,01,3.29

CT,02,3.07

CT,03,2.14

DE,01,1.35

DE,02,2.11

FL,01,2.69

FL,02,1.88

FL,03,0.00

FL,04,-1.95

FL,05,-1.64

FL,06,-1.35

FL,07,-2.28

GA,01,4.13

GA,0

In [27]:
# Read in the data
# There are some special things you'll need to do!
# Hint - telling Pandas what values to consider NaNs is one of them!
df = pd.read_csv(
    'projected-phdi-50-50-3.csv',
    header=3,
    na_values=[-99.99]
)


In [28]:
# Check for NaNs!
df.isnull().sum()


State               0
Climate Division    0
value               0
dtype: int64

In [31]:
# What is the average PHDI over Illinois?

# Drought index is contained in the 'value' column

# Yes, there's a north-south gradient in Illinois, so we'd have to be careful interpreting this!
illinois_PHDI = df[(df['State'] == 'IL')]
illinois_PHDI.mean()




  illinois_PHDI.mean()


Climate Division    5.000000
value               1.998889
dtype: float64

In [40]:
# For how many climate divisions in the United States is any level of drought forecasted?

# PHDI < 0

# What percentage of the total number of climate divisions is this?
drought = df[df['value'] < 0]
n_divisions_drought = len(np.unique(drought['Climate Division']))


In [8]:
# IF TIME PERMITS

# Create a histogram of PHDI!

# Interpret!
