# US Drought Monitor Statistics by County for CA & CO

Data sourced from the <a href="https://droughtmonitor.unl.edu/DmData/DataDownload/ComprehensiveStatistics.aspx">US Drought Monitor.</a>

## Load output of comprehensive statistics (categorical) by county for CA & CO from 2000 - 2022

In [1]:
import pandas as pd
# Load data, specifying that the FIPS column should be a string to not drop the leading 0
df = pd.read_json ('~/jupyter/sp22Capstone_01_Group02/data/COCountyStatistics.json', dtype={'FIPS':str})
df = df.append(pd.read_json ('~/jupyter/sp22Capstone_01_Group02/data/CACountyStatistics.json', dtype={'FIPS':str}))

# Convert date ints and strings to dates
df.MapDate = pd.to_datetime(df.MapDate, format='%Y%m%d').dt.date
df.ValidStart = pd.to_datetime(df.ValidStart).dt.date
df.ValidEnd = pd.to_datetime(df.ValidEnd).dt.date

# Remove the unnecessary StatisticFormatID column
df.drop('StatisticFormatID',axis=1,inplace=True)

df.head()

Unnamed: 0,MapDate,FIPS,County,State,None,D0,D1,D2,D3,D4,ValidStart,ValidEnd
0,2022-01-18,8001,Adams County,CO,0.0,0.0,0.0,9.59,90.41,0.0,2022-01-18,2022-01-24
1,2022-01-11,8001,Adams County,CO,0.0,0.0,0.0,9.58,90.42,0.0,2022-01-11,2022-01-17
2,2022-01-04,8001,Adams County,CO,0.0,0.0,0.0,0.0,100.0,0.0,2022-01-04,2022-01-10
3,2021-12-28,8001,Adams County,CO,0.0,0.0,0.0,0.0,100.0,0.0,2021-12-28,2022-01-03
4,2021-12-21,8001,Adams County,CO,0.0,0.0,0.0,0.0,100.0,0.0,2021-12-21,2021-12-27


### Note the classifications D0 - D4.  These correspond to drought categories as per the below (taken from <a href="https://droughtmonitor.unl.edu/About/AbouttheData/DroughtClassification.aspx">this</a> page)
### The "none" category indicates the percentage of area not in drought at all

![Drought Classification](./DroughtClassification.png)

### An additional statistic, "Drought Severity and Coverage Index (DSCI)", is also calculated by performing a weighted sum of the drought category percentages using the formula:

### 1(D0) + 2(D1) + 3(D2) + 4(D3) + 5(D4) = DSCI

### More detail <a href="https://droughtmonitor.unl.edu/About/AbouttheData/DSCI.aspx">here.</a>  Possible values range from 0 - 500

In [102]:
df['DSCI'] = df.D0 + (2 * df.D1) + (3 * df.D2) + (4 * df.D3) + (5 * df.D4)

### Rename & rearrange columns

In [103]:
df.columns = ['date','fips','county','state','none_pct','d0_pct','d1_pct','d2_pct','d3_pct','d4_pct',
              'start_date','end_date', 'dsci']

df = df[['date','fips','start_date','end_date','county','state','none_pct','d0_pct','d1_pct','d2_pct','d3_pct','d4_pct',
         'dsci']]

## Create DB table

In [1]:
import getpass
from sqlalchemy.engine.url import URL
from sqlalchemy import create_engine
%reload_ext sql

mypasswd = getpass.getpass()
username = 'dgyw5' # Replace with your pawprint
host = 'pgsql.dsa.lan'
database = 'caponl_22g2'

postgres_db = {'drivername': 'postgres',
               'username': username,
               'password': mypasswd,
               'host': host,
               'database': database}
engine = create_engine(URL(**postgres_db), echo=False)


connection_string = f'postgres://{username}:{mypasswd}@{host}/{database}'
%sql $connection_string
del mypasswd

········


In [109]:
%%sql

drop table if exists drought_stats cascade;

create table drought_stats (
    date date,
    fips varchar(6),
    start_date date,
    end_date date,
    county varchar(25),
    state varchar(2),
    none_pct float,
    d0_pct float,
    d1_pct float,
    d2_pct float,
    d3_pct float,
    d4_pct float,
    dsci float,
    constraint pk_drought_stats primary key (date, fips)
);

 * postgres://dgyw5:***@pgsql.dsa.lan/caponl_22g2
Done.
Done.


[]

## Load data to Postgres DB

In [110]:
df.to_sql ('drought_stats', engine, if_exists='append', index=False, method='multi', chunksize=20000)    

In [112]:
%sql select count(*) from drought_stats

 * postgres://dgyw5:***@pgsql.dsa.lan/caponl_22g2
1 rows affected.


count
140422


In [2]:
%%sql

select state, min(date), max(date)
from drought_stats
group by state

 * postgres://dgyw5:***@pgsql.dsa.lan/caponl_22g2
2 rows affected.


state,min,max
CA,2000-01-04,2022-01-18
CO,2000-01-04,2022-01-18


In [3]:
%sql grant all privileges on drought_stats to nnfd2, dgyw5, jwcp64, gfdbq

 * postgres://dgyw5:***@pgsql.dsa.lan/caponl_22g2
Done.


[]