# Drought Detection Modeling

In [1]:
# initalize imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# set dataset
dataset = 'Data/Tifton-Daily-Data.csv'
df = pd.read_csv(dataset)

## Begin Preprocessing Dataset

In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24733 entries, 0 to 24732
Data columns (total 24 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   STATION  24733 non-null  object 
 1   NAME     24733 non-null  object 
 2   DATE     24733 non-null  object 
 3   DAPR     166 non-null    float64
 4   EVAP     6879 non-null   float64
 5   MDPR     169 non-null    float64
 6   MNPN     1673 non-null   float64
 7   MXPN     1672 non-null   float64
 8   PRCP     24556 non-null  float64
 9   SN01     6823 non-null   float64
 10  SN02     6839 non-null   float64
 11  SN03     6837 non-null   float64
 12  SNOW     18293 non-null  float64
 13  SNWD     12604 non-null  float64
 14  SX01     6824 non-null   float64
 15  SX02     6839 non-null   float64
 16  SX03     6839 non-null   float64
 17  TMAX     14698 non-null  float64
 18  TMIN     14692 non-null  float64
 19  TOBS     10645 non-null  float64
 20  WDMV     4478 non-null   float64
 21  WESD     2 n

In [3]:
df.head(5)

Unnamed: 0,STATION,NAME,DATE,DAPR,EVAP,MDPR,MNPN,MXPN,PRCP,SN01,...,SX01,SX02,SX03,TMAX,TMIN,TOBS,WDMV,WESD,WT03,WT04
0,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-11,,,,,,13.7,,...,,,,,,,,,,
1,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-12,,,,,,13.2,,...,,,,,,,,,,
2,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-13,,,,,,0.0,,...,,,,,,,,,,
3,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-14,,,,,,0.0,,...,,,,,,,,,,
4,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-15,,,,,,2.3,,...,,,,,,,,,,


In [4]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
DAPR,166.0,5.403614,3.889347,2.0,3.0,4.0,7.0,28.0
EVAP,6879.0,3.871813,2.319757,0.0,2.0,3.8,5.6,20.8
MDPR,169.0,27.628402,36.196715,0.0,4.8,16.0,35.1,225.0
MNPN,1673.0,13.509205,7.750376,-13.9,7.2,14.4,20.0,37.8
MXPN,1672.0,25.518242,10.466531,-17.8,20.0,27.8,33.3,41.1
PRCP,24556.0,3.590621,11.293748,0.0,0.0,0.0,0.8,287.0
SN01,6823.0,17.900821,7.253399,-4.4,12.2,18.3,24.4,37.8
SN02,6839.0,18.310045,6.669537,-1.1,12.8,18.9,24.4,31.7
SN03,6837.0,19.685578,6.607321,1.7,14.4,20.0,25.6,44.4
SNOW,18293.0,0.011808,1.424218,0.0,0.0,0.0,0.0,191.0


In [5]:
df.isnull().sum(axis=1)
df.isnull().sum(axis=0)

STATION        0
NAME           0
DATE           0
DAPR       24567
EVAP       17854
MDPR       24564
MNPN       23060
MXPN       23061
PRCP         177
SN01       17910
SN02       17894
SN03       17896
SNOW        6440
SNWD       12129
SX01       17909
SX02       17894
SX03       17894
TMAX       10035
TMIN       10041
TOBS       14088
WDMV       20255
WESD       24731
WT03       24732
WT04       24732
dtype: int64

We have quite a lot of null data within our set let's make a heatmap to get a better understanding of where these missing values are located

In [6]:
date_splits = [
    ('year', 0 , 4), ('month', 5, 7), ('day', 8,10)
]


for i in date_splits:
    df[i[0]] = [ d[i[1]:i[2]] for d in df['DATE']]



In [7]:
df.head(5)


Unnamed: 0,STATION,NAME,DATE,DAPR,EVAP,MDPR,MNPN,MXPN,PRCP,SN01,...,TMAX,TMIN,TOBS,WDMV,WESD,WT03,WT04,year,month,day
0,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-11,,,,,,13.7,,...,,,,,,,,2008,7,11
1,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-12,,,,,,13.2,,...,,,,,,,,2008,7,12
2,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-13,,,,,,0.0,,...,,,,,,,,2008,7,13
3,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-14,,,,,,0.0,,...,,,,,,,,2008,7,14
4,US1GATF0002,"TIFTON 5.6 N, GA US",2008-07-15,,,,,,2.3,,...,,,,,,,,2008,7,15


Let's rename columns so they are easier to understand

In [8]:
colsMap = {'STATION': 'Station',
           'NAME': 'Name',
           'DATE': 'Date',
           'DAPR': '# Days in MDPR',
           'EVAP': 'Evaporation of water (mm)',
           'MDPR': 'Multiday percipitation totals (mm)',
           'MNPN': 'Min Temp of Evap-Water (C)',
           'MXPN': 'Max Temp of Evap-Water (C)',
           'PRCP': 'Percipitation (mm)',
           'SN00': 'Min Soil Temperature 5 cm',
           'SN01': 'Min Soil Temperature 10 cm',
           'SN02': 'Min Soil Temperature 20 cm',
           'SNOW': 'Snowfall (mm)',
           'SNWD': 'Snowdepth (mm)',
           'SX00': 'Max Soil Temperature 5 cm',
           'SX01': 'Max Soil Temperature 10 cm',
           'SX02': 'Max Soil Temperature 20 cm',
           'TMAX': 'Max Temp (c)',
           'TMIN': 'Min Temp (c)',
           'TOBS': 'Temperature at Time of Obeservation (c)',
           'WDMV': '23 hr Wind Movement (km)',
           'WESD': 'Water equivalent of snow on ground (mm)',
           'WT02': 'Thunder',
           'WT03': 'Icey Conditions',
           'year': 'Year',
           'month': 'Month',
           'day': 'Day'}

df.rename(columns = colsMap, inplace = True)

In [30]:
# Get Mins
df.min(axis = 0)

Station                                              US1GATF0002
Name                                       TIFTON 1.4 ENE, GA US
Date                                                  1980-12-01
# Days in MDPR                                               0.0
Evaporation of water (mm)                                    0.0
Multiday percipitation totals (mm)                           0.0
Min Temp of Evap-Water (C)                                 -13.9
Max Temp of Evap-Water (C)                                 -17.8
Percipitation (mm)                                           0.0
Min Soil Temperature 10 cm                                  -4.4
Min Soil Temperature 20 cm                                  -1.1
SN03                                                         0.0
Snowfall (mm)                                                0.0
Snowdepth (mm)                                               0.0
Max Soil Temperature 10 cm                                   0.0
Max Soil Temperature 20 c

In [31]:
# Get Maxes
df.max(axis=0)


Station                                      USC00098703
Name                                       TIFTON, GA US
Date                                          2021-12-31
# Days in MDPR                                      28.0
Evaporation of water (mm)                           20.8
Multiday percipitation totals (mm)                 225.0
Min Temp of Evap-Water (C)                          37.8
Max Temp of Evap-Water (C)                          41.1
Percipitation (mm)                                 287.0
Min Soil Temperature 10 cm                          37.8
Min Soil Temperature 20 cm                          31.7
SN03                                                44.4
Snowfall (mm)                                      191.0
Snowdepth (mm)                                       0.0
Max Soil Temperature 10 cm                          44.4
Max Soil Temperature 20 cm                          43.9
SX03                                                43.9
Max Temp (c)                   

In [10]:
# Fill NA with 0's for better processing later
df.fillna(0, inplace = True)

#based on our min and maxes we can sort our set properly around date
df.sort_values(by ='Date', ascending = True, inplace = True)
df.reset_index(drop = True, inplace = True)
df

Unnamed: 0,Station,Name,Date,# Days in MDPR,Evaporation of water (mm),Multiday percipitation totals (mm),Min Temp of Evap-Water (C),Max Temp of Evap-Water (C),Percipitation (mm),Min Soil Temperature 10 cm,...,Max Temp (c),Min Temp (c),Temperature at Time of Obeservation (c),23 hr Wind Movement (km),Water equivalent of snow on ground (mm),Icey Conditions,WT04,Year,Month,Day
0,USC00098703,"TIFTON, GA US",1980-12-01,0.0,1.8,0.0,2.2,15.0,0.0,0.0,...,18.3,1.1,0.0,16.0,0.0,0.0,0.0,1980,12,01
1,USC00098703,"TIFTON, GA US",1980-12-02,0.0,1.8,0.0,2.2,16.1,0.0,0.0,...,20.6,4.4,0.0,40.0,0.0,0.0,0.0,1980,12,02
2,USC00098703,"TIFTON, GA US",1980-12-03,0.0,2.3,0.0,5.0,19.4,0.0,0.0,...,23.3,3.3,0.0,50.0,0.0,0.0,0.0,1980,12,03
3,USC00098703,"TIFTON, GA US",1980-12-04,0.0,3.0,0.0,2.2,14.4,0.0,0.0,...,16.7,2.8,0.0,66.0,0.0,0.0,0.0,1980,12,04
4,USC00098703,"TIFTON, GA US",1980-12-05,0.0,2.0,0.0,1.1,15.6,0.0,0.0,...,18.3,3.3,0.0,56.0,0.0,0.0,0.0,1980,12,05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24728,USC00098703,"TIFTON, GA US",2021-12-29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,25.0,11.1,17.2,0.0,0.0,0.0,0.0,2021,12,29
24729,US1GATF0003,"TIFTON 1.4 ENE, GA US",2021-12-30,0.0,0.0,0.0,0.0,0.0,0.5,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2021,12,30
24730,USC00098703,"TIFTON, GA US",2021-12-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,25.0,17.2,21.1,0.0,0.0,0.0,0.0,2021,12,30
24731,USC00098703,"TIFTON, GA US",2021-12-31,0.0,0.0,0.0,0.0,0.0,3.0,0.0,...,26.7,17.2,21.1,0.0,0.0,0.0,0.0,2021,12,31
