<a href="https://colab.research.google.com/github/anniebritton/Eco-Drought-South-Dakota/blob/main/NDVI_Data_Processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Colab Set Up**

In [None]:
# installs and import libraries
!pip install earthengine-api --upgrade
!pip install geemap
!pip install geopandas 
!pip install matplotlib

import ee
import geemap.foliumap as geemap
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (20,3)

In [None]:
# initialise ee, copy and paste the authentication code
try:
        ee.Initialize()
except Exception as e:
        ee.Authenticate()
        ee.Initialize()

In [3]:
# mount gogole drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Import Data**

In [14]:
df_whole = pd.read_csv('/content/drive/MyDrive/School/M.S./Courses/Capstone/Colab/Data/CSVs/drought_whole_range.csv')
df_NDVI = pd.read_csv('/content/drive/MyDrive/School/M.S./Courses/Capstone/Colab/Data/CSVs/drought_NDVI_range.csv')

In [15]:
df_whole['date'] = pd.to_datetime(df_whole['date'])
df_whole.set_index('date', inplace=True)

df_NDVI['date'] = pd.to_datetime(df_NDVI['date'])
df_NDVI.set_index('date', inplace=True)

**Calculate Anomalies and Resample**

In [16]:
# Function that will calculate the daily mean for each variable and then 
# subsequently calculate the anomaly for each variable/day
def calculate_anomaly(df, value_col):
    
    # Group the data by day of the year and calculate the average for each day of the year
    df_daily_grouping = df.groupby(df.index.dayofyear).mean()

    # Create a dictionary mapping day of year to average value
    day_of_year_to_mean = df_daily_grouping[value_col].to_dict()

    # Map the day of year to the average value for that day of year
    df['day_of_year'] = df.index.dayofyear
    df[f'day_of_year_{value_col}_mean'] = df['day_of_year'].map(day_of_year_to_mean)

    # Calculate the daily anomaly as the difference between the original value and the average value for that day of year
    df[f'{value_col}_anomaly'] = df[value_col] - df[f'day_of_year_{value_col}_mean']

# Apply the function to each column of the dataframe
for col in df_whole.columns:
    if col != 'date':
        calculate_anomaly(df_NDVI, col)

In [17]:
# Create a dataframe that only contains the anomaly data
df_anom = df_NDVI[df_NDVI.columns[df_NDVI.columns.str.endswith('_anomaly')]]

# Resample to the five day average to match gridMET's pentad-structure (5-day averages)
df_anom_5day = df_anom.resample('5D').mean() # THIS DOESN'T WORK - needs to always calculate the drought index values with the five days before hand to be representative. 
# Right now it could be doing something like mean(NA NA value NA NA) when it needs to be (NA NA NA NA value)

df_anom_5day.index = df_anom_5day.index + pd.Timedelta(days=4)
df_anom_5day

Unnamed: 0_level_0,swe_anomaly,tmin_anomaly,tmax_anomaly,precip_anomaly,pdsi_anomaly,eddi30_anomaly,eddi90_anomaly,eddi180_anomaly,spei30_anomaly,spei90_anomaly,spei180_anomaly,spi30_anomaly,spi90_anomaly,spi180_anomaly,smam_anomaly,smpm_anomaly,NDVI_anomaly,PP_NDVI_anomaly
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2000-02-28 00:00:00+00:00,1.095881,7.345490,8.645741,0.141966,1.470454,0.054175,0.069893,0.396130,0.443945,-0.014226,-0.304995,0.791930,0.193798,-0.280463,,,0.061551,0.018989
2000-03-04 00:00:00+00:00,-3.721514,5.281610,6.782128,-0.254201,1.205221,0.545093,0.266891,0.572296,-0.166021,-0.016958,-0.570892,0.334297,0.397312,-0.592174,,,0.063457,0.081712
2000-03-09 00:00:00+00:00,-7.276370,4.153108,4.811861,0.555665,1.135254,0.920134,0.519950,0.828034,-0.458766,-0.246407,-0.982116,0.268848,0.159046,-1.112468,,,-0.002789,0.008393
2000-03-14 00:00:00+00:00,-5.622128,-1.954385,-2.464832,-0.223751,1.310338,0.634538,0.404361,0.822308,1.018282,0.560596,-0.684651,1.549244,1.223321,-0.384201,,,-0.041411,-0.035631
2000-03-19 00:00:00+00:00,-4.111322,-0.906839,-1.115650,0.004584,,,,,,,,,,,,,0.015611,-0.018259
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-14 00:00:00+00:00,-0.147342,3.843822,2.394898,-0.204098,-0.778893,1.252848,1.266859,1.331529,-1.003671,0.129655,-0.249505,0.437877,0.786370,0.640914,-0.019669,-0.045613,-0.113017,-0.066954
2021-12-19 00:00:00+00:00,-2.664924,-3.274593,-0.469399,-0.103758,-0.818220,1.297906,1.355295,1.236945,-0.754666,0.168079,-0.132221,0.502652,0.833527,0.691646,-0.011771,-0.039619,-0.018995,-0.020599
2021-12-24 00:00:00+00:00,-3.920419,5.199928,5.115721,-0.206209,-0.868289,1.341975,1.415076,1.384412,-0.673447,0.088963,-0.186838,0.477266,0.739422,0.593670,0.006378,-0.007403,0.061074,0.025279
2021-12-29 00:00:00+00:00,-5.808820,-3.462318,-2.281510,-0.481583,-0.685349,1.438748,1.116332,1.306408,-0.501507,0.414257,-0.227074,0.604680,0.972511,0.668806,-0.016547,0.013479,0.101719,0.039374


**Graveyard**

In [None]:
# Check if any drough gridMET drough pentad occurs on a leap day (February 29th)
leap_day_mask = (pdsi_df.index.month == 2) & (pdsi_df.index.day == 29)

# Index the dataframe to only include values on leap day
leap_day_df = pdsi_df[leap_day_mask]
leap_day_df