# Flood - Merge Time and Location Information
<h3> 
Aaron Trefler <br/>
JPL <br/>
Created: 06/22/2016 <br/>
</h3>

# I. Setup

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and edits to local helper modules
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import scipy.io as sio 
import warnings

from flood_functions import grace_brick_convert_lowres

In [3]:
# Relative locations (from this notebook) of the inputs it reads and the
# outputs it writes.

# inputs
dir_grace = '../../Work_Matlab/Data/'
dir_flood = '../../Raw Data/Flood Observatory/'

# intermediate data and outputs
dir_python_data = '../Data/'
dir_figures = '../Figures/'

# II. Load Data

## GRACE MASCON-CRI

In [4]:
# Read the three GRACE MASCON-CRI MATLAB files into dictionaries keyed by
# MATLAB variable name: the gridded solution, its date vectors, and the
# land mask.
mat_content_1 = sio.loadmat(
    dir_grace + 'GRCTellus.JPL.200204_201603.GLO.RL05M_1.MSCNv02CRIv02.nc.mat')
mat_content_2 = sio.loadmat(
    dir_grace + 'GRCTellus.JPL.200204_201603.GLO.RL05M_1.MSCNv02CRIv02_dateVectors.nc.mat')
mat_content_3 = sio.loadmat(
    dir_grace + 'LAND_MASK.CRIv01.nc.mat')

In [5]:
# List the name, shape and type of every variable stored in the GRACE
# solution file and in the date-vector file (nothing is loaded here).
# Each message is built as one string so the printed text is the same
# whether print is a statement or a function.
grace_whos = sio.whosmat(
    dir_grace + 'GRCTellus.JPL.200204_201603.GLO.RL05M_1.MSCNv02CRIv02.nc.mat')
print('GRACE: %s \n' % (grace_whos,))

grace_dates_whos = sio.whosmat(
    dir_grace + 'GRCTellus.JPL.200204_201603.GLO.RL05M_1.MSCNv02CRIv02_dateVectors.nc.mat')
print('GRACE Date Vectors: %s' % (grace_dates_whos,))

GRACE: [('lat', (360, 1), 'double'), ('lon', (720, 1), 'double'), ('lwe_thickness', (720, 360, 152), 'double'), ('time', (152, 1), 'double'), ('uncertainty', (720, 360, 152), 'double')] 

GRACE Date Vectors: [('time_datenum', (152, 1), 'double'), ('time_datestr', (152,), 'char'), ('time_datestr_cell', (152, 1), 'cell'), ('time_dayOfYear', (152, 1), 'double'), ('time_month', (152, 1), 'double'), ('time_year', (152, 1), 'double')]


In [6]:
# Pull the arrays used later in the notebook out of the loaded MATLAB
# dictionaries.

# solution file: grid coordinates and the uncertainty field
grace_lat, grace_lon, grace_uncertainty = (
    mat_content_1[key] for key in ('lat', 'lon', 'uncertainty'))

# date-vector file: date strings of the GRACE time steps
grace_time_datestr = mat_content_2['time_datestr']

# land-mask file
grace_land_mask = mat_content_3['land_mask']

## Flood Observatory

In [7]:
# Flood Observatory (FO) events, previously cleaned by
# FloodObservatory_Preprocess.py. The 'Unnamed: 0' column (presumably the
# row index written when the CSV was saved) is discarded straight away.
df_flood_grace = pd.read_csv(dir_python_data + 'df_flood.csv').drop('Unnamed: 0', axis=1)

# sanity check: table size and the last event in the file
print(df_flood_grace.shape)
df_flood_grace.tail(1)

(2445, 15)


Unnamed: 0,Register #,Began,Date Began,Ended,Duration in Days,Dead,Displaced,Main cause,Severity *,Affected sq km,Magnitude (M)**,Country,Other,Centroid X,Centroid Y
2444,1905.0,17-Apr-02,17-Apr-02,19-Apr-02,3.0,4.0,0.0,Heavy rain,1.0,919.174,3.4,Indonesia,0,108.216,-7.28558


## CMAP Precipitation

In [15]:
# Load the CMAP precipitation time vectors (pentad and monthly) that were
# pickled by an earlier preprocessing step.
#
# NOTE: pickle.load can execute arbitrary code contained in the file, so
# only load pickles that this project produced itself.
#
# The files are opened with `with` so each handle is closed even when
# unpickling raises.

# load pentad time data
with open(dir_python_data + 'cmap_pentad_precip_time_dict.p', 'rb') as f:
    cmap_pentad_precip_time_dict = pickle.load(f)

# load monthly time data
with open(dir_python_data + 'cmap_monthly_precip_time_dict.p', 'rb') as f:
    cmap_monthly_precip_time_dict = pickle.load(f)

# show which time representations each dictionary provides
print(cmap_pentad_precip_time_dict.keys())
print(cmap_monthly_precip_time_dict.keys())

['cmap_precip_time_date_pentad', 'cmap_precip_time_str_pentad']
['cmap_precip_time_date_monthly', 'cmap_precip_time_str_monthly']


# III. Merge Data

## Location

### Map Centroid X to Grace Longitude

In [11]:
# Convert flood centroid longitudes into positions along the GRACE
# longitude axis.
#
# Step 1: negative (western) longitudes are shifted by a full turn so they
# follow the positive ones, i.e. x -> x + 360 (computed as
# (180 - (-x)) + 180).
a_centroid_x = df_flood_grace['Centroid X'].values.copy()
is_west = a_centroid_x < 0
a_centroid_x[is_west] = (180 - (a_centroid_x[is_west] * -1)) + 180
df_flood_grace['Centroid GRACE Lon'] = a_centroid_x

# Step 2: two grid steps per degree, rounded to the nearest whole step.
# NOTE(review): assumes the GRACE grid is half-degree (720 longitudes);
# confirm the rounding convention against the consumer of this index.
df_flood_grace['Centroid GRACE Lon'] = (df_flood_grace['Centroid GRACE Lon'] * 2).round()

### Map Centroid Y to Grace Latitude

In [12]:
# Convert flood centroid latitudes into positions along the GRACE
# latitude axis.
#
# Step 1: shift by 90 so that latitude -90 maps to 0.
a_centroid_y = df_flood_grace['Centroid Y'].values + 90
df_flood_grace['Centroid GRACE Lat'] = a_centroid_y

# Step 2: two grid steps per degree, rounded to the nearest whole step.
# NOTE(review): assumes the GRACE grid is half-degree (360 latitudes);
# confirm the rounding convention against the consumer of this index.
df_flood_grace['Centroid GRACE Lat'] = (df_flood_grace['Centroid GRACE Lat'] * 2).round()

## Time

### CMAP Monthly Precipitation Prior to Flood Event

In [13]:
# Date Association
#
# For every flood, find the CMAP monthly time step that lies closest to,
# but not after, the flood's 'Began' date. Two columns are added:
#   'Prior CMAP Precip Diff-Days Monthly' - days between that step and the flood
#   'Prior CMAP Precip Index Monthly'     - 0-based position of that step
# Floods with no CMAP step on or before them get NaN in both columns.

# convert cmap times to Series
time_datestr_series = pd.Series(cmap_monthly_precip_time_dict['cmap_precip_time_str_monthly'])

# Parse each date string exactly once. Day ordinals turn the date
# differences into plain array arithmetic.
cmap_days = np.array(
    [dt.datetime.strptime(s, "%Y-%m-%d").toordinal() for s in time_datestr_series],
    dtype=float)
flood_days = np.array(
    [dt.datetime.strptime(s, "%d-%b-%y").toordinal() for s in df_flood_grace['Began']],
    dtype=float)

# results start as NaN (= "no prior CMAP step found")
min_diff = np.empty(flood_days.size)
min_diff.fill(np.nan)
min_diff_ix = np.empty(flood_days.size)
min_diff_ix.fill(np.nan)

# find CMAP Precip index prior to each flood
for i in range(flood_days.size):

    # diff b/w flood and every cmap step, in days (positive = cmap earlier)
    diff = flood_days[i] - cmap_days

    # negative values indicate cmap steps that occur after the flood;
    # exclude them from the search
    diff[diff < 0] = np.inf

    if np.isfinite(diff).any():
        ix = diff.argmin()  # index of first occurrence of the minimum
        min_diff[i] = diff[ix]
        min_diff_ix[i] = ix

# add date association metrics to dataframe
df_flood_grace['Prior CMAP Precip Diff-Days Monthly'] = min_diff
df_flood_grace['Prior CMAP Precip Index Monthly'] = min_diff_ix

### CMAP Pentad Precipitation Prior to Flood Event

In [16]:
# Date Association
#
# For every flood, find the CMAP pentad time step that lies closest to,
# but not after, the flood's 'Began' date. Two columns are added:
#   'Prior CMAP Precip Diff-Days Pentad' - days between that step and the flood
#   'Prior CMAP Precip Index Pentad'     - 0-based position of that step
# Floods with no CMAP step on or before them get NaN in both columns.

# convert cmap times to Series
time_datestr_series = pd.Series(cmap_pentad_precip_time_dict['cmap_precip_time_str_pentad'])

# Parse each date string exactly once. Day ordinals turn the date
# differences into plain array arithmetic.
cmap_days = np.array(
    [dt.datetime.strptime(s, "%Y-%m-%d").toordinal() for s in time_datestr_series],
    dtype=float)
flood_days = np.array(
    [dt.datetime.strptime(s, "%d-%b-%y").toordinal() for s in df_flood_grace['Began']],
    dtype=float)

# results start as NaN (= "no prior CMAP step found")
min_diff = np.empty(flood_days.size)
min_diff.fill(np.nan)
min_diff_ix = np.empty(flood_days.size)
min_diff_ix.fill(np.nan)

# find CMAP Precip index prior to each flood
for i in range(flood_days.size):

    # diff b/w flood and every cmap step, in days (positive = cmap earlier)
    diff = flood_days[i] - cmap_days

    # negative values indicate cmap steps that occur after the flood;
    # exclude them from the search
    diff[diff < 0] = np.inf

    if np.isfinite(diff).any():
        ix = diff.argmin()  # index of first occurrence of the minimum
        min_diff[i] = diff[ix]
        min_diff_ix[i] = ix

# add date association metrics to dataframe
df_flood_grace['Prior CMAP Precip Diff-Days Pentad'] = min_diff
df_flood_grace['Prior CMAP Precip Index Pentad'] = min_diff_ix

### GRACE MASCON Prior to Flood Event

In [None]:
# Date Association
#
# For every flood, find the GRACE MASCON time step that lies closest to,
# but not after, the flood's 'Began' date. Two columns are added:
#   'Prior MASCON Diff-Days' - days between that MASCON and the flood
#   'Prior MASCON Index'     - 0-based position of that MASCON
# Floods with no MASCON on or before them get NaN in both columns.

# convert GRACE times to Series
s_grace_time_datestr = pd.Series(grace_time_datestr)

# Parse each date string exactly once. Day ordinals turn the date
# differences into plain array arithmetic.
mascon_days = np.array(
    [dt.datetime.strptime(s, "%d-%b-%Y").toordinal() for s in s_grace_time_datestr],
    dtype=float)
flood_days = np.array(
    [dt.datetime.strptime(s, "%d-%b-%y").toordinal() for s in df_flood_grace['Began']],
    dtype=float)

# results start as NaN (= "no prior MASCON found")
min_diff = np.empty(flood_days.size)
min_diff.fill(np.nan)
min_diff_ix = np.empty(flood_days.size)
min_diff_ix.fill(np.nan)

# find GRACE MASCON index for each flood
for i in range(flood_days.size):

    # diff b/w flood and every MASCON, in days (positive = MASCON earlier)
    diff = flood_days[i] - mascon_days

    # negative values indicate MASCONs that occur after the flood;
    # exclude them from the search
    diff[diff < 0] = np.inf

    if np.isfinite(diff).any():
        ix = diff.argmin()  # index of first occurrence of the minimum
        min_diff[i] = diff[ix]
        min_diff_ix[i] = ix

# add date association metrics to dataframe
df_flood_grace['Prior MASCON Diff-Days'] = min_diff
df_flood_grace['Prior MASCON Index'] = min_diff_ix

### GRACE MASCON After Flood Event

In [None]:
# Post-MASCON Date Association
#
# For every flood, find the first GRACE MASCON time step that falls
# strictly after the flood's 'Ended' date. Two columns are added:
#   'Post MASCON Diff-Days' - days between the flood end and that MASCON
#   'Post MASCON Index'     - 0-based position of that MASCON
# Floods with no MASCON after their end date get NaN in both columns.

# convert GRACE times to Series
s_grace_time_datestr = pd.Series(grace_time_datestr)

# Parse each date string exactly once. Day ordinals turn the date
# differences into plain array arithmetic.
mascon_days = np.array(
    [dt.datetime.strptime(s, "%d-%b-%Y").toordinal() for s in s_grace_time_datestr],
    dtype=float)
flood_days = np.array(
    [dt.datetime.strptime(s, "%d-%b-%y").toordinal() for s in df_flood_grace['Ended']],
    dtype=float)

# results start as NaN (= "no later MASCON found")
min_diff = np.empty(flood_days.size)
min_diff.fill(np.nan)
min_diff_ix = np.empty(flood_days.size)
min_diff_ix.fill(np.nan)

# find GRACE MASCON index for each flood
for i in range(flood_days.size):

    # diff b/w every MASCON and the flood end, in days (positive = MASCON later)
    diff = mascon_days - flood_days[i]

    # zero or negative values indicate MASCONs on or before the flood end;
    # exclude them from the search
    diff[diff <= 0] = np.inf

    if np.isfinite(diff).any():
        ix = diff.argmin()  # index of first occurrence of the minimum
        min_diff[i] = diff[ix]
        min_diff_ix[i] = ix

# add date association metrics to dataframe
df_flood_grace['Post MASCON Diff-Days'] = min_diff
df_flood_grace['Post MASCON Index'] = min_diff_ix

In [None]:
# Blank out both post-flood MASCON columns for the rows labelled 0 through
# 21 (label slices with .loc include both ends), where no MASCON exists
# after the flood.
# NOTE(review): the row range is hard-coded; re-check it whenever the flood
# table or the GRACE time span changes.
post_mascon_columns = ['Post MASCON Diff-Days', 'Post MASCON Index']
df_flood_grace.loc[0:21, post_mascon_columns] = np.nan

In [None]:
# Display the first row of the flood table, including the columns added by
# the cells above.
df_flood_grace.head(1)

# IV. Remove Flood Events with Large Diff-Days

In [None]:
# Large Difference b/w Flood and Prior GRACE Map
# Keep only floods whose preceding MASCON is at most 15 days old; rows with
# a larger (or missing) gap are dropped.
max_prior_mascon_days = 15
has_recent_mascon = df_flood_grace['Prior MASCON Diff-Days'] <= max_prior_mascon_days
df_flood_grace_copy = df_flood_grace.loc[has_recent_mascon]
df_flood_grace = df_flood_grace_copy

In [None]:
# Large Difference b/w Flood and Prior CMAP-Precip Map
# Keep only floods whose preceding CMAP pentad is at most 5 days old.
#
# The date-association cells create the columns
# 'Prior CMAP Precip Diff-Days Monthly' and 'Prior CMAP Precip Diff-Days Pentad';
# a bare 'Prior CMAP Precip Diff-Days' column does not exist and looking it
# up raises KeyError. The 5-day threshold matches the pentad (5-day)
# product, so the pentad column is used here.
bool_idx = df_flood_grace['Prior CMAP Precip Diff-Days Pentad'] <= 5
df_flood_grace_copy = df_flood_grace.loc[bool_idx]
df_flood_grace = df_flood_grace_copy

In [None]:
# Report how many flood events remain after the removals above, then
# preview the first rows of the filtered table.
n_events_remaining = df_flood_grace.shape[0]
print("Number of flood events in dataframe after removals: %s" % n_events_remaining)
df_flood_grace.head()

# V. Low Resolution Land Mask

In [None]:
# Build a coarse land mask: every scale x scale window of the
# high-resolution mask collapses to its maximum, so a coarse cell is set
# whenever any fine cell inside it is set (assuming a 0/1 mask - TODO confirm).
#
# Fixes relative to the previous loop version:
#   * windows now span the full `scale` rows/columns (they used to cover
#     only 5 of every 6, silently ignoring each 6th row and column);
#   * sizes use floor division so they stay integers under Python 3;
#   * the window size follows `scale` instead of a hard-coded 6.
data_highres = grace_land_mask
scale = 6

dim = data_highres.shape
d1 = dim[0]
d2 = dim[1]

d1_lowres = d1 // scale
d2_lowres = d2 // scale

# View the (trimmed) mask as (d1_lowres, scale, d2_lowres, scale) so that
# axes 1 and 3 enumerate the cells inside each window. Trimming drops any
# trailing rows/columns that do not fill a complete window.
# NOTE(review): assumes the mask is 2-D - confirm against the .mat file.
windows = data_highres[:d1_lowres * scale, :d2_lowres * scale].reshape(
    d1_lowres, scale, d2_lowres, scale)

# a RuntimeWarning may be emitted if a window contains NaN values
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)

    # float output, matching the previous NaN-initialised result array
    data_lowres = windows.max(axis=(1, 3)).astype(float)

grace_land_mask_lowres = data_lowres

# VI. Save

In [None]:
# full merged table (flood events with location and date-association columns)
df_flood_grace.to_csv('../Data/df_flood_grace_time_location.csv')

# reduced table for the MATLAB script "analysis_flood_visualization.m":
# GRACE grid position and prior-MASCON index of every flood
matlab_columns = ['Centroid GRACE Lon', 'Centroid GRACE Lat', 'Prior MASCON Index']
df_flood_grace[matlab_columns].to_csv('../Data/df_flood_graceLon_graceLat_priorMasconIdx.csv')

In [None]:
# Pickle the land masks and the GRACE variables into dir_python_data using
# the highest available pickle protocol (-1).
#
# Each file is written inside its own `with` block so that every handle is
# flushed and closed; previously only the last of the six files was closed
# explicitly.
pickle_targets = [
    # land mask
    ('grace_land_mask.p', grace_land_mask),
    ('grace_land_mask_lowres.p', grace_land_mask_lowres),
    # grace variables
    ('grace_lat.p', grace_lat),
    ('grace_lon.p', grace_lon),
    ('grace_uncertainty.p', grace_uncertainty),
    ('grace_time_datestr.p', grace_time_datestr),
]

for pickle_name, pickle_obj in pickle_targets:
    with open(dir_python_data + pickle_name, 'wb') as f:
        pickle.dump(pickle_obj, f, -1)