# Process the GPM NASA rainfall timeseries data

This has been downloaded using the package `gpm_precipitation_tools` as monthly data for the period between 2014 and 2020.
The area of study is given by the shapefile `aoi_norway.shp`.

See `instructions.md` for caveats of using this data. 

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import xarray as xr

In [None]:
# Open the NetCDF file holding the rainfall time series as an xarray Dataset.
dir_path = '../GPM_RAW_MONTH_2014-01-01_2020-12-31_processed/run_start_2014-01-01_end_2020-12-31/'
file_name = "joint_ds_with_all_times.nc"
# dir_path already ends with a path separator, so the two parts are simply joined.
rainfall = xr.open_dataset(f"{dir_path}{file_name}")


In [None]:
# Show the notebook representation of the loaded dataset.
rainfall

In [None]:
# Inspect the precipitation entry at position 1 along the first dimension
# (presumably the second time step -- confirm the dimension order).
rainfall.precipitation[1]

In [None]:
# Quick-look image of the precipitation entry at position 1, with singleton
# dimensions dropped so that imshow receives a plain 2-D array.
precip_slice = rainfall["precipitation"][1]
plt.imshow(precip_slice.squeeze())
plt.colorbar()

Note the coarse resolution of the data!!

#### Ground motion data

In [None]:
# load the list of latitudes and longitudes from the ground motion csv

gm_csv = pd.read_csv('./tmp/160-IW1-414-s1-asc1-v2020.csv')

In [None]:
# Remove leading/trailing whitespace from every column header, in place.
gm_csv.rename(columns=str.strip, inplace=True)


In [None]:
# Coordinate columns of the ground motion points.
target_lat, target_lon = gm_csv['latitude'], gm_csv['longitude']

In [None]:
# Plain Python lists of the coordinates, one entry per ground motion point.
target_lat_list = target_lat.tolist()
target_lon_list = target_lon.tolist()

In [None]:
# Per-point table indexed by pid holding the coordinates of every ground
# motion point (columns 'lat' and 'lon').
rainfall_df = (
    gm_csv[['latitude', 'longitude']]
    .rename(columns={'latitude': 'lat', 'longitude': 'lon'})
    .set_index(gm_csv['pid'])
)

In [None]:
# Wrap the first longitude in a DataArray (single-point trial; the loop in
# the following cell rebuilds this variable for every point).
target_lon_da = xr.DataArray(target_lon_list[0])

Find the closest precipitation value for each of the ground motion points

In [None]:
# For every ground motion point, select the rainfall data at the grid
# coordinates nearest to the point and collect the selections in csv row order.
rainfall_timeseries = []

for point_lon, point_lat in zip(target_lon_list, target_lat_list):
    target_lon_da = xr.DataArray(point_lon)
    target_lat_da = xr.DataArray(point_lat)

    # Nearest-neighbour lookup: x is matched to longitude, y to latitude
    da_closest_points = rainfall.sel(
        x=target_lon_da,
        y=target_lat_da,
        method="nearest",
    )
    # Echo the matched grid coordinates so the snapping can be checked by eye
    print(da_closest_points.x.values, da_closest_points.y.values)
    rainfall_timeseries.append(da_closest_points)
    
    

In [None]:
# Time coordinate values taken from the first extracted point selection.
time_array = rainfall_timeseries[0].time.values

In [None]:
# Point IDs of the ground motion points, in csv row order.
column_names = gm_csv['pid'].tolist()

In [None]:
# Empty frame (no rows yet) with one column per ground motion pid.
rainfall_timeseries_df = pd.DataFrame(columns = column_names)

In [None]:
# add the precipitation values to each of the ground motion points
# this is a new dataframe that has the same column names (pid) as the
# ground motion so it's easier to match the points.
#
# The table is built in one step from the extracted series. Assigning column
# by column with .iloc into a frame that has no rows relies on
# pandas-version-specific enlargement behaviour and leaves object-dtype columns.
precipitation_columns = [
    np.atleast_1d(point_series.precipitation.values.squeeze())
    for point_series in rainfall_timeseries
]
# Positional keys keep every point as its own column even if a pid repeats;
# the pid labels are attached afterwards.
rainfall_timeseries_df = pd.DataFrame(dict(enumerate(precipitation_columns)))
rainfall_timeseries_df.columns = column_names

In [None]:
# Swap the default row labels for the time stamps of the rainfall series.
rainfall_timeseries_df.index = pd.Index(time_array)

In [None]:
# Regression predictor: a counter 0, 1, 2, ... with one value per row,
# moved to the front so that it is the very first column of the table.
step_count = len(rainfall_timeseries_df)
rainfall_timeseries_df['timeseries_step'] = np.arange(step_count)
first_column = rainfall_timeseries_df.pop('timeseries_step')
rainfall_timeseries_df.insert(loc=0, column='timeseries_step', value=first_column)

### Rainfall timeseries regression

In [None]:
from scipy import stats

# Linear regression of precipitation against the time step, fitted separately
# for every point column. The full linregress result objects are kept, in
# dataframe column order.

# The predictor is identical for all points, so it is prepared once.
x = rainfall_timeseries_df[['timeseries_step']].values.squeeze().astype(float)

# The positional range starts at 1, skipping the first column.
all_y_reg = [
    stats.linregress(
        x, rainfall_timeseries_df.iloc[:, position].values.astype(float)
    )
    for position in range(1, len(rainfall_timeseries_df.columns))
]


In [None]:
# Unpack the regression results into one list per statistic.
# Every list keeps the order of all_y_reg, i.e. the order of the dataframe columns.
all_rvalue = [fit.rvalue for fit in all_y_reg]
all_slope_values = [fit.slope for fit in all_y_reg]
all_intercept_values = [fit.intercept for fit in all_y_reg]
all_pvalue = [fit.pvalue for fit in all_y_reg]
all_stderr = [fit.stderr for fit in all_y_reg]
all_intercept_stderr = [fit.intercept_stderr for fit in all_y_reg]

    

In [None]:
# Frame of r values (single column, labelled 0 for now) whose rows carry the
# point IDs, i.e. every timeseries column label except the first one.
df_all_rvalue = pd.DataFrame(
    all_rvalue,
    index=rainfall_timeseries_df.columns[1:],
)

In [None]:
# Attach the remaining regression statistics as extra columns, then give the
# unnamed r-value column (label 0) its proper name.
df_all_rvalue = df_all_rvalue.assign(
    slope=all_slope_values,
    intercept=all_intercept_values,
    pvalue=all_pvalue,
    stderr=all_stderr,
    intercept_stderr=all_intercept_stderr,
).rename(columns={0: 'rvalue'})

In [None]:
# Name the row index 'pid'.
df_all_rvalue.index.name = 'pid'

In [None]:
# Write the per-point rainfall table to disk, including its row index.
rainfall_timeseries_df.to_csv('rainfall_timeseries_data.csv', index=True)

In [None]:
# save all the regression values in a csv file, including the pid index

df_all_rvalue.to_csv('rainfall_regression_values.csv', index=True)


In [None]:
# Second name for the same frame: this is an alias, not a copy, so changes
# made through either name are visible through both.
df_linreg = df_all_rvalue

In [None]:
# Sort every point ID into one of four groups, based on the r value and
# slope of its rainfall regression.
linear_ascending = []
linear_descending = []
linear_constant = []
irregular = []

for point_id, r_value, slope in zip(
    df_linreg.index, df_linreg['rvalue'], df_linreg['slope']
):
    if r_value > 0.9:
        # strong positive linear trend
        linear_ascending.append(point_id)
    elif r_value < -0.9:
        # strong negative linear trend
        linear_descending.append(point_id)
    elif -0.025 < r_value < 0.025 and -0.01 < slope < 0.01:
        # practically no correlation with time and a near-zero slope
        linear_constant.append(point_id)
    else:
        irregular.append(point_id)

In [None]:
# Report how many points fell into each regression group.
print(
    f'ascending: {len(linear_ascending)}, descending: {len(linear_descending)},'
    f'constant: {len(linear_constant)}, irregular:{len(irregular)}'
)

Basically, most of the ground motion data points correspond to only a couple of rainfall 
pixels because the resolution is so much coarser in the precipitation data. 

It is very hard to find any correlation between the rainfall and the ground motion data because of this. 

In [None]:
# New column for the regression group label, initialised to empty strings.
df_all_rvalue['reg_type'] = ""

In [None]:
# Label every point in the ascending group. One .loc call on the frame itself
# replaces the chained `df.reg_type.loc[...] = ...` form, which may write to a
# temporary copy (SettingWithCopyWarning; no effect under pandas Copy-on-Write).
df_all_rvalue.loc[linear_ascending, 'reg_type'] = 'ascending'

In [None]:
# Label every point in the descending group. One .loc call on the frame itself
# replaces the chained `df.reg_type.loc[...] = ...` form, which may write to a
# temporary copy (SettingWithCopyWarning; no effect under pandas Copy-on-Write).
df_all_rvalue.loc[linear_descending, 'reg_type'] = 'descending'

In [None]:
# Label every point in the constant group. One .loc call on the frame itself
# replaces the chained `df.reg_type.loc[...] = ...` form, which may write to a
# temporary copy (SettingWithCopyWarning; no effect under pandas Copy-on-Write).
df_all_rvalue.loc[linear_constant, 'reg_type'] = 'constant'

In [None]:
# Label every point in the irregular group. One .loc call on the frame itself
# replaces the chained `df.reg_type.loc[...] = ...` form, which may write to a
# temporary copy (SettingWithCopyWarning; no effect under pandas Copy-on-Write).
df_all_rvalue.loc[irregular, 'reg_type'] = 'irregular'

In [None]:
# Write the regression table again, now including the reg_type column
# (same file name as the earlier export, so that file is overwritten).
df_all_rvalue.to_csv('rainfall_regression_values.csv')

Checking what the rainfall points are doing - The first time this analysis is run for a new set of points it is a good idea to look at what the rainfall values are. I know now that there isn't much of a variation between ground motion points but a priori, it is hard to know. 

In [None]:
# Plot one example 'irregular' rainfall series against time.
# The example position is arbitrary; guard it so that a data set with fewer
# irregular points does not abort the notebook with an IndexError.
example_position = 4
if len(irregular) > example_position:
    plt.plot(rainfall_timeseries_df.index, rainfall_timeseries_df[irregular[example_position]])
else:
    print(f'Only {len(irregular)} irregular point(s); no example at position {example_position} to plot')


In [None]:
### WIDGET - IRREGULAR
from ipywidgets import interact

plt.rcParams['figure.figsize'] = [7,4]

column_name = rainfall_timeseries_df.columns[1:]

def plot_data(column_number):
    """Plot one 'irregular' rainfall series together with its regression line.

    Parameters
    ----------
    column_number : int
        Position within the ``irregular`` list of the point to display.
    """
    point_id = irregular[column_number]
    # Take the time steps from the table itself instead of relying on a
    # global `x` left behind by the regression cell.
    steps = rainfall_timeseries_df['timeseries_step'].to_numpy(dtype=float)
    plt.plot(rainfall_timeseries_df[point_id].values)
    y_regression = (
        df_all_rvalue.slope.loc[point_id] * steps
        + df_all_rvalue.intercept.loc[point_id]
    )
    plt.plot(y_regression)
    plt.xlabel('Time (months)')
    plt.ylabel('Precipitation (mm)')
    plt.title('Irregular')

# Slider over every position of the irregular list
interact(plot_data, column_number=(0,len(irregular)-1,1))

In [None]:
# Values of the first row for every point column (the leading step column is
# skipped), drawn as star markers.
plt.plot(rainfall_timeseries_df.iloc[0, 1:].values.tolist(), '*')

Here there are very clearly only two values for the rainfall corresponding to the two rainfall pixels I have been mentioning. 

In [None]:
# Distinct values found in the first row across all point columns.
my_arr = rainfall_timeseries_df.iloc[0, 1:].values
np.unique(my_arr)

In [None]:
# Preview the first rows of the rainfall table.
rainfall_timeseries_df.head()

# Plot points in the map

Plotting the extent of the bounding box of the rainfall data and where the ground motion data lies with respect to those points. 

We can see here how much the resolution varies between the two datasets.

In [None]:
#https://georgetsilva.github.io/posts/mapping-points-with-folium/

from shapely.geometry import Point
import folium

# Every point below is passed to folium.Marker as a two-element list
# (presumably [latitude, longitude] -- confirm against the folium docs).

# Two opposite corners of the ground motion data.
# NOTE(review): these were originally labelled "top left" (first) and
# "bottom right" (last), but if the values are [lat, lon] the first point lies
# further south-east and the last further north-west -- confirm which is which.
my_point_first = [ 69.324028, 20.193796]
my_point_last = [69.352196, 20.129965]
# 4 corners of the rainfall data grid
limit_bottom_right = [69.221508, 20.62352]
limit_top_left = [69.407627, 19.744829]
limit_top_right = [69.407627, 20.62352]
limit_bottom_left = [69.221508, 19.744829]

# Named `rainfall_map` rather than `map` so that the built-in `map` function
# is not shadowed for the rest of the notebook session.
rainfall_map = folium.Map(location=[69.2, 20.2], zoom_start=8)

# Markers are added in the same order as before: the two ground motion
# corners first, then the four rainfall grid corners.
marker_locations = (
    my_point_first,
    my_point_last,
    limit_bottom_right,
    limit_top_right,
    limit_top_left,
    limit_bottom_left,
)
for marker_location in marker_locations:
    folium.Marker(marker_location).add_to(rainfall_map)

rainfall_map