## This script investigates intensity of rainfall at selected clusters

#### Donghoon Lee @ Nov-11-2019

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio
from shapely.geometry import Point

- Load the geolocations of the clusters (provided by Jorge)


In [2]:
# Read the cluster table, discard rows with missing values, and rename the
# Spanish column names to short English ones (x = longitude, y = latitude).
filn_in = './data/Cluster_27-10-19.csv'
column_names = {'distrito': 'district', 'latitud_a': 'y', 'longitud_a': 'x'}
df = (
    pd.read_csv(filn_in, header=0)
    .dropna()
    .rename(columns=column_names)
)
df.head()

Unnamed: 0,id,region,district,y,x
0,1001,Amazonas,Aramango,-5.4123,-78.43911
1,2001,Amazonas,Aramango,-5.353548,-78.436368
2,3001,Amazonas,Aramango,-5.319083,-78.43484
3,4001,Amazonas,Aramango,-5.462448,-78.451157
4,5001,Amazonas,Aramango,-5.459933,-78.459775


- Save as shapefile

In [3]:
# GeoDataFrame needs a geometry column of shapely objects.
# points_from_xy builds the Point geometries (x = longitude, y = latitude) in
# one vectorised call, and `assign` returns a new frame so `df` is not mutated.
# The CRS is given as an authority string because the {'init': 'epsg:4326'}
# dict form is deprecated by pyproj/GeoPandas.
crs = 'EPSG:4326'
gdf = gpd.GeoDataFrame(
    df.assign(Coordinates=gpd.points_from_xy(df['x'], df['y'])),
    geometry='Coordinates',
    crs=crs,
)
# Write ESRI shapefile
filn_out = 'data/cluster.shp'
gdf.to_file(filn_out)
print('%s is saved.' % filn_out)

data/cluster.shp is saved.


- Information of shapefile we just generated (./data/cluster.shp)

In [4]:
# Preview the first rows of the GeoDataFrame that was written to data/cluster.shp
gdf.head()

Unnamed: 0,id,region,district,y,x,Coordinates
0,1001,Amazonas,Aramango,-5.4123,-78.43911,POINT (-78.43911 -5.4123)
1,2001,Amazonas,Aramango,-5.353548,-78.436368,POINT (-78.43636832999998 -5.353548332)
2,3001,Amazonas,Aramango,-5.319083,-78.43484,POINT (-78.43483999999998 -5.319083333)
3,4001,Amazonas,Aramango,-5.462448,-78.451157,POINT (-78.45115666 -5.462448332)
4,5001,Amazonas,Aramango,-5.459933,-78.459775,POINT (-78.45977499 -5.459933332)


- Load the PISCO data and select the time series of the grid cells in which the clusters are located
- Create Pandas dataframe

In [5]:
# Load Daily Unstable PISCO v2.1beta (1981-1-1 to 2018-07-31)
# The data directory defaults to the original local path but can be redirected
# with the PISCO_DIR environment variable instead of editing the notebook.
dir_out = os.environ.get(
    'PISCO_DIR',
    '/Users/dlee/data/pisco/PISCOp_V2.1_beta/Daily_Products/unstable/')
file_out = os.path.join(dir_out, 'piscopd_180731_dlee.npz')
# An .npz archive stays open until closed; read everything inside the context
# manager so the file handle is released afterwards.
with np.load(file_out) as temp:
    prcp, lat, lon = temp['prcp'], temp['lat'], temp['lon']
    tim = pd.to_datetime(temp['tim'])
tim.freq = 'D'
nlat = len(lat)
nlon = len(lon)
# Reshape the data to a 2D ndarray (time x grid cell): swap the last two axes,
# then flatten them.
# NOTE(review): assumes prcp is stored as (time, lat, lon), so that the flat
# column index equals row + nlat*col -- confirm against the .npz producer.
prcp = np.transpose(prcp, [0, 2, 1])
prcp = np.reshape(prcp, [tim.shape[0], nlat*nlon])

# Read the raster transform from a sample GeoTiff file
# (rasterio is already imported in the first cell)
sample = os.path.join(dir_out, 'PISCOpd', 'PISCOpd_V2.1.1981-01-01.tif')
# Generate the flat grid-cell index of every cluster: src.index maps (x, y) to
# (row, col), which is raveled in Fortran order to match the reshape above.
with rasterio.open(sample) as src:
    inds = [
        np.ravel_multi_index(src.index(x, y), [nlat, nlon], order='F')  # Validated manually
        for x, y in zip(gdf['x'], gdf['y'])
    ]
# Select grids of clusters
prcp_pisco = prcp[:, inds]    # (Validated)
# Convert to dataframe (rows = days, columns = cluster id)
df = pd.DataFrame(prcp_pisco, index=tim, columns=gdf['id'])
# Missing data to zero
# NOTE(review): negative values are treated as missing and replaced by 0, so
# those cells count as zero rainfall in every later mean/sum; NaN may be the
# more appropriate fill -- confirm before relying on clusters without data.
df[df < 0] = 0

### Analyses requested by Jorge
1. Determine the average rainfall only for the months of February and March from 1980 to 2010 and excluding from the analysis the years 1982, 1983, 1997 and 1998 (years of the occurrence of Niño phenomena)
2. Determine the average of the sum of rainfall only for the months of February and March from 1980 to 2010 and excluding from the analysis the years 1982, 1983, 1997 and 1998 (years of the occurrence of Niño phenomena)
3. Determine the average rainfall only for the months of February and March of the year 2017 (Coastal Niño)
4. Determine the total sum of rainfall only for the months of February and March of the year 2017 (Coastal Niño)
5. The previous analyses can be done with the PISCO raster files for all of Peru (that information would serve us for future analysis), extracting the information for each of my clusters (3584) afterwards, or they can be done only for my clusters. (attached file of my clusters)

In [6]:
# Years excluded from the climatology (supposed extremely wet by El Niño)
nino_years = [1983, 1998]


def _month_mean(monthly, month):
    """Return the column-wise mean of the rows of `monthly` in calendar month `month`.

    `monthly` is a DataFrame with a DatetimeIndex holding one row per month;
    the result is a Series with one value per column of `monthly`.
    """
    return monthly[monthly.index.month == month].mean()


# Climatological monthly precipitation: every record before 2011.
# NOTE(review): the PISCO loading comment gives 1981-01-01 as the first day, so
# the period is effectively 1981-2010 (28 years after the exclusion) -- confirm.
baseline = df[df.index < '2011-01-01']
pmean = baseline.resample('M').mean()    # monthly mean of the daily values
psum = baseline.resample('M').sum()      # monthly total
# Exclude the El Niño years
pmean = pmean[~np.isin(pmean.index.year, nino_years)]
psum = psum[~np.isin(psum.index.year, nino_years)]

# Naming: p<period><stat><months>
#   period: 28 = climatology (El Niño years removed), 17 = year 2017
#   stat:   m = mean rainfall, s = rainfall sum
#   months: 1/2/3 = January/February/March, 13 = January-March
p28m1 = _month_mean(pmean, 1)
p28m2 = _month_mean(pmean, 2)
p28m3 = _month_mean(pmean, 3)
p28m13 = (p28m1+p28m2+p28m3)/3           # unweighted mean of the three monthly means
p28s1 = _month_mean(psum, 1)             # average over years of the monthly sums
p28s2 = _month_mean(psum, 2)
p28s3 = _month_mean(psum, 3)
p28s13 = p28s1+p28s2+p28s3               # averaged Jan-Mar sum

# Same statistics for 2017 only. Each month occurs once, so the "mean" over
# the selected rows is simply that month's value.
rain17 = df[df.index.year == 2017]
pmean17 = rain17.resample('M').mean()
psum17 = rain17.resample('M').sum()
p17m1 = _month_mean(pmean17, 1)
p17m2 = _month_mean(pmean17, 2)
p17m3 = _month_mean(pmean17, 3)
p17m13 = (p17m1+p17m2+p17m3)/3           # unweighted mean of the three monthly means
p17s1 = _month_mean(psum17, 1)
p17s2 = _month_mean(psum17, 2)
p17s3 = _month_mean(psum17, 3)
p17s13 = p17s1+p17s2+p17s3               # 2017 Jan-Mar sum

# Summary: keep each column name next to its values so the two cannot drift
# out of order.
summary = {
    'p28m1': p28m1, 'p28m2': p28m2, 'p28m3': p28m3, 'p28m13': p28m13,
    'p28s1': p28s1, 'p28s2': p28s2, 'p28s3': p28s3, 'p28s13': p28s13,
    'p17m1': p17m1, 'p17m2': p17m2, 'p17m3': p17m3, 'p17m13': p17m13,
    'p17s1': p17s1, 'p17s2': p17s2, 'p17s3': p17s3, 'p17s13': p17s13,
}
columns = list(summary)
data = np.vstack(list(summary.values())).transpose()
result = pd.DataFrame(data, index=pmean.columns, columns=columns)
# Save results
result.to_csv('./data/result.csv')

Hi Jorge,

Please find the result file (./data/result.csv). In the column names:
- "p" represents precipitation, so all variables start with "p"
- "28" means the average value over the 28 years of 1981-2010, i.e. excluding 1983 and 1998 (the PISCO record starts in 1981)
- "17" means the 2017 value
- "m" means the average value (mean daily rainfall)
- "s" means the summation value (total rainfall)
- "1", "2", "3", and "13" represent January, February, March, and Jan-Mar, respectively

For example,
- p28m2: Averaged February rainfall during 1981-2010 (1983,1998 removed)
- p28s13: Averaged sum of Jan-Mar rainfall during 1981-2010 (1983,1998 removed)
- p17m13: Averaged 2017 Jan-Mar rainfall

Also, I found that 31 clusters fall outside the PISCO grid. For example,
![example of the clusters out of PISCO grid](./data/outofgrid.png)
