## This script investigates intensity of rainfall at selected clusters

#### Donghoon Lee @ Oct-28-2019

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import geopandas as gpd
import rasterio
from shapely.geometry import Point

In [2]:
filn_in = os.path.join('./data/Cluster_27-10-19.csv')
df = pd.read_csv(filn_in, header=0).dropna()
df = df.rename(columns={'distrito':'district',
                       'latitud_a':'y',
                       'longitud_a':'x'})
df.head()

Unnamed: 0,id,region,district,y,x
0,1001,Amazonas,Aramango,-5.4123,-78.43911
1,2001,Amazonas,Aramango,-5.353548,-78.436368
2,3001,Amazonas,Aramango,-5.319083,-78.43484
3,4001,Amazonas,Aramango,-5.462448,-78.451157
4,5001,Amazonas,Aramango,-5.459933,-78.459775


In [3]:
# GeoDataFrame needs a shapely object
df['Coordinates'] = list(zip(df.x, df.y))           # Coordinates
df['Coordinates'] = df['Coordinates'].apply(Point)  # tuples to Shapely's Point
crs = {'init': 'epsg:4326'}
gdf = gpd.GeoDataFrame(df, crs=crs, geometry='Coordinates')

# Write ESRI shapefile
filn_out = os.path.join('data/cluster.shp')
gdf.to_file(filn_out)
print('%s is saved.' % filn_out)

data/cluster.shp is saved.


Information of shapefile we just generated (./data/cluster.shp)

In [6]:
gdf.head()

Unnamed: 0,id,region,district,y,x,Coordinates
0,1001,Amazonas,Aramango,-5.4123,-78.43911,POINT (-78.43911 -5.4123)
1,2001,Amazonas,Aramango,-5.353548,-78.436368,POINT (-78.43636832999998 -5.353548332)
2,3001,Amazonas,Aramango,-5.319083,-78.43484,POINT (-78.43483999999998 -5.319083333)
3,4001,Amazonas,Aramango,-5.462448,-78.451157,POINT (-78.45115666 -5.462448332)
4,5001,Amazonas,Aramango,-5.459933,-78.459775,POINT (-78.45977499 -5.459933332)


Load PISCO data and Select time-series of grids that clusters are located

In [7]:
# Load Daily Unstable PISCO v2.1beta (1981-1-1 to 2018-07-31)
dir_out = '/Users/dlee/data/pisco/PISCOp_V2.1_beta/Daily_Products/unstable/'
file_out = dir_out+'piscopd_180731_dlee.npz'
temp = np.load(file_out)
prcp, lat, lon = temp['prcp'], temp['lat'], temp['lon']
nlat = len(lat)
nlon = len(lon)
tim = pd.to_datetime(temp['tim']); tim.freq = 'D'
# Reshape the data to 2D ndarray
prcp = np.transpose(prcp, [0,2,1])
prcp = np.reshape(prcp, [tim.shape[0], nlat*nlon])

# Read transform from sample GeoTiff file
import rasterio
sample = '/Users/dlee/data/pisco/PISCOp_V2.1_beta/Daily_Products/unstable/PISCOpd/PISCOpd_V2.1.1981-01-01.tif'
# Generate index of coordinates
inds = []
with rasterio.open(sample) as src:
    for _, row in gdf.iterrows():
        inds.append(np.ravel_multi_index(src.index(row.x, row.y),\
                                         [nlat,nlon], order='F'))     # Validated mannually

# Select grids of clusters
prcp_pisco = prcp[:,inds]    # (Validated)

In [8]:
# Convert to dataframe
df = pd.DataFrame(prcp_pisco, index=tim, columns=gdf.id)

print(len(tim))
print(len(gdf))
print(df.shape)

13726
3572
(13726, 3572)


In [10]:
# df[[1001,10001]].resample("1m").mean().plot(subplots=True)

# ref = 
a = df[[1001]]
a.index <= '2011-01-01'

array([ True,  True,  True, ..., False, False, False])

In [11]:
# Monthly precipitation climatology (mean and max)
monMean = df.resample("1m").mean()
monMax = df.resample("1m").max()
monMean.head()

id,1001,2001,3001,4001,5001,6001,7001,8001,9001,10001,...,35750025,35760025,35770025,35780025,35790025,35800025,35810025,35820025,35830025,35840025
1981-01-31,4.429976,6.891397,6.891397,4.429976,4.429976,4.429976,1.687374,1.687374,1.687374,1.687374,...,6.963278,6.963278,6.963278,6.963278,6.963278,6.963278,6.963278,6.963278,6.963278,7.865619
1981-02-28,5.036967,5.740608,5.740608,5.036967,5.036967,5.036967,4.753868,4.753868,4.753868,4.753868,...,4.533327,4.533327,4.533327,4.533327,4.533327,4.533327,4.533327,4.533327,4.533327,4.938926
1981-03-31,6.348108,7.220312,7.220312,6.348108,6.348108,6.348108,4.999495,4.999495,4.999495,4.999495,...,3.672001,3.672001,3.672001,3.672001,3.672001,3.672001,3.672001,3.672001,3.672001,3.944103
1981-04-30,2.805945,3.44699,3.44699,2.805945,2.805945,2.805945,3.506111,3.506111,3.506111,3.506111,...,8.088258,8.088258,8.088258,8.088258,8.088258,8.088258,8.088258,8.088258,8.088258,6.850173
1981-05-31,3.973523,5.090751,5.090751,3.973523,3.973523,3.973523,2.482118,2.482118,2.482118,2.482118,...,8.637032,8.637032,8.637032,8.637032,8.637032,8.637032,8.637032,8.637032,8.637032,6.724232


### Tasks
1. Determine the average rainfall only for the months of February and March from 1980 to 2010 and excluding from the analysis the years 1982, 1983, 1997 and 1998 (years of the occurrence of Niño phenomena)

In [12]:
prcp_feb_mean = df[df.index.month==2].mean()
prcp_feb_mean = df[df.index.month==2].max()

In [14]:
prcp_feb_mean

id
1001         72.922752
2001         91.931229
3001         91.931229
4001         72.922752
5001         72.922752
               ...    
35800025     89.631165
35810025     89.631165
35820025     89.631165
35830025     89.631165
35840025    113.940224
Length: 3572, dtype: float64

2. Determine the average of the sum of rainfall only for the months of February and March from 1980 to 2010 and excluding from the analysis the years 1982, 1983, 1997 and 1998 (years of the occurrence of Niño phenomena)

3. Determine the average rainfall only for the months of February and March of the year 2017 (Coastal Niño)

4. Determine the total sum of rainfall only for the months of February and March of the year 2017 (Coastal Niño)

5. The previous analyzes can be done with the Pisco raster files for all of Peru (that information would serve us for future analysis) and then extract the information for each of my cluster (3584), or you can also extract it only for my cluster. (attached file of my cluster)