# ESTIMATION OF POPULATION EXPOSURE TO WIND SWATH

In [1]:
from pmap import *

In [2]:
import numpy as np

In [3]:
from mpl_toolkits.basemap import Basemap

In [4]:
import matplotlib.pyplot as plt 

In [5]:
import pyresample

In [6]:
import matplotlib

In [7]:
import matplotlib as mpl

In [8]:
import pandas as pd
import numpy as np
import urllib2,urllib


In [9]:
%matplotlib inline

## Read population data

In [10]:
# population data file
filename='../../POPEXPO/lspop20141.tif'

In [11]:
dataset=gdal.Open(filename) 

In [12]:
dataset.GetProjection()

'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433],AUTHORITY["EPSG","4326"]]'

In [13]:
width=dataset.RasterXSize
width

43200

In [14]:
height=dataset.RasterYSize
height

21600

In [15]:
gt=dataset.GetGeoTransform()
gt

(-180.0, 0.0083333333333333, 0.0, 89.99999999999929, 0.0, -0.0083333333333333)

In [16]:
minx = gt[0]                                                                                                                            
miny = gt[3] + width*gt[4] + height*gt[5]                                                                                               
maxx = gt[0] + width*gt[1] + height*gt[2]                                                                                               
maxy = gt[3]                                                                                                                            
                                                                                                                                          
lon=np.linspace(minx,maxx,width,endpoint=True)                                                                                          
lat=np.linspace(miny,maxy,height,endpoint=True)                                                                                         
                                                                                                                                          

In [17]:
lat=lat[::-1]

In [18]:
v = dataset.GetRasterBand(1)

In [19]:
nan=v.GetNoDataValue()

In [20]:
gdal.GetDataTypeName(v.DataType)

'Int32'

In [21]:
v.DataType

5

In [22]:
parallels = np.arange(-90.,90,20.)                                                                                                        
meridians = np.arange(0.,360.,20.)                                                                                                        

# TEST WITH TC

## read an old geotif file

In [99]:
wind=getmap('tmp/wind10mmalakas18w2016091600.tif')

In [100]:
wind

Bunch(CELLSIZE=0.04999999999999716, DRIVER='PCRaster Raster File', GeoTr=(106.75, 0.04999999999999716, 0.0, 53.1, 0.0, -0.04999999999999716), NCOLS=1225, NROWS=942, Proj='', XSTART=106.75, YSTART=53.1, data=array([[-999., -999., -999., ..., -999., -999., -999.],
       [-999., -999., -999., ..., -999., -999., -999.],
       [-999., -999., -999., ..., -999., -999., -999.],
       ..., 
       [-999., -999., -999., ..., -999., -999., -999.],
       [-999., -999., -999., ..., -999., -999., -999.],
       [-999., -999., -999., ..., -999., -999., -999.]], dtype=float32), lats=[], lons=[], nan=-3.4028234663852886e+38)

In [26]:
# compute lat/lon
widthw=wind.NCOLS
heightw=wind.NROWS
gw=wind.GeoTr
wminx = gw[0]                                                                                                                            
wminy = gw[3] + widthw*gw[4] + heightw*gw[5]                                                                                               
wmaxx = gw[0] + widthw*gw[1] + heightw*gw[2]                                                                                               
wmaxy = gw[3]                                                                                                                            
                                                                                                                                          
wlon=np.linspace(wminx,wmaxx,widthw,endpoint=True)                                                                                          
wlat=np.linspace(wminy,wmaxy,heightw,endpoint=True)                                                                                         


In [27]:
w=np.flipud(wind.data)  # the issue of geotiff orientation

In [29]:
mwind=np.ma.masked_array(w,w==-999) # pay attention on the nan value

In [30]:
# full window expand
minlon=wlon.min()
maxlon=wlon.max()
minlat=wlat.min()
maxlat=wlat.max()

In [31]:
xw,yw=np.meshgrid(wlon,wlat)

In order to save memory usage we select the window that actually has data and perform the analysis there

In [32]:
minlon=xw[ w!=-999 ].min()
maxlon=xw[ w!=-999 ].max()
minlat=yw[ w!=-999 ].min()
maxlat=yw[ w!=-999 ].max()

## read a recent HWRF ascii file

In [33]:
r = pd.read_csv('tmp/wind10m.txt',header=0)

In [34]:
r.head()

Unnamed: 0,102.25 154.45 1.45 38.80 0.05 1045 748
0,1.45 102.25 -999.00
1,1.45 102.30 -999.00
2,1.45 102.35 -999.00
3,1.45 102.40 -999.00
4,1.45 102.45 -999.00


In [35]:
[nj,ni]=r.columns[0].split()[-2:]# the last 2 values of the header define grid size nj,ni

In [36]:
ni=np.int(ni)
nj=np.int(nj)
nn=ni*nj

In [37]:
ni,nj

(748, 1045)

In [38]:
wswath=r[:nn]

In [39]:
s=wswath[wswath.columns[0]].str.split(expand=True)

In [40]:
s.columns=['lat','lon','wind']

In [41]:
s.head()

Unnamed: 0,lat,lon,wind
0,1.45,102.25,-999.0
1,1.45,102.3,-999.0
2,1.45,102.35,-999.0
3,1.45,102.4,-999.0
4,1.45,102.45,-999.0


In [42]:
s=s.apply(pd.to_numeric)

In [43]:
yw=s['lat'].values.reshape(ni,nj) 
xw=s['lon'].values.reshape(ni,nj)
wind=s['wind'].values.reshape(ni,nj)

In [44]:
minlon=xw[ wind !=-999 ].min()
maxlon=xw[ wind !=-999 ].max()
minlat=yw[ wind !=-999 ].min()
maxlat=yw[ wind !=-999 ].max()

In [45]:
mwind=np.ma.masked_array(wind,wind==-999)

##### test the retrieval

In [46]:
%%skip
wlevs = [30.,34.,50.,64.,83.,96.,114.,135.] #specify color map to match NOAA's HWRF
colors=['c','green','darkgreen','yellow','orange','r','brown']
cmap=mpl.colors.ListedColormap(colors)
norm=mpl.colors.BoundaryNorm(wlevs,cmap.N)

cmap.set_over(mpl.colors.colorConverter.to_rgb('m'))
cmap.set_under(mpl.colors.colorConverter.to_rgb('w'))

In [47]:
%%skip
plt.figure(figsize=(12,8))
plt.contourf(xw,yw,mwind,wlevs,cmap=cmap,norm=norm,boundaries=[0]+wlevs+[150.],extend='both')
plt.colorbar()
plt.contour(xw,yw,mwind,wlevs)
plt.figtext(0.5,0.03,'MAX WIND (KTS) '+np.str(wind.max()))

### geoplot

In [48]:
m = Basemap(projection='cyl',llcrnrlat=minlat,urcrnrlat=maxlat, llcrnrlon=minlon,urcrnrlon=maxlon,resolution='l')    

In [49]:
%%skip
m.contourf(xw,yw,mwind,20,cmap=plt.cm.RdBu_r,animated=True)
m.colorbar()
m.drawcoastlines(linewidth=1.5)                                                                                                           
m.drawparallels(parallels)                                                                                                                
m.drawmeridians(meridians)                                                                                                                

## compute the range of indeces to extract

Note that the range is -180:180. However when we read the tiff file we set the initial offset and the size of the x-domain. The reader rewinds in the beginning when the size exceeds the available size. 

In [50]:
i1=np.abs(lon-minlon).argmin()
if lon[i1] > minlon: i1=i1-1
i2=np.abs(lon-maxlon).argmin()
if lon[i2] < maxlon: i2=i2+1
                                                                                                                                          
j1=np.abs(lat-minlat).argmin()
if lat[j1] > minlat: j1=j1-1
j2=np.abs(lat-maxlat).argmin()
if lat[j2] < maxlat: j2=j2+1   

In [51]:
print i1,i2,j1,j2

35201 38928 9522 7207


extract from the population file

In [52]:
pop=v.ReadAsArray(i1,j2,i2-i1,j1-j2)

In [53]:
mpop=np.ma.masked_array(pop,pop==nan) # mask the nan values

In [54]:
#create a lat/lon for analysis and ploting
X=lon[i1:i2]                                                                                                               
Y=lat[j2:j1]                                                                                                               
                                                                                                                                          
x,y=np.meshgrid(X,Y)

### plot

In [55]:
%%skip
m.contourf(x,y,mpop,20,cmap=plt.cm.RdBu_r,animated=True)

## project wind to pop

In [56]:
orig=pyresample.geometry.SwathDefinition(lons=xw,lats=yw) # the wind grid geometry

In [57]:
targ=pyresample.geometry.SwathDefinition(lons=x,lats=y) # the pop grid geometry

Now we resamble the wind (orig) to the pop grid (targ)....

In [58]:
air_near=pyresample.kd_tree.resample_nearest(orig,np.flipud(mwind),targ,radius_of_influence=500000,fill_value=-999)

In [59]:
print air_near.shape, pop.shape # check that the new resambled grid is the same as the pop grid

(2315, 3727) (2315, 3727)


In [60]:
%%skip
#plot
m.imshow(air_near,interpolation='nearest')
m.colorbar()
m.drawcoastlines(linewidth=1.5)                                                                                                           
m.drawparallels(parallels)                                                                                                                
m.drawmeridians(meridians)                                                                                                                

#### resample with gauss

This is a different option in resampling by using nearest gauss weighting. For more see https://pyresample.readthedocs.io/en/latest/swath.html

In [61]:
%%skip
air_gauss=pyresample.kd_tree.resample_gauss(orig,np.flipud(mwind),targ,radius_of_influence=500000,neighbours=10, sigmas=250000, fill_value=-999)

In [62]:
%%skip
m.imshow(air_gauss,interpolation='nearest')
m.colorbar()
m.drawcoastlines(linewidth=1.5)                                                                                                           
m.drawparallels(parallels)                                                                                                                
m.drawmeridians(meridians)                                                                                                                


### Classify wind

Now we classify the wind speed based on Saffir-Simpson scale (others can be used as well)

In [63]:
reference = np.array([0,33,63,82,95,112,136,1000]) 
output_labels = ['TD', 'TS' ,'CAT. 1','CAT. 2' , 'CAT. 3', 'CAT. 4', 'CAT. 5'] #The value assigned to each interval

In [64]:
sort_idx = np.argsort(reference) # saving the scale indeces 

In [65]:
pos = np.searchsorted(reference[sort_idx], air_near, side = 'left') # sort the wind array

In [66]:
out = np.argsort(reference)[pos] # restore the correct order of the scale, more info in http://stackoverflow.com/questions/31078160/classify-elements-of-a-numpy-array-using-a-second-array-as-reference

In [67]:
out.max()

5

In [68]:
cmap = matplotlib.colors.ListedColormap(['w','b','g','y','r','purple']) # custom colormap

In [69]:
#cmap = matplotlib.cm.get_cmap('jet',4)

In [70]:
ti=np.arange(0.,6.) # custom range of colormap

In [71]:
%%skip
#plot
m.imshow(out,vmin=0, vmax=6, cmap = cmap, interpolation='nearest')
m.colorbar( ticks=ti )
m.drawcoastlines(linewidth=1.5)                                                                                                           
m.drawparallels(parallels)                                                                                                                
m.drawmeridians(meridians)                                                                                                                

### Compute population exposure

Here we can create appropriate masks of the wind force and apply it to the population array (mpop)

In [72]:
ma5=out>5 # all winds > 5

Note that a 'masked' result means 0 

In [73]:
pop5 = mpop[ma5].sum() # summing the mpop array for the above mask
pop5

masked

In [74]:
ma4=out > 4

In [75]:
pop4 = mpop[ma4].sum()
pop4

masked

In [76]:
ma3=out > 3

In [77]:
pop3 = mpop[ma3].sum()
pop3

1028266

In [78]:
ma2=out > 2

In [79]:
pop2 = mpop[ma2].sum()
pop2

28329714

In [80]:
ma1=(out > 1)

In [81]:
pop1 = mpop[ma1].sum()
pop1

56958351

In [82]:
ma0=(out > 0)

In [83]:
pop0 = mpop[ma0].sum()
pop0

61564714

We can estimate the exposure to a specific wind scale by subtracting. E.g. the population exposed to winds category 3 will be those exposed to more than 2 (pop2) - those exposed to more than 3 (pop3)

In [84]:
pop2-pop3  

27301448

Another way to test this is to mask the original pop array to the wind force and then exclude the nan values

In [85]:
mask= out == 3 # all location with wind force 3

In [86]:
np.ma.masked_less(pop[mask],0).sum() # sould be the same as the above result

27301448

### plot

We can plot the areas of interest as well

In [87]:
mout=np.ma.masked_array(out,mask = out != 3)

In [88]:
%%skip
m.imshow(mout,vmin=0, vmax=6, cmap = cmap, interpolation='nearest')
m.colorbar( ticks=ti )
m.drawcoastlines(linewidth=1.5)                                                                                                           
m.drawparallels(parallels)                                                                                                                
m.drawmeridians(meridians)                                                                                                                


## Real time test

This is function based on the corresponding Notebook (link) to read the wind swath of the day 


In [89]:
def get_wswath(fname):
    path0='ftp://ftp.ncep.noaa.gov/pub/data/nccf/com/hur/prod/'
    response=urllib2.urlopen(path0)
    ls=response.readlines()
    lp=[elem.strip() for elem in ls]
        
    try:
        urllib.urlretrieve(path0+fname, filename='tmp/wind10m.txt')
    except:
        print 'no such file'
        
    r=pd.read_csv('tmp/wind10m.txt',header=0)
    
    [nj,ni]=r.columns[0].split()[-2:]# the last 2 values of the header define grid size
    ni=np.int(ni)
    nj=np.int(nj)
    nn=ni*nj
    
    wswath=r[:nn]
    s=wswath[wswath.columns[0]].str.split(expand=True)
    s.columns=['lat','lon','wind']
    s=s.apply(pd.to_numeric)
    
    lat=s['lat'].values.reshape(ni,nj) 
    lon=s['lon'].values.reshape(ni,nj)
    wind=s['wind'].values.reshape(ni,nj)
    
    return lat,lon,wind

In [90]:
hlat,hlon,hwind = get_wswath('hwrf.2016092300/twenty20w.2016092300.wind10m.ascii')

In [96]:
def get_pop(wlat,wlon,wind):
    filename='../../POPEXPO/lspop20141.tif' #population data filename
    #define the lat/lon window of wind swath
    minlon=wlon[ wind != -999 ].min()
    maxlon=wlon[ wind != -999 ].max()
    minlat=wlat[ wind != -999 ].min()
    maxlat=wlat[ wind != -999 ].max()
    buf=[minlon,maxlon,minlat,maxlat]
    
    pop=getmap(filename,buf) # read population data
    
    mwind=np.ma.masked_array(wind,wind==-999) #mask the wind data for interpolation
    
    wgeo=pyresample.geometry.SwathDefinition(lons=wlon,lats=wlat) # the wind grid geometry
    pgeo=pyresample.geometry.SwathDefinition(lons=pop.lons,lats=pop.lats) # the pop grid geometry
    air_near=pyresample.kd_tree.resample_nearest(wgeo,np.flipud(mwind),pgeo,radius_of_influence=500000,fill_value=-999)
    
    reference = np.array([0,33,63,82,95,112,136,1000]) 
    output_labels = ['TD', 'TS' ,'CAT. 1','CAT. 2' , 'CAT. 3', 'CAT. 4', 'CAT. 5'] #The value assigned to each interval
    sort_idx = np.argsort(reference) # saving the scale indeces 
    pos = np.searchsorted(reference[sort_idx], air_near, side = 'left') # sort the wind array
    out = np.argsort(reference)[pos] # restore the correct order of the scale, more info in http://stackoverflow.com/questions/31078160/classify-elements-of-a-numpy-array-using-a-second-array-as-reference
    
    dic={}
    for i in range(7):
        mask= out == i # wind force i
        try:
            npop = np.ma.masked_less(pop.data[mask],0).sum() # sum of people exposed to wind force i
            if npop is np.ma.masked :
                dic[output_labels[i]] = 0 
            else:
                dic[output_labels[i]] = npop
        except Exception as e :
            print e
    return dic

In [97]:
exposure = get_pop(hlat,hlon,hwind)

In [98]:
exposure

{'CAT. 1': 28628637,
 'CAT. 2': 27301448,
 'CAT. 3': 1028266,
 'CAT. 4': 0,
 'CAT. 5': 0,
 'TD': 229424747,
 'TS': 4606363}