## Analysis of HyFlux StormSurge Med results

This analysis is part of an effort to understand a discrepancy in the HyFlux data: a time shift of 6 hours in the Med Sea storm surge calculation. 

In [None]:
%matplotlib notebook

In [None]:
from matplotlib import animation
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import glob
import numpy as np
from pmap import getmap
import pandas
import matplotlib.pyplot as plt
import datetime

In [None]:
today=datetime.datetime.today()


In [None]:
# Root folder of the processed Med Sea runs for the current year.
# NOTE(review): the year is taken from today's date, while tstamp (set in the next cell) is
# hard-coded to a 2016 run, so this path only matches that run when executed in 2016.
PATH='/mnt/ECMWF/processed/{}/FIX_MED_SEA/'.format(today.year)

In [None]:
tstamp='20160616.00'

In [None]:
# Elevation GeoTIFFs of the selected run.
# glob returns its matches in arbitrary order; sort them so that the time and elevation
# arrays built from this list follow a reproducible (file-name) order.
hfiles=sorted(glob.glob(PATH+'calc_{}/TIF_H*.tif'.format(tstamp)))

Test the parsing of the time label from the file name.

In [None]:
print hfiles[0].split('/')[-1].split('_H_')[1].split('.')[0]

In [None]:
# Time label of every elevation file: the part of the file name between '_H_' and the first '.'
t=[k.split('/')[-1].split('_H_')[1].split('.')[0] for k in hfiles]

In [None]:
time=np.array(t)
print time

First we parse the bathymetry GeoTIFF to get the i,j indices of the point in question.

In [None]:
# Read the bathymetry raster of this run and rebuild its lon/lat axes from the geotransform.
filename='{}calc_{}/bathymetry.tif'.format(PATH,tstamp)
grid=getmap(filename)

gt=grid.GeoTr
width,height=grid.NCOLS,grid.NROWS

# Extent of the raster derived from the six geotransform coefficients
minx,maxy=gt[0],gt[3]
maxx=gt[0] + width*gt[1] + height*gt[2]
miny=gt[3] + width*gt[4] + height*gt[5]

# One coordinate per column / per row, evenly spaced over the extent (both ends included).
# NOTE(review): if GeoTr follows the GDAL convention (origin = outer corner of the first pixel),
# these axes run from edge to edge rather than through the pixel centres - confirm.
lon=np.linspace(minx,maxx,num=width,endpoint=True)
lat=np.linspace(miny,maxy,num=height,endpoint=True)


Now define the lat/lon we want. First we load the file with the observation points we selected.

In [None]:
OFILE='/mnt/pandora/Projects_Critech/EX_2015_CoastAlRisk/maps/data/s_b_data.txt'

In [None]:
obs=pandas.read_csv(OFILE,delimiter='\t')

In [None]:
obs

Let's choose a point e.g. 1858 

In [None]:
plat,plon=obs['latcalc'][12],obs['loncalc'][12]

In [None]:
print plat,plon # check

Now find the i,j for this point

In [None]:
i=np.abs(lat-plat).argmin()
j=np.abs(lon-plon).argmin()
print i,j, lat[i-1:i+2],lon[j-1:j+2]


### NOTE: How did we compute these calc points?

Now we parse the GeoTIFF files and select the value at i,j, adding it to a new list.

In [None]:
hobs=[]

In [None]:
dat=getmap(hfiles[0])
dat.data.shape

Note that we need to flip the data upside down to get the correct i,j (inherent to the GeoTIFF coordinate system?)

In [None]:
# Elevation at the selected grid point (i,j) for every file of the run.
# The list is rebuilt from scratch here so that re-running this cell does not
# append a second copy of the values to the existing list.
hobs=[]
for ifile in hfiles:
    dat=getmap(ifile)
    # the raster rows are flipped so that row index i matches the ascending lat axis
    hobs.append(np.flipud(dat.data)[i,j])

In [None]:
hobs=np.array(hobs)

In [None]:
hobs

Optionally we can plot. Note that the time labels in the time array are seconds elapsed since a certain starting point. How do we determine that starting point? One way is to parse the file Calc_input_deck.txt, where InTime and FinTime are given.

In [None]:
# Extract the raw InTime / FinTime / DateTsunami entries from the run's input deck.
# Start from None so a missing key is visible instead of silently reusing a value
# left in the kernel by an earlier execution.
time1=time2=startime=None
# the with-statement closes the file on exit, no explicit close() is needed
with open(PATH+'calc_{}/Calc_input_deck.txt'.format(tstamp)) as f:
    lines = [line.rstrip('\n') for line in f]

for atr in lines:
    # keep only the text before the first '*' (presumably a trailing comment - confirm)
    if 'InTime' in atr: time1 = atr.split('*')[0]
    if 'FinTime' in atr: time2 = atr.split('*')[0]
    if 'DateTsunami' in atr: startime=atr.split('*')[0]

In [None]:
print intime,fintime, startime

In [None]:
intime=float(time1.split('=')[1])
fintime=float(time2.split('=')[1])
stime=startime.split('=')[1].strip()
print intime,fintime,stime

the starting time of this run is ... 

In [None]:
odate=datetime.datetime.strptime(stime,'%d %b %Y %H:%S')
print sdate

So the start time of this particular computation is

In [None]:
print odate+datetime.timedelta(hours=intime)

 while the time stamp on the file is 

In [None]:
print tstamp

Thus the computation starts 6 hours before the time stamp of the run!

### Annual Module

parsing for all folders in this year 

In [None]:
# All run folders of this year.
# glob returns its matches in arbitrary order; sort by name so that slicing the end
# of the list really yields the folders with the latest time stamps.
allfolders=sorted(glob.glob(PATH+'calc_*'))

In [None]:
folders=allfolders[-10:]# last 10 folders

In [None]:
# Elevation at the selected grid point (i,j) for each of the selected runs, on a common time axis.
fig = plt.figure(figsize=(10,8))
ax = fig.add_axes([0.1,0.1,0.8,0.8])

ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y %H'))
ax.xaxis.set_major_locator(mdates.HourLocator(byhour=[0,12]))
ax.xaxis_date()

for ifolder in folders: 
    rstamp=ifolder.split('/')[-1].split('_')[1] # the time stamp on the run folder (actual date of the run)
    rdate=datetime.datetime.strptime(rstamp,'%Y%m%d.%H')
    hfiles=glob.glob(ifolder+'/TIF_H*.tif')
    # glob order is arbitrary: process the files by increasing numeric time label so that the
    # line is drawn chronologically and a restart (label 0) is seen before the later files
    hfiles.sort(key=lambda name: float(name.split('/')[-1].split('_H_')[1].split('.')[0]))
    t=[]
    hobs=[]
    for ifile in hfiles:  
        dat=getmap(ifile)
        hobs.append(np.flipud(dat.data)[i,j])
        # the label is text, so it has to be converted before it can be compared with 0
        ti=float(ifile.split('/')[-1].split('_H_')[1].split('.')[0])
        if ti == 0: # there is a new restart
            # NOTE(review): assumes a restarted run is referenced to the folder time stamp -
            # confirm against DateTsunami in that run's Calc_input_deck.txt
            t.append(rdate)
            odate=rdate # set new odate
        else:
            # the labels are seconds elapsed since the reference date
            t.append(odate+datetime.timedelta(seconds=ti))
    ax.plot(t,hobs,'o-',label=rdate)

fig.autofmt_xdate()
ax.legend()
plt.show()

You can see that the elevation for the 6 hours before the run's main time stamp does not match between runs. WHY? What changed? Which one should be used for a long time series? 

## Compare forcing

Check the forcing files in the time folders.

In [None]:
# All forcing folders of this year.
# glob returns its matches in arbitrary order; sort by name so that consecutive list
# entries are consecutive runs (the comparison cells below treat folders[m-1] as the previous run).
allfolders=sorted(glob.glob(PATH+'tif_*'))

In [None]:
folders=allfolders[-10:]

In [None]:
fdic={}  # create a dictionary to store the timestamps of every run(folder)

In [None]:
# For every forcing folder, record the time stamps of its pressure files under the folder's stamp.
for ifolder in folders:
    # the time stamp on the run folder (actual date of the run)
    rstamp=ifolder.split('/')[-1].split('_')[1]
    rdate=datetime.datetime.strptime(rstamp,'%Y%m%d.%H')
    pfiles=glob.glob(ifolder+'/TIF_PRESS*.tif')
    # time stamp of a file = text between '_PRESS_' and the first '.', read as YYYYmmddHHMM
    t=[datetime.datetime.strptime(ifile.split('/')[-1].split('_PRESS_')[1].split('.')[0],'%Y%m%d%H%M')
       for ifile in pfiles]
    fdic[rstamp]=t

In [None]:
# Pressure forcing: for each run, compare the time steps it shares with the preceding list entry.
for m in range(1,len(folders)):
    current_folder,previous_folder=folders[m],folders[m-1]
    rstamp=current_folder.split('/')[-1].split('_')[1]
    foldertime=datetime.datetime.strptime(rstamp,'%Y%m%d.%H')
    previous_stamp=previous_folder.split('/')[-1].split('_')[1]
    pfiles=glob.glob(current_folder+'/TIF_PRESS*.tif')
    for ifile in pfiles:
        ti=ifile.split('/')[-1].split('_PRESS_')[1].split('.')[0]
        ta=datetime.datetime.strptime(ti,'%Y%m%d%H%M')
        # only time steps earlier than this run's stamp that the previous run also holds
        if ta >= foldertime or ta not in fdic[previous_stamp]:
            continue
        print('check {}'.format(ta))
        dprev=getmap('{}/TIF_PRESS_{}.tif'.format(previous_folder,ti))
        dcur=getmap('{}/TIF_PRESS_{}.tif'.format(current_folder,ti))
        # report any time step whose two rasters are not element-wise identical
        if not np.array_equal(dprev.data,dcur.data):
            print('error in {}'.format(ta))
            

The same check for U10, V10 and VMAX.

In [None]:
# U10 forcing: for each run, compare the time steps it shares with the preceding list entry.
for m in range(1,len(folders)):
    current_folder,previous_folder=folders[m],folders[m-1]
    rstamp=current_folder.split('/')[-1].split('_')[1]
    foldertime=datetime.datetime.strptime(rstamp,'%Y%m%d.%H')
    previous_stamp=previous_folder.split('/')[-1].split('_')[1]
    pfiles=glob.glob(current_folder+'/TIF_U10*.tif')
    for ifile in pfiles:
        ti=ifile.split('/')[-1].split('_U10_')[1].split('.')[0]
        ta=datetime.datetime.strptime(ti,'%Y%m%d%H%M')
        # only time steps earlier than this run's stamp that are listed in fdic for the previous run
        if ta >= foldertime or ta not in fdic[previous_stamp]:
            continue
        print('check {}'.format(ta))
        dprev=getmap('{}/TIF_U10_{}.tif'.format(previous_folder,ti))
        dcur=getmap('{}/TIF_U10_{}.tif'.format(current_folder,ti))
        # report any time step whose two rasters are not element-wise identical
        if not np.array_equal(dprev.data,dcur.data):
            print('error in {}'.format(ta))
            

In [None]:
# V10 forcing: for each run, compare the time steps it shares with the preceding list entry.
for m in range(1,len(folders)):
    current_folder,previous_folder=folders[m],folders[m-1]
    rstamp=current_folder.split('/')[-1].split('_')[1]
    foldertime=datetime.datetime.strptime(rstamp,'%Y%m%d.%H')
    previous_stamp=previous_folder.split('/')[-1].split('_')[1]
    pfiles=glob.glob(current_folder+'/TIF_V10*.tif')
    for ifile in pfiles:
        ti=ifile.split('/')[-1].split('_V10_')[1].split('.')[0]
        ta=datetime.datetime.strptime(ti,'%Y%m%d%H%M')
        # only time steps earlier than this run's stamp that are listed in fdic for the previous run
        if ta >= foldertime or ta not in fdic[previous_stamp]:
            continue
        print('check {}'.format(ta))
        dprev=getmap('{}/TIF_V10_{}.tif'.format(previous_folder,ti))
        dcur=getmap('{}/TIF_V10_{}.tif'.format(current_folder,ti))
        # report any time step whose two rasters are not element-wise identical
        if not np.array_equal(dprev.data,dcur.data):
            print('error in {}'.format(ta))
            

In [None]:
# VMAX forcing: for each run, compare the time steps it shares with the preceding list entry.
for m in range(1,len(folders)):
    current_folder,previous_folder=folders[m],folders[m-1]
    rstamp=current_folder.split('/')[-1].split('_')[1]
    foldertime=datetime.datetime.strptime(rstamp,'%Y%m%d.%H')
    previous_stamp=previous_folder.split('/')[-1].split('_')[1]
    pfiles=glob.glob(current_folder+'/VMAX_*.tif')
    for ifile in pfiles:
        ti=ifile.split('/')[-1].split('VMAX_')[1].split('.')[0]
        ta=datetime.datetime.strptime(ti,'%Y%m%d%H%M')
        # only time steps earlier than this run's stamp that are listed in fdic for the previous run
        if ta >= foldertime or ta not in fdic[previous_stamp]:
            continue
        print('check {}'.format(ta))
        dprev=getmap('{}/VMAX_{}.tif'.format(previous_folder,ti))
        dcur=getmap('{}/VMAX_{}.tif'.format(current_folder,ti))
        # report any time step whose two rasters are not element-wise identical
        if not np.array_equal(dprev.data,dcur.data):
            print('error in {}'.format(ta))
            

Visualization

In [None]:
dprev.data

In [None]:
dcur.data

## So input files are the same !!

Here we compare the values on the NETCDF files with the one above from the geotif files. For the retrieval of data from the netcdf see ...