In [1]:
import geopandas as gpd
from shapely.geometry import Point

import netCDF4
from netCDF4 import Dataset
import datetime
import pandas as pd
import numpy as np

In [2]:
# Human-readable description of each monthly feature column.
features = dict(
    days_medium='Number of days in the month with medium flooding (2-yr flood)',
    days_high='Number of days in the month with high flooding (5-yr flood)',
    days_severe='Number of days in the month with severe flooding (20-yr flood)',
)

def days_medium(f):
    """Return 1 when the flood index ``f`` equals 1 (medium flooding), else 0."""
    return 1 if f == 1 else 0
    
def days_high(f):
    """Return 1 when the flood index ``f`` equals 2 (high flooding), else 0."""
    return 1 if f == 2 else 0
    
def days_severe(f):
    """Return 1 when the flood index ``f`` equals 3 (severe flooding), else 0."""
    return 1 if f == 3 else 0
    
def convert_to_int(x):
    """Convert ``x`` to an int by way of float, truncating toward zero.

    Parameters
    ----------
    x : any
        Value to convert (number, numeric string, ...).

    Returns
    -------
    int or float
        ``int(float(x))`` when the conversion succeeds, otherwise ``np.nan``
        (e.g. for ``None``, non-numeric strings, NaN or infinity).
    """
    try:
        return int(float(x))
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. None); ValueError: unparsable
        # string or NaN; OverflowError: +/- infinity. Anything else
        # (KeyboardInterrupt, SystemExit, ...) is deliberately not swallowed.
        return np.nan

def format_date(day_of_year, time_variable=None):
    """Convert one entry of a netCDF time variable to a ``datetime.datetime``.

    Parameters
    ----------
    day_of_year : int
        Index into the time variable (despite the name, this is a position in
        the variable, not a calendar day number).
    time_variable : netCDF4 variable, optional
        Time variable to read from; it must expose ``units`` and support
        indexing. When omitted, the notebook-level ``time_var`` is used, which
        keeps existing single-argument calls working.

    Returns
    -------
    datetime.datetime
        The decoded date at midnight (any time-of-day component is dropped).

    Raises
    ------
    ValueError
        If the decoded date does not exist in the Gregorian calendar (possible
        for files that declare e.g. a ``360_day`` calendar).
    """
    if time_variable is None:
        # Backward-compatible fallback to the global defined in the notebook.
        time_variable = time_var
    stamp = netCDF4.num2date(
        time_variable[day_of_year],
        time_variable.units,
        # Honour the calendar declared in the file instead of always assuming
        # num2date's default ('standard').
        calendar=getattr(time_variable, 'calendar', 'standard'),
    )
    return datetime.datetime(year=stamp.year, month=stamp.month, day=stamp.day)

def _monthly_flood_counts(years, months, lat, lon, flood):
    """Count days per severity level for each (year, month, latitude, longitude)."""
    # Same semantics as int(float(x)): truncate toward zero. NaN stays NaN and
    # therefore never matches any severity level.
    level = np.trunc(np.asarray(flood, dtype=float))

    # Collapse (year, month) into a single sortable integer key per time step.
    period = np.asarray(years) * 12 + (np.asarray(months) - 1)
    unique_periods, period_index = np.unique(period, return_inverse=True)
    n_periods = len(unique_periods)
    n_lat, n_lon = len(lat), len(lon)
    n_cells = n_lat * n_lon

    # Coordinates laid out in C order of a (period, lat, lon) cube so they line
    # up with the ravelled count arrays below.
    columns = {
        'year': np.repeat(unique_periods // 12, n_cells),
        'month': np.repeat(unique_periods % 12 + 1, n_cells),
        'latitude': np.tile(np.repeat(lat, n_lon), n_periods),
        'longitude': np.tile(lon, n_periods * n_lat),
    }
    severity_columns = ('days_medium', 'days_high', 'days_severe')
    for severity, column in enumerate(severity_columns, start=1):
        counts = np.zeros((n_periods, n_lat, n_lon), dtype=np.int64)
        for slot in range(n_periods):
            # Sum over the time steps that fall into this month.
            counts[slot] = (level[period_index == slot] == severity).sum(axis=0)
        columns[column] = counts.ravel()

    # The groupby keeps the original output contract: rows sorted by the keys
    # and repeated coordinate values (if any) merged by summation.
    monthly = (
        pd.DataFrame(columns)
        .groupby(['year', 'month', 'latitude', 'longitude'])[list(severity_columns)]
        .sum()
        .reset_index()
    )
    monthly['datetime'] = [
        datetime.datetime(year=int(year), month=int(month), day=1)
        for year, month in zip(monthly['year'], monthly['month'])
    ]
    return monthly.drop(columns=['year', 'month'])


def gen_monthly(file_name):
    """Aggregate a daily flood-severity netCDF file into monthly day counts.

    The file must provide ``time``, ``lat``, ``lon`` and ``flood`` variables;
    ``flood`` is assumed to be indexed as (time, lat, lon), as in the sample
    file inspected in this notebook.

    Parameters
    ----------
    file_name : str
        Path of the netCDF file to read.

    Returns
    -------
    pandas.DataFrame
        One row per (month, latitude, longitude) with the columns
        ``latitude``, ``longitude``, ``days_medium``, ``days_high``,
        ``days_severe`` (number of days in that month whose flood index was
        1, 2 or 3 respectively) and ``datetime`` (first day of the month).
    """
    # Read everything that is needed inside a context manager so the file
    # handle is always released, even if decoding fails.
    with Dataset(file_name, "r", format="NETCDF4") as rootgrp:
        # Use THIS file's time variable; do not rely on a notebook-level global.
        time_variable = rootgrp.variables['time']
        stamps = netCDF4.num2date(
            time_variable[:],
            time_variable.units,
            calendar=getattr(time_variable, 'calendar', 'standard'),
        )
        # getdata() drops the mask but keeps the stored values, matching the
        # use of `.data` on the masked arrays returned by netCDF4.
        lat = np.ma.getdata(rootgrp.variables['lat'][:])
        lon = np.ma.getdata(rootgrp.variables['lon'][:])
        flood = np.ma.getdata(rootgrp.variables['flood'][:])

    stamps = np.atleast_1d(np.ma.getdata(stamps))
    years = np.array([stamp.year for stamp in stamps], dtype=np.int64)
    months = np.array([stamp.month for stamp in stamps], dtype=np.int64)

    return _monthly_flood_counts(years, months, lat, lon, flood)

In [3]:
# Open the sample file so the cells below can inspect its structure.
rootgrp = Dataset("Flood_severity_index_sample.nc", "r", format="NETCDF4")
time_var = rootgrp.variables['time']
# Lengths of the time, latitude and longitude variables.
days, lats, lons = (len(rootgrp.variables[axis]) for axis in ('time', 'lat', 'lon'))

In [4]:
# Display the metadata (dtype, dimensions, attributes, shape) of the 'flood' variable.
rootgrp['flood']

<class 'netCDF4._netCDF4.Variable'>
float64 flood(time, lat, lon)
    _FillValue: nan
    title: Flood level Severity (medium, high, and severe)
    long_name: Flood Level Severity
    units: unitless
    valid_min: 0
    valid_max: 3
    missing_value: nan
    standard_name: channel_water_flow__flood_volume-flux_severity_index
unlimited dimensions: 
current shape = (365, 120, 250)
filling on

In [5]:
# Display the metadata of the 'time' variable (units and calendar are needed to decode dates).
time_var

<class 'netCDF4._netCDF4.Variable'>
int64 time(time)
    units: days since 2010-01-01 00:00:00
    calendar: proleptic_gregorian
unlimited dimensions: 
current shape = (365,)
filling on, default _FillValue of -9223372036854775806 used

In [6]:
# Length of the 'time' variable.
days

365

In [7]:
# Length of the 'lat' variable.
lats

120

In [8]:
# Length of the 'lon' variable.
lons

250

In [None]:
# Aggregate the daily flood index of the sample file into per-month day counts.
monthly = gen_monthly('Flood_severity_index_sample.nc')



In [None]:
# Preview the first rows that recorded at least one day of high flooding.
monthly.loc[monthly['days_high'] > 0].head()