# In [None]:
# Python Standard Import Statements
# .............................
import matplotlib; matplotlib.use('agg')

import netCDF4
import shapely
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import scipy.stats as stats
import scipy.odr as odr
import scipy.signal as signal
import scipy.integrate as integrate
import importlib
import datetime as dt

# Imports for Polygon Routines
# ..................................

from netCDF4 import Dataset
from shapely import geometry
from shapely import ops
from decimal import Decimal

# Defined Functions to be used in script below
# .................................................

# Function for grabbing coordinates 2PVU contour path
def get_path_coord(contour):
    """Return the x and y coordinates of the longest path of a contour set.

    Parameters
    ----------
    contour : object
        Contour set whose ``collections[0].get_paths()`` returns path objects
        exposing a two-column ``vertices`` array (for example the value
        returned by ``ax.contour`` for a single level).

    Returns
    -------
    a, b : numpy.ndarray
        First and second column of the vertices of the path with the most
        vertices. Both are empty when that path has no vertices.

    Raises
    ------
    ValueError
        If the first contour level contains no paths at all.
    """
    # NOTE(review): ``ContourSet.collections`` is deprecated in recent
    # Matplotlib releases -- confirm the installed version still provides it.
    paths = contour.collections[0].get_paths()
    if len(paths) == 0:
        raise ValueError("contour level contains no paths")

    # max() keeps the first of several equally long paths, which matches the
    # tie-breaking of the np.argmax-based selection used previously.
    longest = max(paths, key=lambda p: len(p.vertices))
    xy = np.asarray(longest.vertices)
    return xy[:, 0], xy[:, 1]
# ..................................
# Function for finding the index of the closest value
# Input Parameters
# ..................
# array: Array to search
# value: Target value whose nearest element's index is wanted

def getnearpos(array, value):
    """Return the index of the element of *array* that is closest to *value*.

    The index refers to the flattened array; when several elements are
    equally close, the first one wins.
    """
    distance = np.abs(array - value)
    return distance.argmin()

# ..................................
# Python code to find the longest running
# sequence of positive values.

# A run that extends to the last inspected element is counted as well, so
# the data does not have to be padded with negative values at the end
# (padded input still gives the same answer).

def getLongestSeq(a, n):
    """Locate the longest run of consecutive positive values in ``a[:n]``.

    Parameters
    ----------
    a : sequence of numbers
        Values to scan; an element belongs to a run when it is ``> 0``.
    n : int
        Number of leading elements of ``a`` to inspect.

    Returns
    -------
    maxIdx : int
        Index of the first element of the longest run (0 if there is none).
    maxLen : int
        Length of that run (0 if there is none). Of several runs with the
        same length the earliest one is reported.

    The result is also printed.
    """
    maxIdx = 0
    maxLen = 0
    currLen = 0
    currIdx = 0

    for k in range(n):
        if a[k] > 0:
            if currLen == 0:
                # New sequence, remember where it begins.
                currIdx = k
            currLen += 1
            continue

        # A non-positive value terminates the current run.
        if currLen > maxLen:
            maxLen = currLen
            maxIdx = currIdx
        currLen = 0

    # A run still open when the data ends was never compared inside the loop.
    if currLen > maxLen:
        maxLen = currLen
        maxIdx = currIdx

    if maxLen > 0:
        print('Index : ', maxIdx, ',Length : ', maxLen)
    else:
        print("No positive sequence detected.")
    return maxIdx, maxLen

# ...............................
# Function start_end_contour
# Function for collecting start and end points of contour
# Input Parameters: fidx (middle index), maxl (maximum length), last (index of last value of array)
# ...........................
# strt_idx: index of first point of pv gradient reversal
# maxl: length of pv gradient reversal
# last: index of last value of array

def start_end_contour(fidx, maxl, last):
    """Return (start, middle, end) indices spanning ``maxl`` either side of ``fidx``.

    The start index ``fidx - maxl`` is raised to 0 when it would be negative
    and the end index ``fidx + maxl`` is lowered to ``last`` when it would
    exceed it; the middle index is ``fidx`` unchanged.
    """
    lower = fidx - maxl
    upper = fidx + maxl

    # The two bounds are clamped independently of each other.
    first = 0 if lower < 0 else lower
    final = last if upper > last else upper
    return first, fidx, final

# ......................................
# Function retrieve_path
# Retrieves the path of the 2-PVU contour and creates a Polygon
# from its vertices.

def retrieve_path(contour):
    """Build a polygon from the longest path of a contour set.

    Parameters
    ----------
    contour : object
        Contour set accepted by ``get_path_coord``.

    Returns
    -------
    poly : shapely.geometry.Polygon
        Polygon whose exterior is given by the vertices of the longest path,
        in path order.

    Any exception raised by ``get_path_coord`` or by the ``Polygon``
    constructor propagates to the caller.
    """
    # The path selection is shared with get_path_coord rather than repeated
    # here, so both functions always agree on which path is "the" contour.
    a, b = get_path_coord(contour)
    poly = geometry.Polygon(list(zip(a, b)))

    # NOTE: the intersection between a line and the contour segment that an
    # earlier comment announced here is not implemented.
    return poly

# .............................
# Function pvindex_bnds
# Function for choosing the start point of the PV streamer segment
# Code tests whether p2gradx2 is positive at positive p2grady2
# Input Variables:
# firstx, firsty: index of first value indicated by getLongSeq
# lengthx, lengthy: length of consecutive group of positive values indicated by getLongSeq

# Output Variables:
# strt_idx, end-idx: start and end index bounds of PV streamer
# .........................................

def pvindex_bnds(datax, datay, firstx, firsty, lengthx, lengthy, path_len=None):
    """Choose start, middle and end indices of a PV streamer segment.

    The indices between ``min(firstx, firsty)`` and ``max(firstx, firsty)``
    (inclusive) are searched for the first position where both ``datay`` and
    ``datax`` are positive. From that start index the middle index is offset
    by ``min(lengthx, lengthy)`` and the end index by
    ``2 * max(lengthx, lengthy)``.

    Parameters
    ----------
    datax, datay : sequence of numbers
        Gradient values sampled along the contour path.
    firstx, firsty : int
        Start index of the longest positive run in ``datax`` / ``datay``.
    lengthx, lengthy : int
        Length of those runs.
    path_len : int, optional
        Length used to cap the middle and end index. When omitted, the
        module-level array ``p2grady2`` supplies it if it exists (this is
        what the function always did); otherwise ``len(datay)`` is used.

    Returns
    -------
    strt_idx, mdl_idx, end_idx
        Index bounds of the streamer, or three NaN floats when no position
        in the search range has both values positive.
    """
    nan = float('NaN')
    lo = np.min([firstx, firsty])
    hi = np.max([firstx, firsty]) + 1

    # Search for the first index at which both gradients are positive.
    for i in np.arange(lo, hi, 1):
        if datay[i] > 0 and datax[i] > 0:
            strt_idx = i
            break
    else:
        return nan, nan, nan

    if path_len is None:
        try:
            # Legacy coupling: the driver script defines p2grady2 (the
            # unpadded gradient along the contour) at module level.
            path_len = len(p2grady2)
        except NameError:
            path_len = len(datay)

    end_idx = strt_idx + 2 * np.max([lengthx, lengthy])
    mdl_idx = strt_idx + np.min([lengthx, lengthy])

    # Indices at or beyond the path length are pulled back to path_len - 2.
    # NOTE(review): the reason for "- 2" rather than "- 1" is not visible
    # here; the value is kept as it was.
    if end_idx >= path_len:
        end_idx = path_len - 2
    if mdl_idx >= path_len:
        mdl_idx = path_len - 2

    return strt_idx, mdl_idx, end_idx

# ....................................
# Function data_pad
# Pad data with negatives at the end. Section tests whether or not the very last element is positive or negative,
# since getLongSec function doesn't "see" positive end values very well unless part of a significantly large
# sequence. If end value is positive (i.e. p2grady2[-1]>0), then test is padded with -1's.
# ...........................................................
# Store size of p2gradx2 and p2grady2

def data_pad(datax, datay):
    """Return copies of both arrays, padded with five -1 values if needed.

    Padding is appended to *both* arrays whenever the last element of either
    one is positive, so that a positive run touching the end of the data is
    always followed by a non-positive value. Otherwise plain copies are
    returned.

    Returns
    -------
    testx2, testy2 : numpy.ndarray
        The (possibly padded) copies of ``datax`` and ``datay``, in that order.
    """
    padded_x = np.copy(datax)
    padded_y = np.copy(datay)

    if datay[-1] > 0 or datax[-1] > 0:
        tail = np.repeat(-1, 5)
        padded_x = np.hstack([padded_x, tail])
        padded_y = np.hstack([padded_y, tail])

    return padded_x, padded_y

def ncdump(nc_fid, verb=True):
    '''
    ncdump outputs dimensions, variables and their attribute information.
    The information is similar to that of NCAR's ncdump utility.
    ncdump requires a valid instance of Dataset.

    Parameters
    ----------
    nc_fid : netCDF4.Dataset
        A netCDF4 dataset object
    verb : Boolean
        whether or not nc_attrs, nc_dims, and nc_vars are printed

    Returns
    -------
    nc_attrs : list
        A Python list of the NetCDF file global attributes
    nc_dims : list
        A Python list of the NetCDF file dimensions
    nc_vars : list
        A Python list of the NetCDF file variables
    '''
    def print_ncattr(key):
        """
        Prints the NetCDF file attributes for a given key

        Parameters
        ----------
        key : unicode
            a valid netCDF4.Dataset.variables key
        """
        try:
            variable = nc_fid.variables[key]
            print("\t\ttype:", repr(variable.dtype))
            for ncattr in variable.ncattrs():
                print('\t\t%s:' % ncattr, repr(variable.getncattr(ncattr)))
        except KeyError:
            # Reached for a dimension that has no variable of the same name.
            # The message must be formatted inside the print() call:
            # print() returns None, so formatting its result raises TypeError.
            print("\t\tWARNING: %s does not contain variable attributes" % key)

    # NetCDF global attributes
    nc_attrs = nc_fid.ncattrs()
    if verb:
        print("NetCDF Global Attributes:")
        for nc_attr in nc_attrs:
            print('\t%s:' % nc_attr, repr(nc_fid.getncattr(nc_attr)))

    # Dimension shape information.
    nc_dims = list(nc_fid.dimensions)  # list of nc dimensions
    if verb:
        print("NetCDF dimension information:")
        for dim in nc_dims:
            print("\tName:", dim)
            print("\t\tsize:", len(nc_fid.dimensions[dim]))
            print_ncattr(dim)

    # Variable information (dimension variables were already reported above).
    nc_vars = list(nc_fid.variables)  # list of nc variables
    if verb:
        print("NetCDF variable information:")
        for var in nc_vars:
            if var not in nc_dims:
                print('\tName:', var)
                print("\t\tdimensions:", nc_fid.variables[var].dimensions)
                print("\t\tsize:", nc_fid.variables[var].size)
                print_ncattr(var)
    return nc_attrs, nc_dims, nc_vars

def syn_clim(data, n_time=58800, n_lat=73, n_lon=144):
    """Compute per-grid-point mean and standard deviation for each synoptic hour.

    The first ``n_time`` steps along axis 0 of ``data`` are labelled with
    6-hourly timestamps starting at 1979-01-01 00:00 and grouped by hour of
    day (0, 6, 12, 18).

    Parameters
    ----------
    data : array-like
        Array indexed as ``data[time, i, j]``.
    n_time : int, optional
        Number of leading time steps to use (default 58800).
    n_lat, n_lon : int, optional
        Number of grid points used along axis 1 and axis 2 (default 73, 144).

    Returns
    -------
    clim0, clim6, clim12, clim18 : numpy.ndarray
        ``(n_lat, n_lon)`` arrays holding the mean for each hour group.
    std0, std6, std12, std18 : numpy.ndarray
        ``(n_lat, n_lon)`` arrays holding the standard deviation as computed
        by pandas ``groupby(...).std()`` for each hour group.

    Raises
    ------
    ValueError
        If ``data`` has fewer than ``n_time`` steps along axis 0 (the values
        then no longer match the length of the time index).
    """
    shape = (n_lat, n_lon)
    clim0, clim6, clim12, clim18 = (np.empty(shape) for _ in range(4))
    std0, std6, std12, std18 = (np.empty(shape) for _ in range(4))

    # The time axis is the same for every grid point, so it is built once
    # instead of once per point. '6h' is the spelling of the 6-hourly
    # frequency that current pandas expects (upper-case 'H' is deprecated).
    dates = pd.date_range('1979-01-01', '2019-04-01', freq='6h')[:n_time]
    hours = dates.hour

    for j in range(n_lon):
        for i in range(n_lat):
            series = pd.Series(data[:n_time, i, j], index=dates)
            by_hour = series.groupby(hours)
            # Groups come back sorted by hour: 0, 6, 12, 18.
            clim = by_hour.mean().values
            spread = by_hour.std().values

            clim0[i, j], clim6[i, j], clim12[i, j], clim18[i, j] = clim[0], clim[1], clim[2], clim[3]
            std0[i, j], std6[i, j], std12[i, j], std18[i, j] = spread[0], spread[1], spread[2], spread[3]

    return clim0, clim6, clim12, clim18, std0, std6, std12, std18
# .......................................
# NC file input 1

ncfile1 = '/home/jjpjones/pvs_vws_indices/era5_ucombined_6hrly.nc'
ncf1 = Dataset(ncfile1, 'r')
nc_attrs1, nc_dims1, nc_vars1 = ncdump(ncf1)

# Extract coordinates and zonal wind from the NetCDF file
lat = ncf1.variables['latitude'][:]
lon = ncf1.variables['longitude'][:]
time = ncf1.variables['time'][:]
lev1 = ncf1.variables['level'][:]
u = ncf1.variables['u'][:]

# List all times in file as datetime objects.
# The offsets are applied as hours, so the reference must be a datetime:
# adding a timedelta to a plain date keeps only whole days and would drop
# the hour of day from every entry.
# NOTE(review): this treats the time variable as hours since 1900-01-01 --
# confirm against the variable's units attribute.
time2 = [int(i) for i in time]
dt_time = [dt.datetime(1900, 1, 1) + dt.timedelta(hours=t) for t in time2]

# NC file input 2

ncfile2 = '/home/jjpjones/pvs_vws_indices/era5_pv350k_6hrly_v2.nc'
ncf2 = Dataset(ncfile2, 'r')
nc_attrs2, nc_dims2, nc_vars2 = ncdump(ncf2)

pv = ncf2.variables['pv'][:]

# Identify zonal wind levels: index 0 and index 1 of the level axis.
# NOTE(review): the names suggest these are 200 hPa and 850 hPa -- confirm
# against lev1.
u200 = np.copy(u[0:58800, 0, 0:73, 0:144])
u850 = np.copy(u[0:58800, 1, 0:73, 0:144])

vws = u200 - u850  # deep-layer shear

# Find synoptic climatology (mean and standard deviation per synoptic hour)
uclim0, uclim6, uclim12, uclim18, ustd0, ustd6, ustd12, ustd18 = syn_clim(vws)
pclim0, pclim6, pclim12, pclim18, pstd0, pstd6, pstd12, pstd18 = syn_clim(pv)

# Define dates dataframe structure.
# '6h' is the spelling of the 6-hourly frequency that current pandas expects
# (upper-case 'H' is deprecated); the resulting index is the same.
dates = pd.date_range('1979-01-01', '2019-04-01', freq='6h')

# Set up empty lists that collect one upstream / downstream value per timestep
vup_full, vdn_full = [], []

# Shear climatology (mean, standard deviation) keyed by synoptic hour
_shear_clim_by_hour = {
    0: (uclim0, ustd0),
    6: (uclim6, ustd6),
    12: (uclim12, ustd12),
    18: (uclim18, ustd18),
}


def _streamer_shear_anomalies(step, strt, mid, end, lat_idx, lon_idx):
    """Sum standardized shear anomalies upstream and downstream of one streamer.

    Upstream covers contour positions ``strt:mid`` and downstream
    ``mid + 1:end``. Returns ``(upstream, downstream)``; both are NaN when no
    streamer was found (``strt`` is NaN), when ``mid == end``, or when the
    hour of ``dates[step]`` is not one of 0, 6, 12, 18.
    """
    nan = float('NaN')
    if np.isnan(strt) or mid == end:
        print('No PV detected.')
        return nan, nan

    clim_std = _shear_clim_by_hour.get(dates[step].hour)
    if clim_std is None:
        return nan, nan
    clim, std = clim_std

    def anomaly_sum(lo, hi):
        rows, cols = lat_idx[lo:hi], lon_idx[lo:hi]
        return np.sum((vws[step, rows, cols] - clim[rows, cols]) / std[rows, cols])

    return anomaly_sum(strt, mid), anomaly_sum(mid + 1, end)


for q in np.arange(0, 58800, 1):
    # Define field and calculate the pv gradient fields.
    # pv_diffx is the gradient along axis 0 and pv_diffy along axis 1 of
    # pv_field; axis 0 is the one paired with lat in the contour call below.
    # NOTE(review): the field is cut to 143 columns while the wind fields use
    # 144 -- confirm that 0:143 is intended.
    pv_field = pv[q, 0:73, 0:143]
    pv_diffx = np.gradient(pv_field, axis=0)
    pv_diffy = np.gradient(pv_field, axis=1)

    # Plot 2PVU contour and retrieve coordinates along path.
    # The figure only serves to compute the contour; it is closed right away
    # because otherwise one open figure per timestep accumulates in memory.
    fig, ax = plt.subplots()
    try:
        cs1 = ax.contour(lon[104:143], lat[12:34], pv_field[12:34, 104:143], levels=[2e-6])
        a, b = get_path_coord(cs1)
    finally:
        plt.close(fig)
    # ...........................................

    # Retrieve grid indices for contour coordinates.
    # Update v2: position indices flipped to move from left to right.
    idxlt = np.flipud(np.array([getnearpos(lat, r) for r in b]))
    idxln = np.flipud(np.array([getnearpos(lon, r) for r in a]))
    # ...........................................

    # Retrieve pv gradient values corresponding to 2PVU contour.
    # p2grady2 must stay a module-level name: pvindex_bnds reads it to cap
    # its indices at the (unpadded) contour length.
    p2gradx2 = np.array(pv_diffx[idxlt, idxln])
    p2grady2 = np.array(pv_diffy[idxlt, idxln])

    # ...........................................
    # Update v2: Data padding moved to user-defined function data_pad.
    # pvindex_bnds replaces start_end_contour.
    # Pad with -1's when the last gradient value is positive, then find the
    # largest group of consecutive positive values in each gradient.
    testx, testy = data_pad(p2gradx2, p2grady2)

    pidxy1, plenty1 = getLongestSeq(testy, len(testy))
    pidxx1, plentx1 = getLongestSeq(testx, len(testx))

    if plenty1 > 1 and plentx1 > 1:
        strt1, mid1, end1 = pvindex_bnds(testx, testy, pidxx1, pidxy1, plentx1, plenty1)
    else:
        print('Step pvindex_bnds: No primary PV detected at timestep: ' + str(q))
        strt1 = mid1 = end1 = float('NaN')

    # ...........................................
    # After obtaining start, middle and end locations of PV streamer, find
    # standardized anomalies upstream (strt to mid) and downstream
    # (mid+1 to end) of the first PVS.
    vup_std1, vdn_std1 = _streamer_shear_anomalies(q, strt1, mid1, end1, idxlt, idxln)

    # ....................................................................
    # Check for a second major group of consecutive positive values by
    # removing the first group (replaced with -1's)
    if np.isnan(strt1):
        print('No 2nd PV detected.')
        vup_std2 = vdn_std2 = float('NaN')
    else:
        y_remaining = np.copy(testy)
        y_remaining[strt1:end1] = -1

        x_remaining = np.copy(testx)
        x_remaining[strt1:end1] = -1

        pidxy2, plenty2 = getLongestSeq(y_remaining, len(y_remaining))
        pidxx2, plentx2 = getLongestSeq(x_remaining, len(x_remaining))

        # Both groups must be longer than one element.
        if plenty2 > 1 and plentx2 > 1:
            strt2, mid2, end2 = pvindex_bnds(x_remaining, y_remaining, pidxx2, pidxy2, plentx2, plenty2)
        else:
            print('Step 2nd pvindex_bnds: No primary PV detected at timestep: ' + str(q))
            strt2 = mid2 = end2 = float('NaN')

        vup_std2, vdn_std2 = _streamer_shear_anomalies(q, strt2, mid2, end2, idxlt, idxln)

    # Find total standardized VWS anomalies.
    # NOTE(review): a NaN for either streamer makes the total NaN, so a
    # timestep only gets a value when both streamers were detected --
    # confirm that this is intended.
    vup_total = vup_std1 + vup_std2
    vdn_total = vdn_std1 + vdn_std2

    # Append to daily index
    vup_full.append(vup_total)
    vdn_full.append(vdn_total)

    print('Field at ', q, 'is complete.')

subdaily_vdn = np.array(vdn_full)
subdaily_vup = np.array(vup_full)
np.savetxt('/home/jjpjones/pvs_vws_indices/subdaily_vwsdn_idx.txt', vdn_full, delimiter=' ')
np.savetxt('/home/jjpjones/pvs_vws_indices/subdaily_vwsup_idx.txt', vup_full, delimiter=' ')