In [1]:
import numpy as np
import xarray as xr
import pandas as pd
from netCDF4 import Dataset, MFDataset
import matplotlib.pyplot as plt
import glob
from datetime import datetime, timedelta
from scipy import stats
from sklearn.metrics import mean_squared_error
import calendar

import sys, os, time, warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# SettingWithCopyWarning has lived in different places across pandas releases:
# try the public location first, then the legacy private one, and finally
# tolerate its absence so the notebook's first cell never dies on an ImportError.
try:
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    try:
        from pandas.core.common import SettingWithCopyWarning
    except ImportError:
        SettingWithCopyWarning = None   # class not available in this pandas build

if SettingWithCopyWarning is not None:
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

In [2]:
def calc_q_vpd(pa,ta,vpd):
    """Specific humidity [kg/kg] computed from the vapour pressure deficit.

    Parameters
    ----------
    pa  : air pressure in kPa (converted to Pa internally).
    ta  : air temperature in ˚C.
    vpd : vapour pressure deficit in hPa (converted to Pa internally).

    The arithmetic is purely element-wise, so scalars, NumPy arrays and
    pandas Series are all accepted and the result has the matching shape.
    """
    ratio = 0.622                                    # the epsilon factor of the humidity formula
    # Saturation vapour pressure in Pa (Magnus-type expression, Ta in ˚C).
    sat_vp = 611.2*np.exp(17.67*ta/(ta+243.5))
    # Actual vapour pressure: saturation value minus the deficit (hPa -> Pa).
    act_vp = sat_vp - 100*vpd
    # Denominator p - (1 - eps) * e, with the pressure taken from kPa to Pa.
    denom = 1000*pa-(1-ratio)*act_vp
    return ratio*act_vp/denom

def calc_theta(pa,ta):
    """Potential temperature [K].

    Parameters
    ----------
    pa : air pressure in kPa (the reference pressure in the ratio is 100 kPa).
    ta : air temperature in ˚C.

    Works element-wise on scalars, arrays and pandas Series.
    """
    kelvin = ta+273.15                  # ˚C -> K
    pressure_factor = (100./pa)**(0.286)
    return kelvin*pressure_factor

In [3]:
# NOTE: `dir` shadows the Python builtin of the same name; the name is kept
# because other cells may still reference it.
dir = './data/'
levs = [1.5, 56.0]      # PROBE_HT values (metres) analysed by the cells below


def _read_probe_csv(pattern):
    """Load the first file matching `pattern` as a DataFrame with a datetime 'time' column.

    Parameters
    ----------
    pattern : glob pattern; when several files match, the first one in sorted
              order is read.

    Returns
    -------
    DataFrame in which -9999 entries are read as NaN, and the 'DATE_TIME'
    column is renamed to 'time' and converted with `pd.to_datetime`.

    Raises
    ------
    FileNotFoundError : if no file matches `pattern` (instead of the bare
                        IndexError that indexing an empty glob result gives).
    KeyError          : if the file has no 'DATE_TIME' column (errors="raise").
    """
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError("no input file matches " + pattern)
    frame = pd.read_csv(matches[0], na_values=-9999)
    frame = frame.rename(columns={'DATE_TIME': 'time'}, errors="raise")
    frame['time'] = pd.to_datetime(frame['time'])
    return frame


ta1 = _read_probe_csv(dir+'MV00501.*')     # supplies AIRTEMP_MEAN / PROBE_HT downstream
vpd1 = _read_probe_csv(dir+'MV00505.*')    # supplies VPD_MEAN / PROBE_HT downstream


In [4]:
# Lower probe height (levs[0]): hourly means of the air-temperature and VPD
# records, selected by their PROBE_HT column.
ta_l1 = ta1[ta1['PROBE_HT'] == levs[0]].resample('1H', on = 'time').mean()
lmask = ( vpd1['PROBE_HT']==levs[0] )
vpd_l1 = vpd1[lmask].resample('1H', on = 'time').mean()

# Attach VPD (aligned on the hourly time index), then derive specific humidity
# and potential temperature using a fixed pressure of 101.325 kPa.  The
# callables run in order, so Q_MEAN already sees the VPD_MEAN column.
data_l1 = (
    ta_l1
    .assign(
        VPD_MEAN=vpd_l1['VPD_MEAN'],
        Q_MEAN=lambda d: calc_q_vpd(101.325, d['AIRTEMP_MEAN'], d['VPD_MEAN']),
        THETA_MEAN=lambda d: calc_theta(101.325, d['AIRTEMP_MEAN']),
    )
    .reset_index()      # bring 'time' back as an ordinary column
)

In [5]:
# Upper probe height (levs[1]): hourly means of the air-temperature and VPD
# records, selected by their PROBE_HT column.
lmask = ( ta1['PROBE_HT']==levs[1] )
ta_l2 = ta1[lmask].resample('1H', on = 'time').mean()
lmask = ( vpd1['PROBE_HT']==levs[1] )
vpd_l2 = vpd1[lmask].resample('1H', on = 'time').mean()

# Attach VPD (aligned on the hourly time index), then derive specific humidity
# and potential temperature using a fixed pressure of 101.325 kPa.
data_l2 = ta_l2.assign(VPD_MEAN=vpd_l2['VPD_MEAN'])
data_l2 = data_l2.assign(Q_MEAN=calc_q_vpd(101.325,data_l2['AIRTEMP_MEAN'],data_l2['VPD_MEAN']))
data_l2 = data_l2.assign(THETA_MEAN=calc_theta(101.325,data_l2['AIRTEMP_MEAN']))
data_l2 = data_l2.reset_index()

# Keep only records from 2016-09-06 onwards and renumber the rows 0..n-1.
# reset_index(drop=True) replaces the hand-built integer index and the
# temporary-frame/del shuffle; the index values are the same.
# NOTE(review): the reason for this cut-off date is not visible in the notebook.
tmask = ( data_l2['time'] >= '2016-09-06' )
data_l2 = data_l2[tmask].reset_index(drop=True)

In [6]:
# Daily means of the hourly tables for both probe heights; 'time' is restored
# as a regular column so later cells can use the .dt accessor on it.
data_l1_dy = data_l1.resample('1D', on = 'time').mean().reset_index()
data_l2_dy = data_l2.resample('1D', on = 'time').mean().reset_index()

In [8]:
month = [x for x in range(1,13)]
# Not used by this cell any more; kept in case other cells rely on the name.
month_name = [calendar.month_abbr[x] for x in range(1,13)]

# Factors applied to Q_MEAN and THETA_MEAN below.
# NOTE(review): presumably the latent heat of vaporisation [J/kg] and the
# specific heat of dry air [J/(kg K)] -- confirm.
ME_FACTOR = 2.5e6
TE_FACTOR = 1005.


def _monthly_energy_table(hourly, daily, months):
    """Build the per-month table of mean ME/TE values for one probe height.

    For every month in `months` the table holds one 'daily' row (mean over the
    daily records of that month) followed by 24 rows, one per hour of day
    (mean over the hourly records of that month and hour).

    Parameters
    ----------
    hourly : DataFrame with 'time', 'Q_MEAN' and 'THETA_MEAN' columns (hourly records).
    daily  : DataFrame with the same columns (daily records).
    months : iterable of month numbers (1..12).

    Returns
    -------
    DataFrame with columns ME_MEAN, TE_MEAN, month, sub-daily, in that order.
    A month/hour without data yields NaN, as np.nanmean returns for an empty slice.
    """
    # Hoisted out of the loops: the calendar fields never change.
    daily_month = daily['time'].dt.month
    hourly_month = hourly['time'].dt.month
    hourly_hour = hourly['time'].dt.hour

    # Collect plain records and build the frame once; growing a DataFrame with
    # DataFrame.append inside a loop is quadratic and unavailable in pandas >= 2.
    records = []
    for mon in months:
        in_month = ( daily_month == mon )
        records.append({
            'ME_MEAN': np.nanmean(ME_FACTOR*daily['Q_MEAN'][in_month]),
            'TE_MEAN': np.nanmean(TE_FACTOR*daily['THETA_MEAN'][in_month]),
            'month': mon,
            'sub-daily': 'daily',
        })
        in_month = ( hourly_month == mon )
        for hr in range(24):
            in_hour = in_month & ( hourly_hour == hr )
            records.append({
                'ME_MEAN': np.nanmean(ME_FACTOR*hourly['Q_MEAN'][in_hour]),
                'TE_MEAN': np.nanmean(TE_FACTOR*hourly['THETA_MEAN'][in_hour]),
                'month': mon,
                'sub-daily': str(hr),
            })
    return pd.DataFrame(records, columns=['ME_MEAN', 'TE_MEAN', 'month', 'sub-daily'])


t_start = time.perf_counter()
os.makedirs("FULLSET", exist_ok=True)      # to_csv does not create missing directories

# Both probe heights are processed in one pass.  Previously the 1.5-meter
# setup was overwritten by the 56-meter one before use, so only the 56-meter
# file was ever written.  The 56-meter case runs last, leaving `data`,
# `data_dy` and `level` with the same final values as before.
for level, data, data_dy in (
    (levs[0], data_l1, data_l1_dy),        # 1.5-meter calculation
    (levs[1], data_l2, data_l2_dy),        # 56-meter calculation
):
    d_out2 = _monthly_energy_table(data, data_dy, month)
    d_out2.to_csv("FULLSET/knb-lter_"+str(level)+"meters.csv",float_format='%.8g',index=False)

print("end making Dataframe : ",time.perf_counter()-t_start,"seconds")
del d_out2
    
                        

end making Dataframe :  2.5115316659212112 seconds
