In [128]:
#load libraries
import pandas as pd
import numpy as np
import netCDF4 as nc

In [129]:
# root directory that holds the AmeriFlux downloads
amfdir = '/Users/jinyuntang/work/ameriflux_data/'
# directory of the US-Var FLUXNET FULLSET case, below amfdir
amfcase_dir = amfdir + 'AMF_US-Var_FLUXNET_FULLSET_2000-2021_3-5/'
# ERA5 csv file of that case (the climate input read by this notebook)
amfcsvfile = amfcase_dir + 'AMF_US-Var_FLUXNET_ERA5_HH_1981-2021_3-5.csv'
# output directory
outdir = '/Users/jinyuntang/work/github/ecosim_benchmark/smallset/MeditteraneanPastureCA/'
# name of the NetCDF climate file to create
clmncfile = 'US_Var.climate.hour.1981-2021.nc'
# number of grids in the output file (size of the 'ngrid' dimension)
ngrids = 1

In [130]:
#define useful functions

def break_ymdhms(ymdhmstr):
    """
    Split a timestamp string of format yyyymmddhhmm into integer fields.

    Parameters:
    ymdhmstr (string): timestamp laid out as yyyymmddhhmm

    Returns:
    tuple of int: (year, month, day, hour, minute)
    """
    # (start, stop) character offsets of each field; the minute field
    # runs from offset 10 to the end of the string
    bounds=((0,4),(4,6),(6,8),(8,10),(10,None))
    y,m,d,hh,mm=[int(ymdhmstr[lo:hi]) for lo,hi in bounds]
    return y,m,d,hh,mm
    
def is_leap(year):
    """
    Gregorian leap-year flag.

    Parameters:
    year (int): calendar year

    Returns:
    int: 1 when year is a leap year, 0 otherwise
    """
    every_fourth=(year%4 == 0)
    century=(year%100 == 0)
    every_400th=(year%400 == 0)
    # century years only count as leap years when divisible by 400
    return 1 if every_fourth and (not century or every_400th) else 0
        
def get_doy(y,m,d):
    """
    Day of year (1 for January 1st) of the calendar date y-m-d.

    Parameters:
    y (int): year, used to decide whether a leap day applies
    m (int): month, 1..12
    d (int): day of month

    Returns:
    int: day of year
    """
    month_lengths=(31,28,31,30,31,30,31,31,30,31,30,31)
    leap=is_leap(y)
    # days in the months before month m, using non-leap month lengths
    days_before=sum(month_lengths[k] for k in range(m-1))
    # the leap day only counts for dates after February
    if m>2:
        days_before=days_before+leap
    return days_before+d

def getymdh(timestamp):
    """
    Convert yyyymmddhhmm timestamps into decimal years.

    Parameters:
    timestamp (sequence): timestamps, indexed 0..len-1; str() of each
        element must have the format yyyymmddhhmm

    Returns:
    numpy.ndarray: one float per timestamp, year plus the elapsed fraction
        of that year (the year length is 365 or 366 days)
    """
    nstamp=len(timestamp)
    ymdh=np.zeros(nstamp)
    for k in range(nstamp):
        y,m,d,hh,mm=break_ymdhms(str(timestamp[k]))
        # fraction of the day that has passed at hh:mm
        day_fraction=(float(mm)/60.0+float(hh))/24.0
        # days since the start of the year, January 1st 00:00 being 0
        days_elapsed=get_doy(y,m,d)-1.0+day_fraction
        ymdh[k]=float(y)+days_elapsed/(365.0+is_leap(y))
    return ymdh


def create_ncfile(ncfilename,ngrids):
    """
    Create the NetCDF file that receives the climate data and define its
    dimensions, variables and variable attributes.

    Parameters:
    ncfilename (string): the NetCDF file to write to
    ngrids (int) : number of grids to write

    Returns:
    nc.Dataset: the dataset, opened in write mode; it is not closed here
    """
    spval=1.e30
    ispval=-9999
    hourly_dims=('year', 'day', 'hour', 'ngrid')
    yearly_dims=('year', 'ngrid')

    # (name, size); 'year' has size None, i.e. it is the unlimited dimension
    dim_specs=(('year',None),('day',366),('hour',24),('ngrid',ngrids))

    # (name, datatype, dimensions, fill value), listed in creation order
    var_specs=[('year','i4',('year',),-9999)]
    var_specs+=[(name,'f4',hourly_dims,spval)
                for name in ('SRADH','TMPH','RAINH','DWPTH','WINDH','PATM')]
    var_specs+=[('Z0G','f4',yearly_dims,spval),
                ('IFLGW','i4',yearly_dims,ispval)]
    var_specs+=[(name,'f4',yearly_dims,spval)
                for name in ('ZNOONG','PHRG','CN4RIG','CNORIG','CPORG',
                             'CALRG','CFERG','CCARG','CMGRG','CNARG',
                             'CKARG','CSORG','CCLRG')]

    # (variable, ((attribute, value), ...)), listed in assignment order
    # NOTE(review): "vegeation" in the IFLGW long_name looks like a typo for
    # "vegetation"; it is left as is because the text is written to the file
    var_attrs=(
        ('year',(('long_name','year AD'),)),
        ('SRADH',(('units','W/m^2'),('long_name',"Hourly incident solar radiation"))),
        ('TMPH',(('units','oC'),('long_name','Hourly air temperature'))),
        ('RAINH',(('units','mm m^-2 hr^-1'),('long_name','Hourly total precipitation'))),
        ('DWPTH',(('units','kPa'),('long_name','Hourly atmospheric vapor pressure'))),
        ('WINDH',(('units','m/s'),('long_name','Hourly horizontal wind speed'))),
        ('PATM',(('units','kPa'),('long_name','Hourly atmospheric pressure'))),
        ('Z0G',(('long_name',"windspeed measurement height"),('units',"m"))),
        ('IFLGW',(('long_name',"flag for raising Z0G with vegeation"),)),
        ('ZNOONG',(('long_name',"time of solar noon"),('units',"hour"))),
        ('PHRG',(('long_name',"pH in precipitation"),)),
        ('CN4RIG',(('long_name',"NH4 conc in precip"),('units',"gN m^-3"))),
        ('CNORIG',(('long_name',"NO3 conc in precip"),('units',"gN m^-3"))),
        ('CPORG',(('long_name',"H2PO4 conc in precip"),('units',"gP m^-3"))),
        ('CALRG',(('long_name',"Al conc in precip"),('units',"gAl m^-3"))),
        ('CFERG',(('long_name',"Fe conc in precip"),('units',"gFe m^-3"))),
        ('CCARG',(('long_name',"Ca conc in precip"),('units',"gCa m^-3"))),
        ('CMGRG',(('long_name',"Mg conc in precip"),('units',"gMg m^-3"))),
        ('CNARG',(('long_name',"Na conc in precip"),('units',"gNa m^-3"))),
        ('CKARG',(('long_name',"K conc in precip"),('units',"gK m^-3"))),
        ('CSORG',(('long_name',"SO4 conc in precip"),('units',"gS m^-3"))),
        ('CCLRG',(('long_name',"Cl conc in precip"),('units',"gCl m^-3"))),
    )

    dataset = nc.Dataset(ncfilename, 'w', format='NETCDF4')

    # Define dimensions
    for dimname,dimsize in dim_specs:
        dataset.createDimension(dimname, dimsize)

    # Define all variables first, keeping a handle to each of them
    created={}
    for name,datatype,dims,fill in var_specs:
        created[name]=dataset.createVariable(name, datatype, dims, fill_value=fill)

    # Add attributes to variables
    for name,attrs in var_attrs:
        for attrname,value in attrs:
            setattr(created[name], attrname, value)

    #return
    return dataset

def write_data(dataset, varname, nyear, sdata=None, fdata=None, idata=None,year=None):
    """
    Write the data of one year for variable varname to the NetCDF file.

    Each of sdata, fdata, idata and year is optional; only the ones given
    are written.

    Parameters:
    dataset (nc.Dataset): The NetCDF dataset to write to.
    varname (string): variable to write
    nyear (int): index along the first (year) dimension to write at
    sdata (numpy.ndarray): data assigned to varname[nyear, :, :, :]
    fdata (numpy.ndarray): float data assigned to varname[nyear, :]
    idata (numpy.ndarray): integer data assigned to varname[nyear, :]
    year (int): calendar year stored in variable 'year' at index nyear
    """
    # compare against None so that a year value of 0 is not silently skipped
    if year is not None:
        dataset.variables['year'][nyear] = year
    if sdata is not None:
        dataset.variables[varname][nyear, :, :, :] = sdata
    # idata and fdata go to the same slice; idata is written first
    for yearly in (idata, fdata):
        if yearly is not None:
            dataset.variables[varname][nyear, :] = yearly



def close_netcdf_file(dataset):
    """
    Close the NetCDF dataset and report the closure on stdout.

    Parameters:
    dataset (nc.Dataset): The NetCDF dataset to close.
    """
    done_msg="NetCDF file successfully closed."
    dataset.close()
    # only reached when close() did not raise
    print(done_msg)

def get_ndays_from_year1_to_year2(year1,year2):
    """
    Total number of days in the years year1..year2, both included.

    Parameters:
    year1 (int): first year
    year2 (int): last year

    Returns:
    int: sum of the year lengths (365, or 366 for leap years); 0 when
        year2 is smaller than year1
    """
    return sum(365+is_leap(yr) for yr in range(year1,year2+1))

def vapsat(tempC):
    """
    Saturated water vapor pressure at a given air temperature.

    Parameters:
    tempC (float): temperature in degrees Celsius

    Returns:
    float: saturated vapor pressure in kPa
    """
    # the formula works on absolute temperature
    kelvin=tempC+273.15
    exponent=5360.0*(3.661E-03-1.0/kelvin)
    #vapor in kPa
    return 0.61*np.exp(exponent)



In [133]:
#read csv file
# Read the CSV file into a DataFrame
df = pd.read_csv(amfcsvfile)

print(df)
#time step of the first record; the timestamps are integers of the form
#yyyymmddhhmm, so a difference of 30 means half-hourly records
dt=df['TIMESTAMP_END'][0]-df['TIMESTAMP_START'][0]
tlen=len(df['TIMESTAMP_START'])

#get beg year info
year0,_,_,_,_=break_ymdhms(str(df['TIMESTAMP_START'][0]))
#get end year info
yeare,_,_,_,_=break_ymdhms(str(df['TIMESTAMP_START'][tlen-1]))
ndays=get_ndays_from_year1_to_year2(year0,yeare)

#nsub is the number of input records that make up one output hour
if dt == 30:
    #climate data is half-hourly time step
    nsub=2
    print("processing half-hourly climate data, from year %d to year %d\n"%(year0,yeare))
else:
    #climate data in hourly time step
    nsub=1
    print('processing hourly climate data, from year %d\n'%year0)

#check data consistency: the file must hold whole years, with 24*nsub
#records per day (48 for half-hourly data, 24 for hourly data)
nrequired=24*nsub*ndays
if tlen != nrequired:
    print("required %d data entries, found %d\n"%(nrequired,tlen))
    raise Exception("Data reading error")

#pull the needed columns out once; element access on numpy arrays is much
#cheaper than repeated DataFrame look-ups inside the loops below
tstart=df['TIMESTAMP_START'].to_numpy()
ta=df['TA_ERA'].to_numpy()
swin=df['SW_IN_ERA'].to_numpy()
pa=df['PA_ERA'].to_numpy()
ws=df['WS_ERA'].to_numpy()
pr=df['P_ERA'].to_numpy()
vpd=df['VPD_ERA'].to_numpy()

#FLUXNET reports VPD in hPa whereas vapsat() returns kPa; without this
#conversion the deficit is ten times too large and the vapor pressure is
#clamped to zero most of the time
HPA_TO_KPA=0.1

#below are site specific; they do not change from year to year and are
#needed by both the half-hourly and the hourly case
IFLGW=np.zeros(ngrids,dtype=int)
ZNOONG=np.zeros(ngrids)+12.5
Z0G=np.zeros(ngrids)+10.
PHRG=np.zeros(ngrids)+7.
CN4RIG=np.zeros(ngrids)+0.5
CNORIG=np.zeros(ngrids)+0.25
CPORG=np.zeros(ngrids)+0.05
CALRG=np.zeros(ngrids)
CFERG=np.zeros(ngrids)
CCARG=np.zeros(ngrids)
CMGRG=np.zeros(ngrids)
CNARG=np.zeros(ngrids)
CKARG=np.zeros(ngrids)
CSORG=np.zeros(ngrids)
CCLRG=np.zeros(ngrids)

#create output file, only after the input passed the consistency check
#NOTE(review): outdir is defined in the configuration cell but is not used;
#the file is created relative to the current working directory - confirm
#that this is intended
dataset=create_ncfile(clmncfile,ngrids)

try:
    #ii counts the output hours processed so far, over all years
    ii=0
    for year in range(year0,yeare+1):
        #the arrays always have 366 days; day 366 stays 0 in non-leap years
        temp=np.zeros((366, 24, ngrids))
        patm=np.zeros((366, 24, ngrids))
        prec=np.zeros((366, 24, ngrids))
        windh=np.zeros((366, 24, ngrids))
        vap=np.zeros((366, 24, ngrids))
        srad=np.zeros((366, 24, ngrids))
        ndays=365+is_leap(year)

        for d in range(ndays):
            for h in range(24):
                #records i1 .. iend-1 belong to hour h of day d
                i1=ii*nsub
                iend=i1+nsub
                y,m,d1,h1,_=break_ymdhms(str(tstart[i1]))
                day=get_doy(y,m,d1)
                if day-1 != d or h1 !=h or y !=year:
                    #both triples are (year, zero-based day of year, hour)
                    print("error in time matching, required (%d,%d,%d), found (%d,%d,%d)"%(year,d,h,y,day-1,h1))
                    raise Exception("Data processing error")
                #only grid 0 is filled from the csv file
                #TA_ERA: air temperature, averaged over the hour
                tempC=ta[i1:iend].mean()
                temp[day-1,h,0]=tempC
                #SW_IN_ERA: incoming shortwave radiation, averaged over the hour
                srad[day-1,h,0]=swin[i1:iend].mean()
                #PA_ERA: atmospheric pressure kPa, averaged over the hour
                patm[day-1,h,0]=pa[i1:iend].mean()
                #WS_ERA: wind speed, averaged over the hour
                windh[day-1,h,0]=ws[i1:iend].mean()
                #P_ERA: precipitation, accumulated over the hour
                prec[day-1,h,0]=pr[i1:iend].sum()
                #VPD_ERA: vapor pressure deficit (hPa), converted to kPa
                vps=vapsat(tempC)
                vpd_kpa=vpd[i1:iend].mean()*HPA_TO_KPA
                #vapor pressure = saturation - deficit, kept within [0, vps]
                vap[day-1,h,0]=np.min([vps,np.max([vps-vpd_kpa,0.0])])
                ii=ii+1

        nyear=year-year0
        #the year value itself is stored together with the first variable
        write_data(dataset, 'TMPH',nyear,sdata=temp, year=year)
        for varname,values in (('SRADH',srad),('RAINH',prec),('DWPTH',vap),
                               ('PATM',patm),('WINDH',windh)):
            write_data(dataset, varname,nyear,sdata=values)
        write_data(dataset, 'IFLGW', nyear,idata=IFLGW)
        for varname,values in (('ZNOONG',ZNOONG),('Z0G',Z0G),('PHRG',PHRG),
                               ('CN4RIG',CN4RIG),('CNORIG',CNORIG),('CPORG',CPORG),
                               ('CALRG',CALRG),('CFERG',CFERG),('CCARG',CCARG),
                               ('CMGRG',CMGRG),('CNARG',CNARG),('CKARG',CKARG),
                               ('CSORG',CSORG),('CCLRG',CCLRG)):
            write_data(dataset, varname,nyear,fdata=values)
finally:
    #always release the file handle, also when the processing above fails
    close_netcdf_file(dataset)

        TIMESTAMP_START  TIMESTAMP_END  TA_ERA  SW_IN_ERA  LW_IN_ERA  VPD_ERA  \
0          198101010000   198101010030   3.266      0.000    275.164    0.354   
1          198101010030   198101010100   3.066      0.000    275.164    0.248   
2          198101010100   198101010130   2.867      0.000    274.376    0.143   
3          198101010130   198101010200   2.003      0.000    274.376    0.118   
4          198101010200   198101010230   1.139      0.000    273.886    0.093   
...                 ...            ...     ...        ...        ...      ...   
718795     202112312130   202112312200   9.262    145.011    285.080    3.950   
718796     202112312200   202112312230   9.262    145.011    285.080    3.950   
718797     202112312230   202112312300   9.262    145.011    285.080    3.950   
718798     202112312300   202112312330   9.262    145.011    285.080    3.950   
718799     202112312330   202201010000   9.262    145.011    285.080    3.950   

        PA_ERA  P_ERA  WS_E