Goal: 

For one station, extract the fractional water index (FWI) from the first observation of each day at 25 cm depth.

In [None]:
import pandas as pd
from func import cal_buoyancy,sounding_cal
import glob
import os
from haversine import haversine, Unit
import re
import datetime
import metpy.calc as mpcalc
from metpy.units import units
from metpy import constants
from metpy.cbook import get_test_data
from metpy.plots import add_metpy_logo, SkewT
from pyhdf import SD
from scipy.interpolate import interp1d
from scipy.optimize import bisect
import numpy as np
import pickle
import numpy as np
import math
import pint
import seaborn as sns
from pydoc import help
from pyhdf.SD import *
from sklearn.preprocessing import QuantileTransformer
import xarray as xr
import netCDF4 as nc
import re
import matplotlib.pyplot as plt
from numpy import load
from numpy import asarray
from numpy import save
import pytz
from scipy import stats
from sklearn.linear_model import LinearRegression
from collections import Counter
import pymannkendall as mk
%matplotlib inline

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation/runtime warnings from the libraries used below.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Lift pandas' display limits so dataframes are rendered with all rows and all columns
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [12]:
def time_adjustment(file):
    """Return the observation timestamp of a dataset as a naive ``datetime``.

    The calendar date always comes from ``base_time``. The time of day comes
    from the first ``time_offset`` entry when ``base_time`` falls exactly on
    midnight (hour, minute and second all zero); otherwise it comes from
    ``base_time`` itself. Sub-second precision is discarded.

    Parameters
    ----------
    file : object
        Dataset exposing ``file.variables["base_time"].data`` and
        ``file.variables["time_offset"].data`` (in this notebook, datasets
        opened with ``xr.open_dataset``). Both values must be convertible
        with ``pd.Timestamp``.

    Returns
    -------
    datetime.datetime
        Year/month/day of ``base_time`` combined with the selected
        hour/minute/second.
    """
    base = pd.Timestamp(file.variables["base_time"].data)
    first_offset = pd.Timestamp(file.variables["time_offset"].data[0])

    # A midnight base_time carries no time-of-day information of its own,
    # so the clock fields are taken from the first time_offset entry instead.
    base_is_midnight = base.hour == 0 and base.minute == 0 and base.second == 0
    clock = first_offset if base_is_midnight else base

    return datetime.datetime(
        base.year, base.month, base.day,
        clock.hour, clock.minute, clock.second,
    )

In [3]:
# Read in the soil moisture files for the 2001-2019 warm season (May-September).

directory_path = "../../data/rong1/climate_fu/Data/DOEARM/sgp/okmsoilX1"

# Files are netCDF (.nc) and look like: sgpokmsoilX1.c1.20120901.000000.nc

files = [
    os.path.join(directory_path, file)
    for file in os.listdir(directory_path)
    if file.endswith(".nc")
]

filtered_files = []
for file in files:
    file_name = os.path.basename(file)
    if not file_name.startswith("sgpokmsoilX1"):
        continue

    # The third dot-separated field is the YYYYMMDD date stamp; skip any name
    # that has no such field instead of failing on it.
    name_parts = file_name.split('.')
    if len(name_parts) < 3:
        continue
    date_stamp = name_parts[2]
    if len(date_stamp) != 8 or not date_stamp.isdecimal():
        continue

    year = int(date_stamp[0:4])
    month = int(date_stamp[4:6])
    if 5 <= month <= 9 and 2020 > year > 2000:
        filtered_files.append(file)

# Open the datasets in sorted path order (this follows the date stamp for
# identically prefixed file names). The datasets are left open because later
# cells read variables from them.
soildfs = [xr.open_dataset(path) for path in sorted(filtered_files)]

In [34]:
# (latitude, longitude) of the station whose soil moisture is extracted
soil_station = (36.412010192871094, -97.69393920898438)

def find_ll_index(latarray, findlat, tol=0.0):
    """Return the position of the first entry of ``latarray`` matching ``findlat``.

    Parameters
    ----------
    latarray : array-like
        Coordinate values to search (e.g. the station latitudes of a file).
    findlat : float
        Coordinate value to look for.
    tol : float, optional
        Absolute tolerance for the comparison. The default ``0.0`` keeps the
        exact equality test; a positive value accepts the first entry within
        ``tol`` of ``findlat``, which is useful when the stored coordinates
        differ from ``findlat`` by floating-point rounding.

    Returns
    -------
    numpy integer
        Index along the first axis of the first matching entry.

    Raises
    ------
    IndexError
        If no entry matches.
    """
    coords = np.asarray(latarray)

    if tol > 0:
        matches = np.abs(coords - findlat) <= tol
    else:
        matches = coords == findlat

    positions = np.nonzero(matches)[0]
    if positions.size == 0:
        # Same exception type as indexing an empty match list, but with a
        # message that says what was being searched for.
        raise IndexError(
            f"no coordinate equal to {findlat!r} (tol={tol!r}) found in array"
        )

    return positions[0]

In [46]:
# Collect one (date, FWI) pair per dataset for the chosen station.
fwis = []

for soil_ds in soildfs:

    date = time_adjustment(soil_ds)

    # Position of the station in this file, located by its latitude
    station_idx = find_ll_index(soil_ds.variables['lat'].data, soil_station[0])

    # First index [0] selects the first record in the file; second index [1]
    # selects the second level of the next axis — presumably the 25 cm depth
    # named in the notebook goal (TODO confirm against the file's depth coordinate).
    fwi = soil_ds.variables['fractional_water_index'].data[0][1][station_idx]
    qcfwi = soil_ds.variables['qc_fractional_water_index'].data[0][1][station_idx]

    # Keep only values whose QC flag is 0 and that lie strictly between 0 and 1
    if qcfwi == 0 and 1 > fwi > 0:
        fwis.append((date, fwi))

fwidf = pd.DataFrame(fwis, columns=['date', 'fwi']).set_index('date')

In [50]:
# Load the reference FWI values that the new extraction is compared against.
# NOTE(review): pickle.load can execute arbitrary code — only open pickle files you trust.
with open('fwi_original.pdkl', mode='rb') as pickled:
    fwi_original = pickle.load(pickled)

In [51]:
# Show the dimensions of the object loaded from the pickle file
fwi_original.shape

(2819, 1)

In [53]:
# Join the new and the reference values on their indexes. This is an inner join
# (the pandas default), so only index values present in both frames are kept;
# the overlapping 'fwi' columns become 'fwi_x' (new) and 'fwi_y' (reference).
mergedfwi = fwidf.merge(fwi_original, left_index=True, right_index=True)

Unnamed: 0_level_0,fwi_x,fwi_y
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2001-05-01,0.180039,0.180039
2001-05-02,0.191783,0.191783
2001-05-03,0.176124,0.176124
2001-05-04,0.160465,0.160465
2001-05-05,0.15655,0.15655
2001-05-06,0.160465,0.160465
2001-05-07,0.16438,0.16438
2001-05-08,0.180039,0.180039
2001-05-09,0.176124,0.176124
2001-05-10,0.180039,0.180039


In [54]:
# Preview the first rows of the merged frame
mergedfwi.head()

Unnamed: 0_level_0,fwi_x,fwi_y
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2001-05-01,0.180039,0.180039
2001-05-02,0.191783,0.191783
2001-05-03,0.176124,0.176124
2001-05-04,0.160465,0.160465
2001-05-05,0.15655,0.15655


In [56]:
# Row-wise difference between the newly extracted value and the reference value
mergedfwi['diff'] = mergedfwi['fwi_x'].sub(mergedfwi['fwi_y'])
mergedfwi.head()

Unnamed: 0_level_0,fwi_x,fwi_y,diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2001-05-01,0.180039,0.180039,0.0
2001-05-02,0.191783,0.191783,0.0
2001-05-03,0.176124,0.176124,0.0
2001-05-04,0.160465,0.160465,0.0
2001-05-05,0.15655,0.15655,0.0


In [57]:
# Sum the absolute differences: with signed differences, positive and negative
# mismatches could cancel out and still give 0.0.
np.sum(np.abs(mergedfwi['diff']))

0.0

## conclusion

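The soil moisture (fractional water index) calculation is consistent with the previous calculation: for the dates present in both series, the difference check above returns 0.0.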