In [74]:
import sys
import os
import datetime
import pandas as pd
import numpy as np
import pytz
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.gridspec as gridspec
from matplotlib.colors import Normalize

sys.path.append('..')
import funcs.ac_funcs as ac

In [80]:
# NOTE(review): scratch inspection of dt1_utc. In file order this name is first assigned in a
# later cell, so this cell raises NameError under Restart Kernel -> Run All. The recorded output
# (2024-08-08 06:00 UTC) also does not correspond to the dt1 defined there (2024-07-30 10:00
# US/Mountain) -- presumably stale; re-run after the definition or delete this cell.
dt1_utc

datetime.datetime(2024, 8, 8, 6, 0, tzinfo=<UTC>)

In [83]:
# Load the met data for the analysis window and express its index in local time.
mlg = ac.met_loader_ggg('/uufs/chpc.utah.edu/common/home/u0890904/LAIR_1/Data/met/UUSYR/daily_txt_gggformat')
timezone = 'US/Mountain'
local_tz = pytz.timezone(timezone) #build the tz object once and reuse it for both endpoints

# Analysis window endpoints, written as naive wall-clock times in `timezone`.
dt1 = datetime.datetime.strptime('2024-07-30 10:00:00','%Y-%m-%d %H:%M:%S') #start of the window
dt2 = datetime.datetime.strptime('2024-08-08 00:00:00','%Y-%m-%d %H:%M:%S') #end of the window

# pytz zones are attached with localize() (not tzinfo=) so the correct UTC offset is chosen.
dt1 = local_tz.localize(dt1)
dt2 = local_tz.localize(dt2)
dt1_utc = dt1.astimezone(pytz.utc)
dt2_utc = dt2.astimezone(pytz.utc)

# The loader is queried with the UTC endpoints; tz_convert below requires that the frame it
# returns has a timezone-aware DatetimeIndex.
met_df = mlg.load_data_inrange(dt1_utc,dt2_utc)
met_df.index = met_df.index.tz_convert(timezone)

In [87]:
class NamesInRange:
    '''Finds directory entries whose names contain a date string (YYYYmmdd) within a datetime range.

    Args:
    base_path (str) : the directory searched when no explicit path is given to the methods
    '''
    def __init__(self,base_path):
        self.base_path = base_path

    def get_files_inrange(self,dt1,dt2,path = None):
        '''This gets the directory entries whose names contain a date between dt1 and dt2 (inclusive)
        
        Only the calendar dates of dt1 and dt2 are used, exactly as given (no timezone conversion
        is applied), so both should be expressed in the timezone the names are written in.
        
        Args:
        dt1 (datetime.datetime) : a datetime to start the data period
        dt2 (datetime.datetime) : a datetime to end the data period
        path (str, optional) : a specific path to look in other than self.base_path
        
        Returns:
        files_in_range (list) : a sorted list of names only (no directory part) that contain a
                                YYYYmmdd string for a day in the range. Each name appears once,
                                even if it contains more than one in-range date. Empty if dt2 is
                                on an earlier day than dt1.
        '''
        start_day = dt1.date()
        n_days = (dt2.date() - start_day).days #whole days between the start and end dates
        daystrings_in_range = [
            (start_day + datetime.timedelta(days=i)).strftime('%Y%m%d') #YYYYmmdd to match with names
            for i in range(n_days + 1) #+1 so that the end day itself is included
        ]
        # any() stops at the first matching day, so a name holding two in-range dates is kept once
        files_in_range = [
            fname for fname in self.get_sorted_fnames(path = path)
            if any(daystring in fname for daystring in daystrings_in_range)
        ]
        return files_in_range

    def get_sorted_fnames(self,path = None):
        '''Lists and sorts the entries of a directory
        
        Args:
        path (str, optional) : the directory to list; defaults to self.base_path
        
        Returns:
        (list) : a sorted list of all of the entry names within the base_path or input path
        '''
        if path is None:
            path = self.base_path
    
        return sorted(os.listdir(path))

def load_picarro_day(path,out_tz='UTC'):
    '''Loads one whitespace-delimited Picarro data file into a time-indexed dataframe
    
    The file must have DATE (mm/dd/yy) and TIME (HH:MM:SS.f) columns. The combined timestamp is
    interpreted as UTC and then converted to out_tz.
    
    Args:
    path (str or file-like) : the file to read, passed straight to pd.read_csv
    out_tz (str or tzinfo) : the timezone of the returned index, default 'UTC'
    
    Returns:
    pic_df (pd.DataFrame) : the file contents indexed by a timezone aware DatetimeIndex in out_tz,
                            with the DATE and TIME columns removed
    '''
    pic_df = pd.read_csv(path,sep=r'\s+') #raw string: '\s' is not a valid escape in a plain str literal
    timestamps = pd.to_datetime(pic_df['DATE']+' '+pic_df['TIME'],format='%m/%d/%y %H:%M:%S.%f') #parse the datetime
    pic_df.index = pd.DatetimeIndex(timestamps).tz_localize('UTC').tz_convert(out_tz)
    return pic_df.drop(columns=['DATE','TIME'])


# Load every Picarro data file from the day folders in the analysis window into one dataframe.
picarro_path = '/uufs/chpc.utah.edu/common/home/u0890904/LAIR_1/Data/Insitu/NIST_Picarro'
fir = NamesInRange(picarro_path)
pic_data_id = 'Data' #only files with this substring in their name are loaded
out_tz = 'US/Mountain'
resample = '1min' #resampling rule applied to each file; None skips the resampling

# Day folders are selected by the calendar dates of dt1/dt2 as given (no timezone conversion).
# NOTE(review): confirm the folder names are written in the same timezone as dt1/dt2.
folders_in_range = fir.get_files_inrange(dt1,dt2)
pic_frames = [] #collect the per-file frames and concatenate once, instead of growing a frame in the loop
for folder in folders_in_range:
    full_daypath = os.path.join(fir.base_path,folder)
    for file in sorted(os.listdir(full_daypath)): #os.listdir order is arbitrary; sort for a reproducible row order
        if pic_data_id not in file:
            continue
        df = load_picarro_day(os.path.join(full_daypath,file),out_tz=out_tz)
        if resample is not None:
            df = df.resample(resample).mean()
        pic_frames.append(df)

if pic_frames:
    full_pic_df = pd.concat(pic_frames)
else:
    # Nothing matched: keep a timezone aware (empty) index so the range filter and later joins still work
    full_pic_df = pd.DataFrame(index=pd.DatetimeIndex([],tz=out_tz))

# Trim to the exact window (both endpoints inclusive)
full_pic_df = full_pic_df.loc[(full_pic_df.index>=dt1)&(full_pic_df.index<=dt2)]

In [88]:
# Attach the Picarro columns to the met rows by matching index values (met_df's rows are the
# ones kept by the left join), then discard every row that has a missing value in any column.
full_df = (
    met_df
    .join(full_pic_df, how='left')
    .dropna(axis=0, how='any')
)

In [102]:
# Thresholds for the highlighted subset. The legend label is built from these same values so
# that the text shown on the figure always matches the filter that was actually applied.
wd_min, wd_max, ws_min = 200, 325, 2
subdf = full_df.loc[(full_df['wd']>wd_min) &(full_df['wd']<wd_max) &(full_df['ws']>ws_min)]

# column name in full_df -> y axis title, one subplot row per entry
params = {'ws':'ws (m/s)','wd':'wd (from north)','CO2':'CO2 (ppm)','CH4':'CH4 (ppm)'}
# legend name -> [dataframe, marker color, marker size]; later entries are drawn on top
df_dict = {'all_data':[full_df,'black',4],f'{wd_min} < wd < {wd_max}<br>ws > {ws_min} m/s':[subdf,'red',3]}

fig = make_subplots(rows=len(params),cols=1,shared_xaxes=True)

for row,(param,ax_label) in enumerate(params.items(),start=1):
    for key,(df,color,size) in df_dict.items():
        fig.add_trace(go.Scatter(
            x = df.index,
            y = df[param],
            mode = 'markers',
            marker = {'color' : color,'size':size},
            name = key,
            showlegend = (row == 1), #every row repeats the same names, so only list them once
        ),row = row,col = 1)
    fig.update_yaxes(title_text = ax_label,row=row,col=1) #once per row rather than once per trace

fig.update_layout(
    title = 'SYR',
    height=600,
    margin = go.layout.Margin(t=1,b=1)
)
fig.show()