In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import numpy as np

#### Read the netCDF file with xarray and then open it as a dataframe 


In [None]:
# Open the netCDF file with xarray.
# NOTE(review): "filepath\\filename.nc" looks like a placeholder -- point it at
# the real file before running.
ds = xr.open_dataset("filepath\\filename.nc")
# Convert the dataset to a pandas object. snowgraph1 (defined below) reads the
# 'datetime' and 'median_final' columns from this module-level `df`.
df = ds.to_pandas()

## Graphing Interception

Now that we have a clean dataframe with datetime in one column and a median number corresponding to the participant responses in another column, we can create a visualization of snow-interception throughout a water year. The snowgraph function performs the following:

 * If the median value for a given date is greater than or equal to a value of 0.6, we graph a light blue line corresponding to snow present in the canopy. 
 
 * If the median value is less than or equal to 0.4, we graph a white line corresponding to no snow in the canopy. 
  
 * Finally, if the average value is between 0.4 and 0.6, we graph a red line corresponding to large amounts of disagreement between responses. 
 
The snowgraph function can be called with mindate and maxdate values corresponding to the minimum and maximum date to be graphed on the x-axis. 

In [7]:
def snowgraph1(mindate, maxdate):
    """Plot canopy snow presence as coloured vertical lines between two dates.

    Reads the module-level ``df`` and draws one vertical line per row at that
    row's ``datetime``: white where ``median_final`` equals 0 and light blue
    where it equals 1. Rows with any other value (including NaN) draw nothing,
    so the light-grey axes background shows through.

    NOTE(review): the accompanying text describes thresholds of 0.6 / 0.4,
    while this function matches exactly 0 and 1 -- presumably ``median_final``
    is already thresholded upstream; confirm.

    Parameters
    ----------
    mindate, maxdate : str or datetime-like
        Lower and upper x-axis limits; each is passed through
        ``pd.to_datetime``.
    """
    plt.figure(figsize=(20, 10))
    ax = plt.axes()
    ax.set_facecolor('lightgrey')

    # Pair the two columns positionally instead of looking rows up with
    # df[col][i]: that lookup is label-based, so it breaks (or leans on a
    # deprecated positional fallback) whenever the index is not exactly 0..n-1,
    # e.g. after filtering rows or when the index holds the timestamps.
    for when, median in zip(df['datetime'], df['median_final']):
        if median == 0:
            ax.axvline(when, color='white', alpha=0.9)
        elif median == 1:
            ax.axvline(when, color='lightblue', alpha=0.5)

        # Optional addition: add red lines for datetimes where the mean
        # response was close to 0.5 (high participant disagreement). Add
        # df['mean_final'] to the zip above as `mean`, then:
        # if 0.45 < mean < 0.55:
        #     ax.axvline(when, color='red', alpha=0.5)

    # Axis styling does not depend on the row, so apply it once after drawing
    # rather than on every loop iteration.
    plt.yticks([])
    plt.xticks(rotation=45, ha='right')
    plt.xlim([pd.to_datetime(mindate), pd.to_datetime(maxdate)])

In [None]:
# Draw the interception graph for this date range. The plotting function
# defined above is named `snowgraph1`; calling `snowgraph` raises NameError on
# a fresh kernel.
snowgraph1('2021-10-01 13:56:05','2022-04-30 07:26:05')

## Identify mean amount of time snow is persistent in the canopy.

This function finds the average amount of time for consecutive median values of 1.

In [None]:
def load_year_frame(path):
    """Read one netCDF file and return its contents as a pandas object.

    The dataset is opened in a ``with`` block so the underlying file is closed
    once ``to_pandas()`` has produced the pandas object, instead of staying
    open for the lifetime of the kernel.
    """
    with xr.open_dataset(path) as dataset:
        return dataset.to_pandas()


# One frame per year.
# NOTE(review): every path below is the same placeholder string; presumably
# each should point at that year's own file -- confirm before running.
N2022 = load_year_frame("filepath\\filename.nc")
N2021 = load_year_frame("filepath\\filename.nc")
N2020 = load_year_frame("filepath\\filename.nc")
N2019 = load_year_frame("filepath\\filename.nc")
N2018 = load_year_frame("filepath\\filename.nc")
N2017 = load_year_frame("filepath\\filename.nc")
N2016 = load_year_frame("filepath\\filename.nc")

In [None]:
def duration(year, label=None):
    """Histogram how long snow persists in the canopy and return the mean.

    Rows with a missing ``median_value`` are dropped. A "run" starts at the
    first row whose ``median_value`` is 1 and ends at the first following row
    whose value is not 1; its duration is the difference between those two
    rows' ``datetime`` values. A run that is still open at the last row is not
    counted, because it has no closing row to measure against.

    Parameters
    ----------
    year : pandas.DataFrame
        Frame with ``datetime`` and ``median_value`` columns.
    label : str, optional
        Legend label for the histogram. Defaults to no label.

    Returns
    -------
    pandas.Timedelta
        Mean run duration, or ``NaT`` when no completed run exists.
    """
    observed = year.dropna(subset=['median_value']).reset_index(drop=True)

    run_lengths = []
    run_start = None
    for when, median in zip(observed['datetime'], observed['median_value']):
        if median == 1:
            if run_start is None:
                run_start = when
        elif run_start is not None:
            run_lengths.append(when - run_start)
            run_start = None

    # An explicit dtype keeps the .dt accessor usable when no run was found.
    time_diff = pd.Series(run_lengths, dtype='timedelta64[ns]')
    average_time_diff = time_diff.mean()

    # total_seconds() covers the whole duration; .dt.seconds is only the
    # sub-day component and would report a 30 h run as 6 h.
    converted_diff = time_diff.dt.total_seconds() / 3600
    plt.hist(converted_diff, label=label, stacked=True, bins=range(30))
    return average_time_diff