In [4]:
import xarray as xr

import glob
import os
import datetime as dt

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import geopandas as gpd

In [None]:
# The zarr store only exists on the Linux box, so this cell cannot be run from a local machine.
zarr_path = '/storage/GOES/orthorectified/Fog2022_withtime.zarr'
# Supplying `chunks` makes xarray open the dataset with lazy Dask arrays instead of
# loading the data eagerly. Each chunk covers up to 40785 steps along `time` and a
# 50 x 50 tile in latitude/longitude.
# NOTE(review): 40785 is presumably the full length of the time axis - confirm.
chunk_sizes = {'time': 40785, 'latitude': 50, 'longitude': 50}
ds = xr.open_dataset(zarr_path, engine='zarr', chunks=chunk_sizes)

In [None]:
# Shorthand for the cloud-height variable so later cells can sample it directly.
height = ds.data_vars['Height']

In [None]:
# (latitude, longitude) of the `fhl` site
fhl = (48.55, -123.01)
fhl_lat, fhl_lon = fhl
# Height timeseries at the grid cell nearest to that point
fhl_timeseries = ds['Height'].sel(latitude=fhl_lat, longitude=fhl_lon, method='nearest')
# To work without access to the zarr store, the timeseries can be exported once to a
# .csv file via a pandas DataFrame (this took about 4 minutes):
# fhl_timeseries.to_dataframe().to_csv('fhl_cloudheight_df.csv')

In [8]:
# (latitude, longitude) of the Mt Dallas site
mtdallas = (48.53, -123.13)
# Height timeseries at the grid cell nearest to that point
mtdallas_timeseries = ds['Height'].sel(
    {'latitude': mtdallas[0], 'longitude': mtdallas[1]},
    method='nearest',
)
# One-off CSV export for use without the zarr store (this one took only about 10 seconds):
# mtdallas_timeseries.to_dataframe().to_csv('mtdallas_cloudheight_df.csv')

## Whidbey Island

In [4]:
# (latitude, longitude) of the Whidbey Island site
whidbey = (48.350, -122.650)
whidbey_lat, whidbey_lon = whidbey
# Height timeseries at the grid cell nearest to that point
whidbey_timeseries = ds['Height'].sel(latitude=whidbey_lat, longitude=whidbey_lon, method='nearest')

In [13]:
# Count the timesteps that have no Height value (NaN).
# NOTE(review): the variable name treats a missing Height as clear sky - confirm that NaN
# never stands for a failed or absent retrieval.
is_missing = whidbey_timeseries.isnull()
clear = is_missing.sum().values
# Share of all timesteps that are missing
clear_freq = clear / whidbey_timeseries.sizes['time']
clear_freq

0.1061174451391443

In [12]:
# Fraction of all timesteps with Height below 2000. NaN compares as False, so timesteps
# without a Height value count in the denominator but never in the numerator.
n_timesteps = whidbey_timeseries.sizes['time']
under_2000 = (whidbey_timeseries < 2000).sum().values
under_2000_freq = under_2000 / n_timesteps
under_2000_freq

0.3407134976094152

In [16]:
# Fraction of all timesteps with Height below 600 (same denominator as above: every
# timestep, including those without a Height value).
below_600_mask = whidbey_timeseries < 600
under_600 = below_600_mask.sum().values
under_600_freq = under_600 / whidbey_timeseries.sizes['time']
under_600_freq

0.034547014833885004

## False Bay

### From zarr dataset

In [15]:
# False Bay location as (latitude, longitude)
falsebay = (48.48, -123.06)
# Sample the cloud-height rasters at the grid cell nearest to False Bay, then keep only
# 1 May - 18 July 2022. `.sel` is an xarray method, so this route needs the DataArray
# from the zarr dataset.
falsebay_timeseries = (
    height
    .sel(latitude=falsebay[0], longitude=falsebay[1], method='nearest')
    .sel(time=slice('May 01, 2022', 'July 18, 2022'))
)

### From CSV file

In [23]:
# Fallback when the zarr store is not reachable: load the False Bay timeseries from the
# CSV file that was exported previously.
# The folder can be overridden with the CLOUD_HEIGHT_CSV_DIR environment variable so the
# notebook is not tied to one user's machine; the default is the original location.
csv_dir = os.environ.get(
    'CLOUD_HEIGHT_CSV_DIR',
    '/Users/autumn_yngoc/Downloads/Marine Fog/summerfog/cloud_height_timeseries',
)
falsebay_df = pd.read_csv(os.path.join(csv_dir, 'falsebay_cloudheight_df.csv'))

In [24]:
# Parse the 'time' column into timestamps and make it the index so the frame can be
# sliced by date.
# Guarded and non-mutating: with an in-place set_index, running this cell a second time
# raises KeyError: 'time' because the column has already been moved into the index.
if 'time' in falsebay_df.columns:
    falsebay_df = (
        falsebay_df
        .assign(time=pd.to_datetime(falsebay_df['time']))
        .set_index('time')
    )

In [30]:
# Keep the Height column for 1 May - 18 July 2022; both end dates are inclusive because
# the slice is label-based on the datetime index.
falsebay_timeseries = falsebay_df.loc['May 01, 2022':'July 18, 2022', 'Height']
falsebay_timeseries

time
2022-05-01 05:01:17    808.78980
2022-05-01 05:06:17    794.14000
2022-05-01 05:11:17    769.72375
2022-05-01 05:16:17    776.43823
2022-05-01 05:21:17    790.17240
                         ...    
2022-07-18 23:36:17          NaN
2022-07-18 23:41:17          NaN
2022-07-18 23:46:17          NaN
2022-07-18 23:51:17          NaN
2022-07-18 23:56:17          NaN
Name: Height, Length: 21872, dtype: float64

In [31]:
# Convert the pandas Series into an xarray DataArray so the cells below work the same way
# for the CSV route and the zarr route.
# Only convert when the object is still a pandas Series: an xarray DataArray has no
# `to_xarray` method, so an unguarded call fails when this cell is re-run or when
# `falsebay_timeseries` was taken from the zarr dataset.
if isinstance(falsebay_timeseries, pd.Series):
    falsebay_timeseries = falsebay_timeseries.to_xarray()
falsebay_timeseries

### Frequency calculation

In [32]:
# Fraction of all False Bay timesteps with Height below 2000. NaN compares as False, so
# timesteps without a Height value count only in the denominator.
n_timesteps = falsebay_timeseries.sizes['time']
under_2000 = (falsebay_timeseries < 2000).sum().values
under_2000_freq = under_2000 / n_timesteps
under_2000_freq

0.2173555230431602

In [23]:
# Fraction of all False Bay timesteps with Height below 600 (denominator: every timestep,
# including those without a Height value).
below_600_mask = falsebay_timeseries < 600
under_600 = below_600_mask.sum().values
under_600_freq = under_600 / falsebay_timeseries.sizes['time']
under_600_freq

0.050201170446232624

In [51]:
# Restrict to timestamps whose hour-of-day is 6 through 19 inclusive (06:00-19:59), so the
# frequency of fog/low clouds from satellite-derived cloud height can be compared with the
# camera-derived fog classification, which only covers the hours the field cameras take
# pictures.
# NOTE(review): the camera window was described as 5am to 8pm, but range(6, 20) starts at
# hour 6 - confirm which is intended. Also confirm whether these timestamps are UTC or
# local time; the hour filter is applied to them exactly as stored.
# The subset gets its own name so the full-period series is not overwritten and the
# frequency cells above give the same numbers when they are re-run.
falsebay_daytime = falsebay_timeseries.isel(time=falsebay_timeseries.time.dt.hour.isin(range(6, 20)))
# Fractions of the daytime timesteps with Height below 2000 and below 600
under_2000 = (falsebay_daytime < 2000).sum().values
print(under_2000/falsebay_daytime['time'].size)
under_600 = (falsebay_daytime < 600).sum().values
print(under_600/falsebay_daytime['time'].size)

0.2386122077133313
0.04653811114485272


## Mt Dallas

In [6]:
# Mt Dallas location as (latitude, longitude)
mtdallas = (48.53, -123.13)
# Height timeseries at the nearest grid cell, limited to 19 July - 30 September 2022
mtdallas_timeseries = (
    ds['Height']
    .sel(latitude=mtdallas[0], longitude=mtdallas[1], method='nearest')
    .sel(time=slice('July 19, 2022', 'September 30, 2022'))
)

In [45]:
# Fallback when the zarr store is not reachable: load the Mt Dallas timeseries from the
# CSV file that was exported previously.
# The folder can be overridden with the CLOUD_HEIGHT_CSV_DIR environment variable so the
# notebook is not tied to one user's machine; the default is the original location.
csv_dir = os.environ.get(
    'CLOUD_HEIGHT_CSV_DIR',
    '/Users/autumn_yngoc/Downloads/Marine Fog/summerfog/cloud_height_timeseries',
)
mtdallas_df = pd.read_csv(os.path.join(csv_dir, 'mtdallas_cloudheight_df.csv'))

In [46]:
# Parse the 'time' column into timestamps and make it the index so the frame can be
# sliced by date.
# Guarded and non-mutating: with an in-place set_index, running this cell a second time
# raises KeyError: 'time' because the column has already been moved into the index.
if 'time' in mtdallas_df.columns:
    mtdallas_df = (
        mtdallas_df
        .assign(time=pd.to_datetime(mtdallas_df['time']))
        .set_index('time')
    )

In [47]:
# Keep the Height column for 19 July - 30 September 2022; both end dates are inclusive
# because the slice is label-based on the datetime index.
mtdallas_timeseries = mtdallas_df.loc['July 19, 2022':'September 30, 2022', 'Height']
mtdallas_timeseries

time
2022-07-19 00:01:17    2278.6510
2022-07-19 00:06:17    2243.5525
2022-07-19 00:11:17    2247.5200
2022-07-19 00:16:17    2264.6116
2022-07-19 00:21:17    2202.9604
                         ...    
2022-09-30 16:31:17    2056.1575
2022-09-30 16:36:17    1989.6229
2022-09-30 16:41:17    1967.6483
2022-09-30 16:46:17    1873.6455
2022-09-30 16:51:17    1857.4697
Name: Height, Length: 18841, dtype: float64

In [48]:
# Convert the pandas Series into an xarray DataArray so the cells below work the same way
# for the CSV route and the zarr route.
# Only convert when the object is still a pandas Series: an xarray DataArray has no
# `to_xarray` method, so an unguarded call fails when this cell is re-run or when
# `mtdallas_timeseries` was taken from the zarr dataset.
if isinstance(mtdallas_timeseries, pd.Series):
    mtdallas_timeseries = mtdallas_timeseries.to_xarray()
mtdallas_timeseries

In [49]:
# Fraction of all Mt Dallas timesteps with Height below 2000. NaN compares as False, so
# timesteps without a Height value count only in the denominator.
n_timesteps = mtdallas_timeseries.sizes['time']
under_2000 = (mtdallas_timeseries < 2000).sum().values
under_2000_freq = under_2000 / n_timesteps
under_2000_freq

0.5320842842736585

In [11]:
# Fraction of all Mt Dallas timesteps with Height below 600 (denominator: every timestep,
# including those without a Height value).
below_600_mask = mtdallas_timeseries < 600
under_600 = below_600_mask.sum().values
under_600_freq = under_600 / mtdallas_timeseries.sizes['time']
under_600_freq

0.27312775330396477

In [54]:
# Restrict to timestamps whose hour-of-day is 6 through 19 inclusive (06:00-19:59) before
# recomputing the low-cloud frequencies.
# NOTE(review): confirm that this window matches the hours the field cameras operate and
# whether the timestamps are UTC or local time; the hour filter is applied to them exactly
# as stored.
# The subset gets its own name so the full-period series is not overwritten and the
# frequency cells above give the same numbers when they are re-run.
mtdallas_daytime = mtdallas_timeseries.isel(time=mtdallas_timeseries.time.dt.hour.isin(range(6, 20)))
# Fractions of the daytime timesteps with Height below 2000 and below 600
under_2000 = (mtdallas_daytime < 2000).sum().values
print(under_2000/mtdallas_daytime['time'].size)
under_600 = (mtdallas_daytime < 600).sum().values
print(under_600/mtdallas_daytime['time'].size)

0.5539432693884243
0.2690605017215937
