Attempting to read in the instrument data

In [1]:
from pathlib import Path
import xarray as xr
import numpy as np
import pandas as pd

In [2]:
# set file path to netcdf files
PATH = Path('/gws/nopw/j04/iecdt/computer-vision-data/cloudnet-vertical-profile-data/')

In [3]:
#use xarray to read all of the netcdf files, file format .nc

data = xr.open_mfdataset(PATH.glob('*.nc'), combine='nested', join='left', concat_dim='time')

print(data)


<xarray.Dataset> Size: 445MB
Dimensions:                 (time: 60326, height: 459)
Coordinates:
  * height                  (height) float32 2kB 114.5 144.5 ... 1.384e+04
  * time                    (time) datetime64[ns] 483kB 2023-08-07T00:00:15.0...
Data variables:
    target_classification   (time, height) float64 222MB dask.array<chunksize=(2874, 459), meta=np.ndarray>
    detection_status        (time, height) float64 222MB dask.array<chunksize=(2874, 459), meta=np.ndarray>
    cloud_base_height_amsl  (time) float32 241kB dask.array<chunksize=(2874,), meta=np.ndarray>
    cloud_top_height_amsl   (time) float32 241kB dask.array<chunksize=(2874,), meta=np.ndarray>
    cloud_base_height_agl   (time) float32 241kB dask.array<chunksize=(2874,), meta=np.ndarray>
    cloud_top_height_agl    (time) float32 241kB dask.array<chunksize=(2874,), meta=np.ndarray>
    altitude                (time) float32 241kB 85.0 85.0 85.0 ... 85.0 85.0
    latitude                (time) float32 241kB 51.1

# Goals of this section #

The netCDF data contains an array target_classification that we will use as our GT. This array
includes 10 classes describing various atmospheric targets for each timestep.

Instrument Data Processing:

• Group the 10 original classes into two categories:

– No hydrometeors (clear sky): None, Aerosols & insects, Insects, Aerosols.

– Hydrometeors (cloud present): Melting & droplets, Melting ice, Ice & droplets, Ice, Drizzle &
droplets, Drizzle or rain, Droplets.


• For each timestamp, process the vertical profile to form a 459 × 2 vector. Here, each of the 459 discrete
height levels (ranging approximately from 100 m to 14 km) is assigned a pair of class probabilities
indicating the presence (hydrometeors) or absence (clear sky) of clouds

In [4]:
# inspect data
data.target_classification

Unnamed: 0,Array,Chunk
Bytes,211.26 MiB,10.09 MiB
Shape,"(60326, 459)","(2880, 459)"
Dask graph,21 chunks in 69 graph layers,21 chunks in 69 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 211.26 MiB 10.09 MiB Shape (60326, 459) (2880, 459) Dask graph 21 chunks in 69 graph layers Data type float64 numpy.ndarray",459  60326,

Unnamed: 0,Array,Chunk
Bytes,211.26 MiB,10.09 MiB
Shape,"(60326, 459)","(2880, 459)"
Dask graph,21 chunks in 69 graph layers,21 chunks in 69 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [5]:
# assign whether a target classification is a hydrometeor or not

# if data.target_classification == 0 OR > 7, then it is 'No Hydrometeors' (assign value 0). Else, it is 'Hydrometeors' (assign value 1). Store values in new array in data called Hydrometeors
data['Hydrometeors'] = xr.where((data.target_classification == 0) | (data.target_classification > 7), 0, 1)

# view values of hydrometer data to check if it worked
data['Hydrometeors'].load()

In [6]:
# view values for target classification to compare to the above hydrometeors to check if they have been classified correctly
data.target_classification.load()

In [7]:
#Task is... For each timestamp, process the vertical profile to form a 459 × 2 vector. Here, each of the 459 discrete
#height levels (ranging approximately from 100 m to 14 km) is assigned a pair of class probabilities
#indicating the presence (hydrometeors) or absence (clear sky) of clouds

# subset the data to only include the Time, Height and Hydrometeors columns
data_subset = data[['time', 'height', 'Hydrometeors']]
data_subset.load()


In [8]:
# set file path for images from camera A
A_DPATH = Path('/gws/nopw/j04/iecdt/computer-vision-data/cam_a/rectified_imgs')

In [9]:
# read in images from camera A, and convert the times to datetime format
times = [file.stem for file in list(A_DPATH.glob('*.png'))]
times_datetime = pd.to_datetime(times, unit='s')

  times_datetime = pd.to_datetime(times, unit='s')


In [10]:
times_datetime

DatetimeIndex(['2023-08-07 06:00:10', '2023-08-07 06:01:10',
               '2023-08-07 06:02:10', '2023-08-07 06:03:10',
               '2023-08-07 06:04:10', '2023-08-07 06:05:10',
               '2023-08-07 06:06:10', '2023-08-07 06:07:10',
               '2023-08-07 06:08:10', '2023-08-07 06:09:10',
               ...
               '2023-08-27 16:50:20', '2023-08-27 16:51:20',
               '2023-08-27 16:52:20', '2023-08-27 16:53:20',
               '2023-08-27 16:54:20', '2023-08-27 16:55:20',
               '2023-08-27 16:56:20', '2023-08-27 16:57:20',
               '2023-08-27 16:58:20', '2023-08-27 16:59:20'],
              dtype='datetime64[ns]', length=13760, freq=None)

In [None]:
# align the times from the images to the times in the data
aligned_times = data_subset.sel(time=times_datetime, method='nearest')


In [None]:
## old code if want a tolerance level
#tol = pd.Timedelta('5s')
#times_within_tol = (aligned_times.time - times_datetime) <= tol
#data_aligned = aligned_times.sel(time=times_within_tol, method='nearest')

#matched_times = times_datetime[times_within_tol.values]
#matched_times

In [12]:
aligned_times

In [None]:
# save the aligned times, with their respective heights and hydrometeor values, to a netCDF file
aligned_times.to_netcdf(PATH / '../../JERMIT_the_frog/hydrometeors_time_aligned_classes.nc')

In [None]:
#switch out the labels in 'time' in aligned_times with the labels from times


In [31]:
# Assign new times to the 'time' coordinate in aligned_times
aligned_times2 = aligned_times.assign_coords(time=times)

# Verify the changes
aligned_times2

We are only using data within 5 seconds of the groun truth measurements. This means we lose approx. 1 day of measurements, which we think is trhe last day, but we still have data for over 13,000 measuermements

In [32]:
aligned_df_pivot = aligned_times2['Hydrometeors'].to_dataframe().unstack(level=0)
aligned_df_pivot.columns = aligned_df_pivot.columns.droplevel(0)
aligned_df_pivot

time,1691388010,1691388070,1691388130,1691388190,1691388250,1691388310,1691388370,1691388430,1691388490,1691388550,...,1693155020,1693155080,1693155140,1693155200,1693155260,1693155320,1693155380,1693155440,1693155500,1693155560
height,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
114.541252,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
144.518326,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
174.495331,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
204.472412,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
234.449478,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13724.130859,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
13754.108398,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
13784.084961,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
13814.061523,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [33]:
aligned_df_pivot.to_csv(PATH / '../../JERMIT_the_frog/hydrometeors_time_aligned_classes.csv')

In [36]:
print(f'{PATH / "../../JERMIT_the_frog/hydrometeors_time_aligned_classes.csv"}')

/gws/nopw/j04/iecdt/computer-vision-data/cloudnet-vertical-profile-data/../../JERMIT_the_frog/hydrometeors_time_aligned_classes.csv


In [None]:
test = pd.read_csv(PATH / '../../JERMIT_the_frog/hydrometeors_time_aligned_classes.csv')
print(test)

          height  1691388010  1691388070  1691388130  1691388190  1691388250  \
0      114.54125           0           0           0           0           0   
1      144.51833           0           0           0           0           0   
2      174.49533           0           0           0           0           0   
3      204.47241           0           0           0           0           0   
4      234.44948           0           0           0           0           0   
..           ...         ...         ...         ...         ...         ...   
454  13724.13100           0           0           0           0           0   
455  13754.10800           0           0           0           0           0   
456  13784.08500           0           0           0           0           0   
457  13814.06200           0           0           0           0           0   
458  13844.03900           0           0           0           0           0   

     1691388310  1691388370  1691388430