In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Get hyperspectral data
hypernetsLJaesL2A_out00_001_0 = pd.read_parquet("../../../../data/hypernetsLJaesL2A/_out00/hypernetsLJaesL2A_out00_001_0.parquet")
hypernetsLJaesL2A_out00_001_0.head(3)

Unnamed: 0,utc_,viewing_azimuth_angle_,viewing_zenith_angle_,350,351,352,353,354,355,356,...,1663,1665,1668,1670,1673,1675,1678,1681,1683,1686
0,1686584064,293,30,0.004625,0.004307,0.00454,0.004497,0.004145,0.003909,0.004103,...,0.014135,0.013998,0.014228,0.014129,0.013977,0.013785,0.013783,0.014484,0.013635,0.013047
1,1692866048,293,30,0.006182,0.005801,0.005869,0.005559,0.005401,0.005406,0.005297,...,0.061748,0.062347,0.062733,0.061653,0.062367,0.062205,0.06245,0.061626,0.06269,0.060976
2,1691820160,293,30,0.010155,0.010084,0.010552,0.010096,0.00904,0.008312,0.009141,...,0.122593,0.123089,0.124041,0.12314,0.123814,0.12261,0.122189,0.12085,0.123345,0.121677


In [3]:
# Function to get dataframe columns within a given interval
def get_columns_within_closed_interval(df, start, end):   
    # Check if all column names can be converted to integers
    are_column_names_integers = all(is_integer(col) for col in df.columns)
    assert are_column_names_integers, "Not all columns can be converted to integers"

    column_names_int_ = df.columns.astype(int)
    mask_ = (column_names_int_ >= start) & (column_names_int_ <= end)
    return df.loc[:, mask_].columns

def is_integer(s):
    try:
        int(s)
        return True
    except ValueError:
        return False

In [4]:
# Sentinel 2 MSI L2 A - spectral resolution
# Reference: https://sentinels.copernicus.eu/web/sentinel/user-guides/sentinel-2-msi/resolutions/spectral
sentinel2MsiL2a_sr = {
    "band_number": ["B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B9", "B11"],
    "central_wl": [442.7, 492.7, 559.8, 664.6, 704.1, 740.5, 782.8, 832.8, 864.7, 945.1, 1613.7],
    "bandwidth": [20, 65, 35, 30, 14, 14, 14, 105, 21, 19, 90]
}

df_sentinel2MsiL2a_sr = (
    pd.DataFrame(sentinel2MsiL2a_sr)
    .assign(bandwidth_left = lambda x: np.round(x["central_wl"] - x["bandwidth"]/2).astype(int))
    .assign(bandwidth_right = lambda x: np.round(x["central_wl"] + x["bandwidth"]/2).astype(int))
    .assign(control = lambda x: (np.round(x["bandwidth_right"] - x["bandwidth_left"]) == x["bandwidth"]))
    .assign(bandwidth_range = '')
    .assign(weights = '')
)

# acceptable only if dataframe is tiny
for index, row in df_sentinel2MsiL2a_sr.iterrows():
    bandwidth_range = list(get_columns_within_closed_interval(
        hypernetsLJaesL2A_out00_001_0.drop(columns=['utc_', 'viewing_azimuth_angle_', 'viewing_zenith_angle_']), 
        row['bandwidth_left'], row['bandwidth_right']))
    df_sentinel2MsiL2a_sr.at[index, 'bandwidth_range'] = bandwidth_range
    df_sentinel2MsiL2a_sr.at[index, 'weights'] = np.repeat([1], len(bandwidth_range))
    
df_sentinel2MsiL2a_sr

Unnamed: 0,band_number,central_wl,bandwidth,bandwidth_left,bandwidth_right,control,bandwidth_range,weights
0,B1,442.7,20,433,453,True,"[433, 434, 435, 436, 437, 438, 439, 440, 441, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
1,B2,492.7,65,460,525,True,"[460, 461, 462, 463, 464, 465, 466, 467, 468, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
2,B3,559.8,35,542,577,True,"[542, 543, 544, 545, 546, 547, 548, 549, 550, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
3,B4,664.6,30,650,680,True,"[650, 651, 652, 653, 654, 655, 656, 657, 658, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
4,B5,704.1,14,697,711,True,"[697, 698, 699, 700, 701, 702, 703, 704, 705, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"
5,B6,740.5,14,734,748,True,"[734, 735, 736, 737, 738, 739, 740, 741, 742, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"
6,B7,782.8,14,776,790,True,"[776, 777, 778, 779, 780, 781, 782, 783, 784, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]"
7,B8,832.8,105,780,885,True,"[780, 781, 782, 783, 784, 785, 786, 787, 788, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
8,B8A,864.7,21,854,875,True,"[854, 855, 856, 857, 858, 859, 860, 861, 862, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
9,B9,945.1,19,936,955,True,"[936, 937, 938, 939, 940, 941, 942, 943, 944, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."


In [5]:
# Define functions to perform spectral convolution operation
def get_wavelength_weights_for_conv_op(df_sentinel, band_number):
    df_ = df_sentinel.query(f"band_number == '{band_number}'")
    dict_ = {
        "wavelength": list(map(str, df_.loc[df_.index[0], "bandwidth_range"])),
        "weights": list(map(float, df_.loc[df_.index[0], "weights"]))
    }
    return pd.DataFrame(dict_).pivot_table(columns='wavelength', values='weights')

def get_sentinel_band_reflectance(df_sentinel, df_hypernets, band_number):
    df_weights_ = get_wavelength_weights_for_conv_op(df_sentinel, band_number)
    df_weights_sum_ = df_weights_.sum(axis=1)
    df_hypernets_ = df_hypernets.filter(items=(get_wavelength_weights_for_conv_op(df_sentinel, band_number)))
    df_mult_ = pd.DataFrame((df_hypernets_ * df_weights_.values).sum(axis=1))
    df_mult_.rename(columns={0: band_number}, inplace=True)
    df_mean_weighted_ = df_mult_ / df_weights_sum_.values
    return df_mean_weighted_

In [6]:
# Perform spectral convolution operation
df_B1 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B1")
df_B2 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B2")
df_B3 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B3")
df_B4 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B4")
df_B5 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B5")
df_B6 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B6")
df_B7 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B7")
df_B8 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B8")
df_B8A = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B8A")
df_B9 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B9")
df_B11 = get_sentinel_band_reflectance(df_sentinel2MsiL2a_sr, hypernetsLJaesL2A_out00_001_0, "B11")
hypernetsLJaesL2A_out01_002_0 = pd.concat([pd.DataFrame(hypernetsLJaesL2A_out00_001_0[['utc_', 'viewing_azimuth_angle_', 'viewing_zenith_angle_']]),
                                           df_B1, df_B2, df_B3, df_B4, df_B5, df_B6, df_B7, df_B8, df_B8A, df_B9, df_B11], axis=1)
hypernetsLJaesL2A_out01_002_0.head(3)

Unnamed: 0,utc_,viewing_azimuth_angle_,viewing_zenith_angle_,B1,B2,B3,B4,B5,B6,B7,B8,B8A,B9,B11
0,1686584064,293,30,0.003088,0.003402,0.007357,0.002229,0.010125,0.068818,0.094357,0.100217,0.102854,0.096867,0.012242
1,1692866048,293,30,0.006597,0.008938,0.020109,0.009941,0.033847,0.173287,0.231642,0.247409,0.254389,0.247647,0.055356
2,1691820160,293,30,0.012875,0.018907,0.044951,0.022655,0.077591,0.325047,0.41535,0.439412,0.451397,0.43017,0.111166


In [7]:
# Load data
PATH_OUTPUT_HYP_UTC_INT = "../../../../data/hypernetsLJaesL2A/_out01/hypernetsLJaesL2A_out01_001_0.parquet"
df_hypernetsLJaesL2A_out01_001_0 = (pd.read_parquet(PATH_OUTPUT_HYP_UTC_INT).drop_duplicates()
                                    .drop(columns=['viewing_azimuth_angle_', 'viewing_zenith_angle_']))
print(df_hypernetsLJaesL2A_out01_001_0.shape)
print(f"unique in-situ dates {df_hypernetsLJaesL2A_out01_001_0['utc_dt'].unique().shape} from "
      f"{df_hypernetsLJaesL2A_out01_001_0['utc_dt'].min()} to "
      f"{df_hypernetsLJaesL2A_out01_001_0['utc_dt'].max()}")
df_hypernetsLJaesL2A_out01_001_0.head(3)

(1885, 12)
unique in-situ dates (1885,) from 2023.05.01 07:02:24 to 2023.08.31 15:34:24


Unnamed: 0,utc_,utc_dt,utc_minus_1u,utc_plus_1u,utc_minus_1u_dt,utc_plus_1u_dt,utc_minus_2u,utc_plus_2u,utc_minus_3u,utc_plus_3u,utc_minus_4u,utc_plus_4u
0,1686584064,2023.06.12 15:34:24,1686580464,1686587664,2023.06.12 14:34:24,2023.06.12 16:34:24,1686576864,1686591264,1686573264,1686594864,1686566064,1686602064
1,1692866048,2023.08.24 08:34:08,1692862448,1692869648,2023.08.24 07:34:08,2023.08.24 09:34:08,1692858848,1692873248,1692855248,1692876848,1692848048,1692884048
2,1691820160,2023.08.12 06:02:40,1691816560,1691823760,2023.08.12 05:02:40,2023.08.12 07:02:40,1691812960,1691827360,1691809360,1691830960,1691802160,1691838160


In [8]:
# Merge with utc intervals
df_hypernetsLJaesL2A_out01_002_0 = (
    pd.merge(hypernetsLJaesL2A_out01_002_0, df_hypernetsLJaesL2A_out01_001_0, on='utc_', how='left')
)
hypernetsLJaesL2A_out01_002_0.head(3)

Unnamed: 0,utc_,viewing_azimuth_angle_,viewing_zenith_angle_,B1,B2,B3,B4,B5,B6,B7,B8,B8A,B9,B11
0,1686584064,293,30,0.003088,0.003402,0.007357,0.002229,0.010125,0.068818,0.094357,0.100217,0.102854,0.096867,0.012242
1,1692866048,293,30,0.006597,0.008938,0.020109,0.009941,0.033847,0.173287,0.231642,0.247409,0.254389,0.247647,0.055356
2,1691820160,293,30,0.012875,0.018907,0.044951,0.022655,0.077591,0.325047,0.41535,0.439412,0.451397,0.43017,0.111166


In [9]:
PATH_OUTPUT = "../../../../data/hypernetsLJaesL2A/_out01/hypernetsLJaesL2A_out01_002_0.parquet"
df_hypernetsLJaesL2A_out01_002_0.to_parquet(PATH_OUTPUT)
print(df_hypernetsLJaesL2A_out01_002_0.shape)
print(f"unique in-situ dates {df_hypernetsLJaesL2A_out01_002_0['utc_dt'].unique().shape} from "
      f"{df_hypernetsLJaesL2A_out01_002_0['utc_dt'].min()} to "
      f"{df_hypernetsLJaesL2A_out01_002_0['utc_dt'].max()}")
pd.read_parquet(PATH_OUTPUT).head(3)

(1905, 25)
unique in-situ dates (1885,) from 2023.05.01 07:02:24 to 2023.08.31 15:34:24


Unnamed: 0,utc_,viewing_azimuth_angle_,viewing_zenith_angle_,B1,B2,B3,B4,B5,B6,B7,...,utc_minus_1u,utc_plus_1u,utc_minus_1u_dt,utc_plus_1u_dt,utc_minus_2u,utc_plus_2u,utc_minus_3u,utc_plus_3u,utc_minus_4u,utc_plus_4u
0,1686584064,293,30,0.003088,0.003402,0.007357,0.002229,0.010125,0.068818,0.094357,...,1686580464,1686587664,2023.06.12 14:34:24,2023.06.12 16:34:24,1686576864,1686591264,1686573264,1686594864,1686566064,1686602064
1,1692866048,293,30,0.006597,0.008938,0.020109,0.009941,0.033847,0.173287,0.231642,...,1692862448,1692869648,2023.08.24 07:34:08,2023.08.24 09:34:08,1692858848,1692873248,1692855248,1692876848,1692848048,1692884048
2,1691820160,293,30,0.012875,0.018907,0.044951,0.022655,0.077591,0.325047,0.41535,...,1691816560,1691823760,2023.08.12 05:02:40,2023.08.12 07:02:40,1691812960,1691827360,1691809360,1691830960,1691802160,1691838160
