In [4]:
from lib.data_handling import load_data, CustomSpectralPipeline
from lib.reproduction import masks, oxide_ranges, spectrometer_wavelength_ranges, major_oxides
from lib.norms import Norm3Scaler

In [2]:
# Load the calibration spectra from the 1600 mm PLS directory.
# NOTE(review): the bare `1` is presumably a cap on how many samples are
# loaded (the progress bar reports 1/1) — confirm against load_data.
data = load_data("../data/data/calib/calib_2015/1600mm/pls/", 1)

Loading data: 100%|██████████| 1/1 [00:00<00:00,  1.75it/s]


In [5]:
# Configure the preprocessing pipeline with the wavelength masks, the oxide
# names and the location of the composition table.
pipeline = CustomSpectralPipeline(
    composition_data_loc="../data/data/calib/ccam_calibration_compositions.csv",
    major_oxides=major_oxides,
    masks=masks,
)

# Fit on the loaded samples and transform them in one step.
processed_data = pipeline.fit_transform(data)

# Preview the first five rows of the result.
processed_data.head(5)

Transforming samples: 100%|██████████| 1/1 [00:00<00:00, 11.47it/s]


Unnamed: 0,246.688,246.741,246.79401,246.847,246.89999,246.953,247.007,247.06,247.11301,247.166,...,848.89642,SiO2,TiO2,Al2O3,FeOT,MgO,CaO,Na2O,K2O,Sample Name
0,399858900000.0,362310200000.0,312583400000.0,254814300000.0,238410900000.0,253572700000.0,286705500000.0,318151900000.0,322830800000.0,310822000000.0,...,22935410000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
1,310111500000.0,276084000000.0,229415300000.0,178971400000.0,160254600000.0,173736500000.0,203343300000.0,230790300000.0,238090000000.0,228648400000.0,...,22246580000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
2,478439500000.0,436485900000.0,378726100000.0,300735100000.0,286992900000.0,306050200000.0,343736600000.0,386255000000.0,389610300000.0,375093400000.0,...,22395920000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
3,540226600000.0,497416500000.0,426643200000.0,335467000000.0,323240200000.0,343319800000.0,384717500000.0,429803100000.0,431385900000.0,417308900000.0,...,1634240000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
4,492592700000.0,445791700000.0,386244100000.0,306331800000.0,298727300000.0,319164800000.0,356717200000.0,390994000000.0,397830100000.0,383662200000.0,...,22341640000.0,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399


In [6]:
# Fit a Norm3Scaler configured with the spectrometer wavelength ranges on the
# pipeline output and scale that same frame.
scaler = Norm3Scaler(wavelength_ranges=spectrometer_wavelength_ranges)
scaled = scaler.fit_transform(processed_data)

# Preview the first five scaled rows.
scaled.head(5)

Unnamed: 0,246.688,246.741,246.79401,246.847,246.89999,246.953,247.007,247.06,247.11301,247.166,...,848.89642,SiO2,TiO2,Al2O3,FeOT,MgO,CaO,Na2O,K2O,Sample Name
0,0.000191,0.000173,0.000149,0.000122,0.000114,0.000121,0.000137,0.000152,0.000154,0.000148,...,0.00018,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
1,0.000148,0.000132,0.000109,8.5e-05,7.6e-05,8.3e-05,9.7e-05,0.00011,0.000114,0.000109,...,0.000175,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
2,0.000228,0.000208,0.000181,0.000144,0.000137,0.000146,0.000164,0.000184,0.000186,0.000179,...,0.000176,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
3,0.000258,0.000237,0.000204,0.00016,0.000154,0.000164,0.000184,0.000205,0.000206,0.000199,...,1.3e-05,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399
4,0.000235,0.000213,0.000184,0.000146,0.000143,0.000152,0.00017,0.000187,0.00019,0.000183,...,0.000176,51.03,1,13.83,9.93,6.33,10.7,2.01,0.6,jsc1399


In [12]:
# Sanity check for the scaler.
import numpy as np

# Non-spectral columns: the major-oxide columns plus the sample name.
drop_cols = [*major_oxides, 'Sample Name']

# assert sum of each of the three ranges is 3 using spectrometer_wavelength_ranges
def test_norm3_scaler(test_df):
    # Simulated Data
    df = test_df

    # Spectrometer wavelength ranges
    spectrometer_wavelength_ranges = {
        "UV": (223.4, 325.97),
        "VIO": (381.86, 471.03),
        "VNIR": (494.93, 927.06),
    }

    # Initialize and fit-transform the scaler
    scaler = Norm3Scaler(spectrometer_wavelength_ranges)
    scaler.fit(df)
    transformed_df = scaler.transform(df.copy())

    numerical_df = transformed_df.drop(drop_cols, axis=1)

    # Verify the sum of each range is approximately 1
    for key, (start, end) in spectrometer_wavelength_ranges.items():
        selected_columns = [col for col in numerical_df.columns if start <= float(col) <= end]
        range_sum = numerical_df[selected_columns].sum().sum()
        assert np.isclose(range_sum, 1.0), f"Sum for range {key} is not close to 1.0 but is {range_sum}"
        print(f"Sum for range {key} is close to 1.0 ({range_sum})")

# Run the check on the already-scaled frame `scaled`. An AssertionError is
# raised if any range does not sum to ~1.0; otherwise one line per range is
# printed.
test_norm3_scaler(scaled)


Sum for range UV is close to 1.0 (1.0)
Sum for range VIS is close to 1.0 (1.0)
Sum for range VNIR is close to 1.0 (1.0000000000000002)
