In [11]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [12]:
# -- matplotlib defaults: render images with nearest-neighbour interpolation
plt.rcParams.update({"image.interpolation": "nearest"})

In [13]:
# -- use the interactive widget backend for figures
#    (presumably needs the ipympl package installed -- TODO confirm)
%matplotlib widget

In [14]:
# -- NOAA lab spectra: one spreadsheet per lamp type, all under dpath.
#    The list order matters: later steps address the frames by position.
dpath = "../data/noaa/"
fnames = [
    "High_Pressure_Sodium_Lamps_20100311.xls",
    "LED_Lamps_20100311.xls",
    "Fluorescent_Lamps_20100311.xls",
    "Incandescent_Lamps_20100311.xls",
    "Quart_Halogen_Lamps_20100311.xls",
    "Metal_Halide_Lamps_20100311.xls",
    "Mercury_Vapor_Lamp_20100311.xls",
    "Low_Pressure_Sodium_Lamp_20100311.xls",
    # "Oil_Lanterns_20100311.xls",  (currently left out)
    "Pressurized_Gas_Lanterns_20100311.xls",
]


# -- load each spreadsheet into its own DataFrame (same order as fnames)
noaa = list(
    map(pd.read_excel, (os.path.join(dpath, fname) for fname in fnames))
)


# -- the Quartz Halogen sheet (position 4 in fnames) has one extra trailing
#    wavelength row; cut it off so its rows line up with the other sheets
noaa[4] = noaa[4].iloc[: len(noaa[4]) - 1]


# -- take the wavelength axis from the first sheet (after the trim above
#    all sheets are taken to share the same wavelengths)
waves = noaa[0].loc[:, "Wavelength (nm)"].values


# -- strip everything that is not a spectrum, in place: columns whose name
#    contains "Unnamed" and the wavelength column itself
for frame in noaa:
    is_unnamed = frame.columns.str.contains("Unnamed")
    extras = list(frame.columns[is_unnamed]) + ["Wavelength (nm)"]
    frame.drop(columns=extras, inplace=True)


# -- prefix every column name with the abbreviation of its lamp type
#    (abbreviations are matched to the frames by position, so their order
#    has to follow fnames)
# abbreviations = ["HPS", "LED", "FLU", "INC", "QHL", "MHL", "MER", "LPS", "OIL", "PGL"]
abbreviations = ["HPS", "LED", "FLU", "INC", "QHL", "MHL", "MER", "LPS", "PGL"]
for pos, tag in enumerate(abbreviations):
    noaa[pos].columns = [": ".join((tag, name)) for name in noaa[pos].columns]


# -- wavelengths of the HSI cube
waves13 = np.load("../output/graham_hall_upload/cube0_waves_range.npy")


# -- boolean row mask: NOAA wavelengths strictly between the first and last
#    HSI wavelength, rounded outward to whole numbers and padded by 1
#    (assumes waves13 is sorted ascending -- TODO confirm)
wlo, whi = np.floor(waves13[0]) - 1, np.ceil(waves13[-1]) + 1
ind = (wlo < waves) & (whi > waves)


# -- helper shared by the per-type and the combined filtering passes
def _drop_correlated(frame, thresh=0.95):
    """
    Greedily remove columns that are highly correlated with an earlier,
    retained column.

    Columns are visited left to right.  A column is retained only when its
    correlation (as computed by ``DataFrame.corr``) with every previously
    retained column is not above ``thresh``.  NaN correlations compare
    False against the threshold and therefore never cause a drop.  An
    empty frame is returned unchanged (as an empty copy).

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with one spectrum per column.
    thresh : float, optional
        Correlation above which a later column counts as a duplicate of
        an earlier one.

    Returns
    -------
    pandas.DataFrame
        Copy of ``frame`` holding only the retained columns, in their
        original order and with the original row index.
    """
    too_similar = frame.corr() > thresh

    keep = []
    for col in frame.columns:
        # -- only compare against columns that survived so far
        if not any(too_similar.loc[prev, col] for prev in keep):
            keep.append(col)

    # -- copy so the caller gets an independent frame it can modify freely
    return frame[keep].copy()


# -- loop through types: restrict each to the selected wavelength rows,
#    then thin out near-duplicate spectra within the type
noaa_sub = [_drop_correlated(tnoaa.iloc[ind]) for tnoaa in noaa]


# -- over-write NOAA with all types side by side
noaa = pd.concat(noaa_sub, axis=1).reset_index(drop=True)


# -- filter combined list (near-duplicates across different types)
noaa = _drop_correlated(noaa)


# -- put the selected wavelengths in front of the spectra as column 0
noaa.insert(0, "wavelength", waves[ind])


# -- write out the results
#    column 0 of noaa is the wavelength axis; every other column is a spectrum
spec_cols = noaa.columns[1:]

# -- names are written as a unicode array instead of an object array: object
#    arrays are pickled by np.save and cannot be read back by np.load unless
#    allow_pickle=True is passed
np.save("../output/noaa_lab_spectra_names.npy", np.asarray(spec_cols, dtype=str))
np.save("../output/noaa_lab_spectra.npy", noaa[spec_cols].values)
np.save("../output/noaa_lab_spectra_wavelengths.npy", noaa["wavelength"].values.astype(float))