# Example

- Check that the `.hscfg` file in your home folder is correctly configured (see `hsdsaccess_example.ipynb`).


In [None]:
# Domain path of the study; it is handed to visit_domain in a later cell.
# The trailing slash is part of the value.
study = "/SILICON_STUDY/"

In [None]:
from ramanchada2.spectrum import from_chada
from ramanchada2.io.HSDS import visit_domain, filter_dataset, read_cha
import math
import h5pyd


def load_dataset(parentdomain, domain, results=None, h5module=h5pyd):
    """Read the annotations of one domain and append them to ``results``.

    Opens ``domain`` with ``h5module.File`` and appends the tuple
    ``(domain, provider, instrument, wavelength, laser_power, sample)``
    to ``results``.

    Args:
        parentdomain: Not used here; kept so the signature stays compatible
            with the way this function is invoked as a callback.
        domain: Value passed to ``h5module.File``; also stored as the first
            element of the appended tuple.
        results: List that receives the record. When omitted, a fresh list
            is created for this call only, so nothing is shared between
            calls (the previous ``results=[]`` default silently accumulated
            records across unrelated calls).
        h5module: Module providing a ``File`` context manager; defaults to
            ``h5pyd``.

    Returns:
        None. Pass ``results`` explicitly to collect the records.
    """
    if results is None:
        results = []
    with h5module.File(domain) as f:
        # Look the attribute container up once instead of once per field.
        study_attrs = f["annotation_study"].attrs
        wavelength = study_attrs["wavelength"]
        instrument = study_attrs["instrument"]
        laser_power = study_attrs["laser_power"]
        provider = study_attrs["provider"]
        sample = f["annotation_sample"].attrs["sample"]
    results.append((domain, provider, instrument, wavelength, laser_power, sample))

# Sample identifier for the optional "query by sample" variant below.
# It is only referenced from the commented-out call.
query_sample = "S0N"

# Filled by load_dataset with one tuple per visited domain.
results = []
# Alternative: query by sample (disabled). It routes load_dataset through
# filter_dataset so that only domains matching query_sample are recorded.
#visit_domain(study, process_dataset=filter_dataset,kwargs={"process_file" : load_dataset,"sample": query_sample, 
#                            "kwargs" : {"results" : results}})
# Active variant: retrieve everything under study; `results` is forwarded
# to load_dataset through kwargs.
visit_domain(study, process_dataset=load_dataset, kwargs = {"results" : results})

In [None]:
import pandas as pd
# One row per collected record; the column order mirrors the tuple that
# load_dataset appends to `results`.
df = pd.DataFrame(results,columns=["domain","provider","instrument","wavelength","laser_power","sample"])
# Display the first rows as a quick sanity check.
df.head()

In [None]:
def process_row(row):
    """Print the ``domain`` entry of ``row``.

    Placeholder for per-row processing: peak finding is not implemented yet.
    """
    domain = row["domain"]
    print(domain)
    # TODO: apply peak finding
    
# Run process_row on every row (axis=1 passes each row as a Series).
# The function is passed directly; wrapping it in a lambda adds nothing.
df.apply(process_row, axis=1)

In [None]:
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np

# The next cell is a test on a single spectrum; the same processing is to be applied to all rows.

In [None]:
# Take the domain of the first row whose sample is "S0B".
# NOTE(review): this raises IndexError when no row matches, and the literal
# differs from query_sample ("S0N") defined earlier — confirm this is intended.
domain = df[df["sample"]=="S0B"].iloc[0]["domain"]
# Load the spectrum stored in that domain, using h5pyd for access.
spe = from_chada(domain,h5module=h5pyd)
# Per the original note, trim_axes' `method` is one of 'x-axis' or 'bins'.
#spe.plot()
# Keep only the 450-600 range, selected by x-axis value (presumably; confirm
# against the trim_axes documentation).
spe = spe.trim_axes("x-axis",[450,600])
#spe = spe.normalize("unity")
spe.plot()
# Fit the mixture model on the trimmed spectrum. The result exposes means_,
# covariances_ and weights_, which a later cell reads. random_state is fixed
# (presumably for reproducibility); moving_minimum_window=16 matches the
# window used in plotdist.
bgm = spe.bayesian_gaussian_mixture(n_samples=10000,  # type: ignore
            n_components=5,
            max_iter=1000,
            moving_minimum_window=16,
            random_state=42,
            trim_range=None)
#spe = spe.normalize("unity")


In [None]:
def normalize_area(spe):
    """Return the intensities of ``spe`` scaled to unit area.

    The area is approximated as ``sum(y) * (x[1] - x[0])``, so the x axis is
    assumed to be uniformly spaced with the step of its first two points.

    Args:
        spe: Object exposing array-like ``x`` and ``y`` attributes with at
            least two points.

    Returns:
        A new floating-point array; ``spe.y`` itself is left untouched.
    """
    y = np.asarray(spe.y)
    step = spe.x[1] - spe.x[0]
    # Divide out of place: an in-place `/=` on the array returned by spe.y
    # would overwrite the spectrum's own data and raises for integer or
    # read-only arrays.
    return y / (np.sum(y) * step)

def plotdist(bgm_peaks,spe,threshold=0.00001):
    """Plot the area-normalised spectrum together with the fitted Gaussians.

    Left axis: the area-normalised spectrum (after subtracting
    ``spe.moving_minimum(16)``), a marker at the apex of every retained
    peak, and the sum of the retained weighted Gaussians.
    Right axis: the same spectrum, ``spe.normalize('unity')`` and every
    retained weighted Gaussian on its own.

    Args:
        bgm_peaks: Iterable of sequences whose first three items are the
            mean, the standard deviation and the weight of a peak.
        spe: Spectrum object providing ``normalize``, ``moving_minimum``,
            subtraction, ``x`` and ``y`` as used below.
        threshold: Peaks whose weight is below this value are skipped.

    Returns:
        The tuple ``(ax1, ax2)`` of the two matplotlib axes.
    """
    new_spe = spe.normalize('unity')
    # Window 16 matches the moving_minimum_window used when fitting.
    spe = spe - spe.moving_minimum(16)
    y_norm = normalize_area(spe)
    _fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 2))
    ax1.plot(spe.x, y_norm, ':', label='spe')
    # The reference curves do not depend on the peak, so draw them once
    # instead of once per retained peak.
    ax2.plot(spe.x, y_norm, ':')
    ax2.plot(spe.x, new_spe.y, '*')

    mixture = None
    for peak in bgm_peaks:
        mu, sigma, weight = peak[0], peak[1], peak[2]
        if not weight >= threshold:
            continue
        gm = stats.norm(mu, sigma)
        component = weight * gm.pdf(spe.x)
        mixture = component if mixture is None else mixture + component
        ax1.scatter(mu, weight * gm.pdf(mu), label=sigma)
        ax2.plot(spe.x, component, '+')

    # With no retained peak there is no mixture curve to draw; plotting None
    # would raise.
    if mixture is not None:
        ax1.plot(spe.x, mixture, '-')
    return (ax1, ax2)

# Number of mixture components whose weight is still positive after
# rounding to two decimals.
n_peaks = (np.round(bgm.weights_, 2) > 0).sum()

# One [mean, standard deviation, weight] entry per component, heaviest
# first, truncated to the n_peaks components counted above.
bgm_peaks = sorted(
    (
        [mean[0], np.sqrt(cov[0][0]), weight]
        for mean, cov, weight in zip(bgm.means_, bgm.covariances_, bgm.weights_)
    ),
    key=lambda peak: peak[2],
    reverse=True,
)[:n_peaks]
print(bgm_peaks)
plotdist(bgm_peaks, spe)