# Example

- Check that the `.hscfg` file in your home folder is correctly configured (see `hsdsaccess_example.ipynb`).


In [None]:
import h5pyd

def print_dataset(parentdomain, domain):
    """Write the parent folder and the domain name to stdout on one line.

    This is the callback ``visit_domain`` falls back to when no
    ``process_dataset`` is supplied.

    Args:
        parentdomain: Folder the domain was found in (may be ``None``).
        domain: Name of the domain being reported.
    """
    fields = (parentdomain, domain)
    print(*fields, sep=" ")

def visit_domain(topdomain="/Round_Robin_1/", process_dataset=None, kwargs=None):
    """Recursively walk a domain tree and call a callback for each non-folder entry.

    If ``topdomain`` ends with ``"/"`` it is opened as an ``h5pyd.Folder``;
    every sub-entry whose ``"class"`` is ``"folder"`` is visited recursively
    (its ``"name"`` plus a trailing slash is used as the new ``topdomain``),
    and every other entry is handed to the callback. If ``topdomain`` does not
    end with ``"/"`` it is treated as a single domain and handed to the
    callback directly with ``None`` as its parent.

    Args:
        topdomain: Folder path (ending with ``"/"``) or a single domain path.
        process_dataset: Callable invoked as
            ``process_dataset(parent, name, **kwargs)``. When ``None``,
            ``print_dataset(parent, name)`` is called instead (without
            ``kwargs``).
        kwargs: Optional dict of extra keyword arguments forwarded to
            ``process_dataset``. ``None`` means "no extra arguments".

    Returns:
        None.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    extra = {} if kwargs is None else kwargs

    def handle(parent, name):
        # Single dispatch point for both the folder and the single-domain case.
        if process_dataset is None:
            print_dataset(parent, name)
        else:
            process_dataset(parent, name, **extra)

    if not topdomain.endswith("/"):
        handle(None, topdomain)
        return

    with h5pyd.Folder(topdomain) as folder:
        # Presumably fills folder._subdomains, which is read right below —
        # TODO confirm against the h5pyd version in use.
        folder._getSubdomains()
        for entry in folder._subdomains:
            if entry["class"] == "folder":
                visit_domain("{}/".format(entry["name"]), process_dataset, extra)
            else:
                handle(topdomain, entry["name"])

# Walk the whole Round Robin 1 tree and print every non-folder entry found.
visit_domain(
    topdomain="/Round_Robin_1/",
    process_dataset=print_dataset,
)

In [None]:
import matplotlib.pyplot as plt

def filter_dataset(topdomain, domain, process_file, sample=None, wavelength=None,
                   instrument=None, provider=None, investigation=None):
    """Call ``process_file`` for a domain whose sample annotation equals ``sample``.

    The domain is opened with ``h5pyd.File`` and the ``"sample"`` attribute of
    its ``"annotation_sample"`` object is compared with ``sample``. On a match
    ``process_file(topdomain, domain)`` is called while the file is still open.

    Args:
        topdomain: Parent folder, forwarded unchanged to ``process_file``.
        domain: Path of the domain to inspect.
        process_file: Callable invoked as ``process_file(topdomain, domain)``.
        sample: Sample value to match. When ``None`` nothing can match, so the
            function returns without opening the domain.
        wavelength, instrument, provider, investigation: Accepted for
            interface compatibility but currently NOT used for filtering.

    Returns:
        None.
    """
    if sample is None:
        # Without a sample criterion no domain is ever selected; skip the
        # (remote) open entirely instead of opening the file for nothing.
        return

    with h5pyd.File(domain) as dataset:
        if dataset["annotation_sample"].attrs["sample"] == sample:
            process_file(topdomain, domain)

def plot_dataset(topdomain, domain):
    """Print the domain and plot its ``"raw"`` dataset with an annotated title.

    Row 0 of ``"raw"`` is plotted against row 1; the axis labels come from the
    dataset's dimension labels. The figure title is assembled from the
    ``"sample"`` attribute of ``"annotation_sample"`` and the ``"instrument"``,
    ``"wavelength"``, ``"provider"`` and ``"investigation"`` attributes of
    ``"annotation_study"``.

    Args:
        topdomain: Parent folder; only used for the printed line.
        domain: Path of the domain to open with ``h5pyd.File``.
    """
    print_dataset(topdomain, domain)
    with h5pyd.File(domain) as h5file:
        raw = h5file["raw"]
        x_values, y_values = raw[0], raw[1]
        plt.plot(x_values, y_values)
        plt.xlabel(raw.dims[0].label)
        plt.ylabel(raw.dims[1].label)

        sample = h5file["annotation_sample"].attrs["sample"]
        study = h5file["annotation_study"].attrs
        title_fields = (
            sample,
            study["instrument"],
            study["wavelength"],
            study["provider"],
            study["investigation"],
        )
        plt.suptitle("{} ({},{}nm) by {} [{}]".format(*title_fields))



In [None]:
# Sample value the filtering cell below matches against.
# Alternative value to try: "S0N"
query_sample = "Neon"

In [None]:
# Walk the FMNT-Madrid folder and plot each domain whose sample annotation
# equals query_sample.
visit_domain(
    "/Round_Robin_1/FMNT-Madrid/",
    process_dataset=filter_dataset,
    kwargs={"process_file": plot_dataset, "sample": query_sample},
)

In [None]:
import ramanchada2 as rc2
# Domain used as the worked example in the following cells.
# Other domains that can be substituted here:
#   "/Round_Robin_1/ICV-CSIC/Zolix Finder Edge/785/PST02_Zolix785_Probe_100_190ms.cha"
#   "/Round_Robin_1/ICV-CSIC/Zolix Finder Edge/785/Ne_785nm_Zolix_6ms_v2.cha"
test = "/Round_Robin_1/FMNT-Madrid/BWTek iRaman/532/S0N10_iR532_Probe_100_30000msx3.cha"

# Load the spectrum through h5pyd and show it.
spe = rc2.spectrum.from_chada(test, h5module=h5pyd)
spe.plot()

In [None]:
import pandas as pd
def find_peaks(topdomain, domain, results=None):
    """Fit Voigt peak groups for one spectrum and store the fit under its domain.

    The spectrum is loaded with ``rc2.spectrum.from_chada`` (through
    ``h5pyd``), normalized, searched for peak groups and fitted with
    ``model='Voigt'``. The fit result is stored as ``results[domain]``.

    Args:
        topdomain: Unused; present so the function matches the
            ``process_dataset(parent, name, **kwargs)`` callback signature
            used by ``visit_domain``.
        domain: Path of the spectrum domain to load.
        results: Dict that receives the fit result, keyed by ``domain``. It is
            modified in place. When ``None`` a new dict is created for this
            call, so separate calls never share a hidden accumulator.

    Returns:
        The dict holding the fit result (the ``results`` argument if one was
        given, otherwise the newly created dict).
    """
    if results is None:
        results = {}

    spe = rc2.spectrum.from_chada(domain, h5module=h5pyd)
    spe = spe.normalize()

    kw_sharpening = dict(filter_fraction=1, sig_width=.4, der2_factor=0, der4_factor=.1)
    # Alternative sharpening settings kept for experimentation:
    #   dict(filter_fraction=.6, sig_width=.5, der2_factor=1, der4_factor=.1)
    peak_candidates = spe.find_peak_groups(
        prominence=.005,
        wlen=40,
        width=1,
        n_sigma_group=1.5,
        moving_minimum_window=40,
        kw_derivative_sharpening=kw_sharpening,
    )
    fit_res = spe.fit_peak_groups(model='Voigt', peak_candidate_groups=peak_candidates)
    results[domain] = fit_res
    return results
  

In [None]:
# find_peaks stores each fit in this dict, keyed by domain path.
results = dict()
visit_domain(
    test,
    process_dataset=find_peaks,
    kwargs={"results": results},
)

In [None]:
# Flatten all fit results into one long table: one row per fitted parameter,
# tagged with the domain ("source") the fit came from.
#
# The per-domain frames are collected in a list and concatenated once.
# Testing a DataFrame with `df == None` raises "truth value of a DataFrame is
# ambiguous" as soon as a second domain is processed, so no such test is used.
frames = []
for domain, fit_res in results.items():
    rows = [
        dict(name=f'g{group:02d}_{key}', value=val.value, stderr=val.stderr)
        for group, res in enumerate(fit_res)
        for key, val in res.params.items()
    ]
    tmp = pd.DataFrame(rows, columns=["name", "value", "stderr"])
    tmp["source"] = domain
    frames.append(tmp)

if frames:
    # ignore_index gives every row a unique label across sources.
    df = pd.concat(frames, ignore_index=True)
else:
    # No fit results at all: keep an empty table with the expected columns.
    df = pd.DataFrame(columns=["name", "value", "stderr", "source"])

# Names look like "g<group>_<model>_<param>". Split at most twice so a
# parameter name that itself contains "_" stays in the last column, and
# reindex so exactly three columns exist even for an empty table.
name_parts = df["name"].str.split("_", n=2, expand=True).reindex(columns=range(3))
df[['group', 'model', 'param']] = name_parts
display(df)

In [None]:
# Show all rows when a DataFrame is displayed.
pd.set_option('display.max_rows', None)
import plotly.express as px
import numpy as np

# Exclude rows whose model label is "bl" (presumably the baseline component —
# TODO confirm against the fit model naming).
_filter = df["model"] != "bl"

# One row per (source, group, model) with one column per parameter for both
# the fitted value and its standard error. The aggregation is given by name
# ("mean") rather than as the np.mean callable, which newer pandas versions
# warn about; the result is the same.
table = pd.pivot_table(
    df.loc[_filter],
    values=['value', 'stderr'],
    index=['source', 'group', 'model'],
    columns=['param'],
    aggfunc={'value': "mean", 'stderr': "mean"},
).reset_index()

# Flatten the two-level column labels into "value center", "stderr center", ...
table.columns = [' '.join(col).strip() for col in table.columns.values]
table_stats = table.describe()
table_stats


In [None]:
# Keep only peaks whose amplitude AND height exceed row 6 of the describe()
# summary (the "75%" row for the default numeric summary).
_filter1 = table["value amplitude"].gt(table_stats.iloc[6]["value amplitude"])
_filter2 = table["value height"].gt(table_stats.iloc[6]["value height"])
table_filtered = table.loc[_filter1 & _filter2]

# Fitted peak position against its standard error for the retained peaks.
fig = px.scatter(
    table_filtered,
    x="value center",
    y="stderr center",
)
fig.show()
display(table_filtered)

In [None]:
#PST_peaks = (621,795,1001,1031,1155,1450,1583,1602)