# 4. Stitch outputs together
- Once you have fitted the weak, medium and strong (dense) diads, stitch all the fits together and apply the Ne correction model.

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
import os
from pickle import load
import pickle
import DiadFit as pf
pf.__version__

'0.0.59'

In [8]:
# Root folder of the project, and the folder of the analysis day stitched in this notebook.
MasterFolder = r"P:\WORK-GENERAL\POSTDOC-UCB\BERKELEY-VIBE\Documents\Projects\Data\Hawaii_FI\Data\Raman\Leilani-2018"
DayFolder = os.path.join(MasterFolder, "Leilani_2018FI_Ap523")

# Build the sub-folders with os.path.join instead of '+' and a backslash literal:
# '\M', '\S' and '\O' are invalid escape sequences that recent Python versions warn about.
meta_path = os.path.join(DayFolder, 'Metadata')
spectra_path = os.path.join(DayFolder, 'Spectra')
filetype = 'headless_txt'

# Folder the stitched spreadsheets are written to. exist_ok=True makes the cell safe to
# re-run; note that makedirs also creates missing parent folders.
output_path = os.path.join(MasterFolder, 'OUTPUT')
os.makedirs(output_path, exist_ok=True)


## Load in spectra, and combine

In [9]:
from os import path  # kept here because later cells call path.exists(...)


def _read_excel_if_present(xlsx_name):
    """Return the spreadsheet as a DataFrame, or None when the file does not exist."""
    return pd.read_excel(xlsx_name) if path.exists(xlsx_name) else None


# The fit spreadsheets are looked up in the current working directory; any may be absent.
discard = _read_excel_if_present('Discarded_df.xlsx')
grp1 = _read_excel_if_present('Weak_Diads.xlsx')
grp2 = _read_excel_if_present('Medium_Diads.xlsx')
grp3 = _read_excel_if_present('Strong_Diads.xlsx')

# pd.concat raises an opaque "All objects passed were None" when nothing was loaded,
# so fail with a message that says which files are missing and where they were searched.
fitted_groups = [grp for grp in (grp1, grp2, grp3) if grp is not None]
if not fitted_groups:
    raise FileNotFoundError(
        "None of Weak_Diads.xlsx, Medium_Diads.xlsx or Strong_Diads.xlsx were found in "
        + os.getcwd())
df2 = pd.concat(fitted_groups, axis=0)

if discard is not None:
    # Only append the discard columns that also exist in the fitted groups,
    # so the discarded spectra do not add new columns to df2.
    discard_cols = discard[discard.columns.intersection(df2.columns)]
    df2 = pd.concat([df2, discard_cols])

In [10]:
# Preview the first rows of the combined fit table
df2.head()

Unnamed: 0.1,Unnamed: 0,filename,Splitting,Split_err_abs,Split_err_quadrature,Diad1_Combofit_Cent,Diad1_cent_err,Diad1_Combofit_Height,Diad1_Voigt_Cent,Diad1_Voigt_Area,...,HB2_Sigma,C13_Cent,C13_Area,C13_Sigma,Diad2_Gauss_Cent,Diad2_Gauss_Area,Diad2_Gauss_Sigma,Diad1_Gauss_Cent,Diad1_Gauss_Area,Diad1_Gauss_Sigma
0,0,K21-71-FI#1-12mwx3,103.293712,0.012129,0.009098,1285.683789,0.008211,772.03975,1285.683789,1167.328533,...,,,,,,,,,,
1,0,K21-71-FI#1-6mwx1,103.318938,0.003384,0.003384,1285.61842,0.0,710.826095,1285.61842,1111.194572,...,,,,,,,,,,
2,0,K21-71-FI#1-6mwx2,103.318954,0.009116,0.006657,1285.627372,0.005733,721.458388,1285.627372,1114.437529,...,,,,,,,,,,
3,0,K21-71-FI#1-6mwx3_CRR_DiadFit,103.30802,0.005271,0.003897,1285.637851,0.001831,645.98142,1285.637851,1004.206661,...,,,,,,,,,,
4,0,K21-72-FI#1,102.995137,0.049546,0.03618,1286.494784,0.031159,82.138042,1286.494784,71.887385,...,,,,,,,,,,


## First, get the metadata for this day

In [11]:
# Put the string common to all the file names of interest here
# NOTE(review): ID_str is not used by the call below - confirm whether it is still needed
ID_str='_'
file_ext_meta='txt' # Will only take files of this type
# List the metadata files in meta_path. The extension now comes from file_ext_meta
# (it was hard-coded in the call before, so editing the variable had no effect).
# exclude_str holds name fragments to leave out - presumably non-diad acquisitions; verify against pf.get_files
diad_meta=pf.get_files(path=meta_path,
file_ext=file_ext_meta, exclude_str=['N', 'Si', 'series','IMG','Cap','window','nodiad'],
 sort=True)
diad_meta

['K21-71-FI#1-12mwx1.txt',
 'K21-71-FI#1-12mwx2.txt',
 'K21-71-FI#1-12mwx3.txt',
 'K21-71-FI#1-6mwx1.txt',
 'K21-71-FI#1-6mwx2.txt',
 'K21-71-FI#1-6mwx3.txt',
 'K21-72-FI#1.txt',
 'K21-72-FI#2.txt',
 'K21-73-FI#2.txt',
 'K21-74-FI#1.txt',
 'K21-74-FI#2.txt',
 'K21-74-FI#3.txt',
 'K21-74-FI#4.txt',
 'K21-74-FI#5.txt',
 'K21-74-FI#5rep2.txt',
 'K21-74-FI#6.txt',
 'K21-75-FI#1 (1).txt',
 'K21-75-FI#1.txt',
 'K21-75-FI#2 (1).txt',
 'K21-75-FI#2.txt',
 'K21-75-FI#3.txt',
 'K21-77-FI#1.txt',
 'K21-78b-FI#2.txt',
 'K21-78b-FI#3.txt',
 'K21-78b-FI#4.txt',
 'K21-78b-FI#5.txt']

## Now get all the important information out of the metadata files

In [12]:
# Read every metadata file listed in diad_meta into a single table
meta = pf.stitch_metadata_in_loop_witec(
    Allfiles=diad_meta,
    path=meta_path,
    prefix=False,
    trupower=True,
)
# Show one file name to check its format before matching against the spectra
meta['filename'].iloc[0]

100%|██████████| 26/26 [00:00<00:00, 81.98it/s]

Done





'K21-71-FI#1-12mwx1.txt'

## Then get a simple file name you can stitch with the spectra

In [13]:
# Reduce the metadata file names to a bare name without the '.txt' extension.
# The helper also reports whether all names are unique.
file_m = pf.extracting_filenames_generic(
    names=meta['filename'],
    file_type='.txt',
    prefix=False,
    str_prefix=" ",
)
file_m[0]

good job, no duplicate file names


'K21-71-FI#1-12mwx1'

## Now get filename from spectra into same form as metadata

In [14]:
# Reduce the spectra file names to the same bare form as the metadata names
file_s = pf.extracting_filenames_generic(
    names=df2['filename'].reset_index(drop=True),
    file_type='.txt',
    prefix=False,
    str_prefix=" ",
)

# Spectra that went through cosmic ray removal carry a '_CRR_DiadFit' suffix that the
# metadata file names do not have, so strip it before matching.
file_s = np.char.replace(file_s.astype(str), "_CRR_DiadFit", "")

good job, no duplicate file names


In [15]:
# Inspect the cleaned spectra names that will be matched against the metadata names
file_s

array(['K21-71-FI#1-12mwx3', 'K21-71-FI#1-6mwx1', 'K21-71-FI#1-6mwx2',
       'K21-71-FI#1-6mwx3', 'K21-72-FI#1', 'K21-72-FI#2', 'K21-73-FI#2',
       'K21-74-FI#1', 'K21-74-FI#2', 'K21-74-FI#3', 'K21-74-FI#4',
       'K21-74-FI#5', 'K21-74-FI#5rep2', 'K21-74-FI#6', 'K21-75-FI#1',
       'K21-75-FI#2 (1)', 'K21-75-FI#2', 'K21-75-FI#3', 'K21-77-FI#1',
       'K21-78b-FI#2', 'K21-78b-FI#3', 'K21-78b-FI#4', 'K21-78b-FI#5',
       'K21-71-FI#1-12mwx1', 'K21-71-FI#1-12mwx2', 'K21-75-FI#1 (1)'],
      dtype='<U18')

In [16]:
# Combining them together
meta['name_for_matching']=file_m
df2['name_for_matching']=file_s
# Second copy of the cleaned names, kept for matching the secondary phase fits later on
df2['Name_for_Secondary_Phases']=file_s

# The merge below is an inner join: a spectrum without a metadata file of the same name
# would disappear from df_combo without any error, so report such spectra first.
unmatched_spectra=np.setdiff1d(np.asarray(file_s, dtype=str), np.asarray(file_m, dtype=str))
if len(unmatched_spectra)>0:
    print('Warning: no metadata found for these spectra, they are dropped by the merge: '
          + ', '.join(unmatched_spectra))

df_combo=df2.merge(meta, on='name_for_matching')

In [17]:
# Evaluate the Ne correction at the acquisition time of each spectrum.
# NOTE(review): presumably 'polyfit_data.pkl' holds the fitted Ne correction model and
# CI=0.67 is the confidence level of the lower/upper values - confirm against DiadFit.
Ne_corr = pf.calculate_Ne_corr_std_err_values(
    new_x=df_combo['sec since midnight'],
    pickle_str='polyfit_data.pkl',
    CI=0.67,
)
Ne_corr.head()


1


Unnamed: 0,time,preferred_values,lower_values,upper_values
0,43968.0,0.997978,0.997972,0.997984
1,42902.0,0.997989,0.997983,0.997995
2,43148.0,0.997986,0.99798,0.997992
3,43412.0,0.997984,0.997977,0.99799
4,45585.0,0.997964,0.997958,0.99797


In [18]:

# Uncertainty on the corrected splitting
# (presumably combines the peak fit and Ne correction errors - confirm against DiadFit)
split_err=pf.propagate_errors_for_splitting(Ne_corr, df_combo)

# Work on a copy so df_combo keeps the uncorrected table
df_combo_out=df_combo.copy()
# Corrected splitting = measured splitting x Ne correction factor; put the results in the first columns
df_combo_out.insert(1, 'Corrected_Splitting', df_combo['Splitting']*Ne_corr['preferred_values'])
df_combo_out.insert(2, 'Corr_Split+1σ', df_combo_out['Corrected_Splitting']+split_err)
df_combo_out.insert(3, 'Corr_Split-1σ',df_combo_out['Corrected_Splitting']-split_err)
df_combo_out.insert(4, 'Corr_Split_1σ_val', split_err)

# Name the spreadsheet after the day folder. os.path.join avoids mixing a hand-written '/'
# into a path that otherwise uses the platform separator.
nm=os.path.basename(DayFolder)
df_combo_out.to_excel(os.path.join(output_path, nm+'_FI_fitting.xlsx'))

## If you have secondary phases, now is the time to merge those in

In [19]:
# Secondary phase fits are optional: each spreadsheet is only read if it exists
# in the current working directory.
if path.exists('Carb_Peak_fits.xlsx'):
    Carb=pd.read_excel('Carb_Peak_fits.xlsx')
else:
    Carb=None
if path.exists('SO2_Peak_fits.xlsx'):
    SO2=pd.read_excel('SO2_Peak_fits.xlsx')
else:
    SO2=None

# Combine whatever is available; the outer merge keeps files that have only one of the two phases
if SO2 is not None and Carb is not None:
    Sec_Phases=pd.merge(SO2, Carb, on='filename', how='outer')
elif SO2 is not None and Carb is None:
    Sec_Phases=SO2
elif SO2 is None and Carb is not None:
    Sec_Phases=Carb
else:
    Sec_Phases=None

# Sec_Phases is None when neither file exists; calling .head() unconditionally raised
# AttributeError in that case. Nothing is displayed when the value is None.
Sec_Phases.head() if Sec_Phases is not None else None

Unnamed: 0,filename,Peak_Cent_SO2,Peak_Area_SO2,Peak_Height_SO2,Model_name_x,Peak_Cent_Carb,Peak_Area_Carb,Peak_Height_Carb,Model_name_y
0,K21-71-FI#1-12mwx1.txt,1151.035056,187.188918,118.126661,Spline,,,,
1,K21-71-FI#1-12mwx2.txt,1151.153045,122.623278,83.250213,Spline,,,,
2,K21-71-FI#1-12mwx3.txt,1151.09405,71.521268,50.960985,Spline,,,,
3,K21-71-FI#1-6mwx1.txt,1151.153045,74.554449,58.661018,Spline,,,,
4,K21-71-FI#1-6mwx2.txt,1151.09405,79.295729,56.970757,Spline,,,,


In [20]:
# Reduce the secondary phase file names to the bare name without the '.txt' extension
if Sec_Phases is not None:
    file_sec_phase=pf.extracting_filenames_generic(
        prefix=False, str_prefix=" ",
        names=Sec_Phases['filename'].reset_index(drop=True),
       file_type='.txt')
else:
    # Define the name even without secondary phases, so later cells can reference it
    file_sec_phase=None

good job, no duplicate file names


In [21]:
# Show the cleaned secondary phase names. The conditional only looks up file_sec_phase when
# secondary phases exist, so this cell no longer raises NameError when there are none.
file_sec_phase if Sec_Phases is not None else None

array(['K21-71-FI#1-12mwx1', 'K21-71-FI#1-12mwx2', 'K21-71-FI#1-12mwx3',
       'K21-71-FI#1-6mwx1', 'K21-71-FI#1-6mwx2', 'K21-71-FI#1-6mwx3',
       'K21-75-FI#1', 'K21-77-FI#1'], dtype=object)

In [22]:
# Names on the diad side, to compare by eye with the secondary phase names
df_combo['name_for_matching']

0     K21-71-FI#1-12mwx3
1      K21-71-FI#1-6mwx1
2      K21-71-FI#1-6mwx2
3      K21-71-FI#1-6mwx3
4            K21-72-FI#1
5            K21-72-FI#2
6            K21-73-FI#2
7            K21-74-FI#1
8            K21-74-FI#2
9            K21-74-FI#3
10           K21-74-FI#4
11           K21-74-FI#5
12       K21-74-FI#5rep2
13           K21-74-FI#6
14           K21-75-FI#1
15       K21-75-FI#2 (1)
16           K21-75-FI#2
17           K21-75-FI#3
18           K21-77-FI#1
19          K21-78b-FI#2
20          K21-78b-FI#3
21          K21-78b-FI#4
22          K21-78b-FI#5
23    K21-71-FI#1-12mwx1
24    K21-71-FI#1-12mwx2
25       K21-75-FI#1 (1)
Name: name_for_matching, dtype: object

In [23]:
# Set the matching key on df_combo_out, the frame that is actually merged below.
# It is an earlier copy of df_combo, so setting the key on df_combo did not reach it.
df_combo_out['name_for_matching']=df_combo_out['Name_for_Secondary_Phases']

if Sec_Phases is not None:
    Sec_Phases['name_for_matching']=file_sec_phase
    # Outer merge: spectra without secondary phases are kept (NaN in the phase columns),
    # and so are secondary phase fits that have no matching diad fit.
    df_combo_sec_phase=df_combo_out.merge(Sec_Phases,
    on='name_for_matching', how='outer')

else:
    df_combo_sec_phase=df_combo_out

### Add some more useful columns

In [24]:
# (secondary phase area column, name of the ratio column to create)
ratio_columns = (
    ('Peak_Area_Carb', 'Carb_Diad_Ratio'),
    ('Peak_Area_SO2', 'SO2_Diad_Ratio'),
)
for phase_area_col, ratio_col in ratio_columns:
    # Only phases that were actually fitted have an area column
    if phase_area_col not in df_combo_sec_phase.columns:
        continue
    # Ratio of the secondary phase peak area to the summed area of both diad peaks
    summed_diad_area = (df_combo_sec_phase['Diad1_Voigt_Area']
                        + df_combo_sec_phase['Diad2_Voigt_Area'])
    df_combo_sec_phase[ratio_col] = df_combo_sec_phase[phase_area_col] / summed_diad_area

df_combo_sec_phase.head()

Unnamed: 0.1,Unnamed: 0,Corrected_Splitting,Corr_Split+1σ,Corr_Split-1σ,Corr_Split_1σ_val,filename_x,Splitting,Split_err_abs,Split_err_quadrature,Diad1_Combofit_Cent,...,Peak_Cent_SO2,Peak_Area_SO2,Peak_Height_SO2,Model_name_x,Peak_Cent_Carb,Peak_Area_Carb,Peak_Height_Carb,Model_name_y,Carb_Diad_Ratio,SO2_Diad_Ratio
0,0,103.084879,103.09398,103.075778,0.009101,K21-71-FI#1-12mwx3,103.293712,0.012129,0.009098,1285.683789,...,1151.09405,71.521268,50.960985,Spline,,,,,,0.023812
1,0,103.111124,103.11456,103.107688,0.003436,K21-71-FI#1-6mwx1,103.318938,0.003384,0.003384,1285.61842,...,1151.153045,74.554449,58.661018,Spline,,,,,,0.026495
2,0,103.110887,103.11756,103.104214,0.006673,K21-71-FI#1-6mwx2,103.318954,0.009116,0.006657,1285.627372,...,1151.09405,79.295729,56.970757,Spline,,,,,,0.028287
3,0,103.099708,103.103648,103.095768,0.00394,K21-71-FI#1-6mwx3_CRR_DiadFit,103.30802,0.005271,0.003897,1285.637851,...,1151.005558,79.734515,47.758407,Spline,,,,,,0.031542
4,0,102.78542,102.821532,102.749309,0.036111,K21-72-FI#1,102.995137,0.049546,0.03618,1286.494784,...,,,,,,,,,,


In [25]:
# Save the table including secondary phases next to the diad-only spreadsheet.
# os.path.join avoids mixing a hand-written '/' into the output path.
df_combo_sec_phase.to_excel(os.path.join(output_path, nm+'_FI_fitting_w_sec_phases.xlsx'))

In [26]:
# Desktop toast telling the user the notebook has finished. winotify is an optional
# package, so a missing install must not make the final cell fail after every output
# has already been written.
try:
    from winotify import Notification, audio
except ImportError:
    toast = None
    print("winotify is not available - skipping the desktop notification.")
else:
    toast = Notification(app_id="VSCode", title="Notebook completed",
                         msg="Step4_Stitch_Outputs_Together is done!",
                         duration="short")
    toast.set_audio(audio.Mail, loop=False)
    toast.show()