# Evaluate MSMS-Set Outputs & Generate Final Observation Table

In [None]:
import pandas as pd
import glob
import os

from ipywidgets import widgets, interact, interactive, interactive_output, Layout, Style, HBox, VBox
from IPython.display import clear_output
import shutil

In [None]:
class PDF(object):
    """Wrap a PDF path so a notebook can render it inline.

    Args:
        pdf: Path or URL of the PDF; formatted with ``str()`` when rendered.
        size: ``(width, height)`` used for the HTML iframe.
    """

    def __init__(self, pdf, size=(200,200)):
        self.pdf = pdf
        self.size = size

    def _repr_html_(self):
        """Return an ``<iframe>`` element whose ``src`` is the PDF."""
        # Local import keeps this cell self-contained.
        from html import escape

        # Attribute values are quoted and escaped: an unquoted value ends at
        # the first space, which breaks filenames containing whitespace.
        return '<iframe src="{0}" width="{1[0]}" height="{1[1]}"></iframe>'.format(
            escape(str(self.pdf), quote=True), self.size)

    def _repr_latex_(self):
        """Return a LaTeX ``\\includegraphics`` command for the PDF."""
        return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)

## Rerun Notebook From Here with Other Chromatography if Needed

In [None]:
# Chromatography under review; it is substituted into the
# 'downloads/plant_standards_diagnostic_plots_{chromatography}' folder names.
chromatography = 'c18' #'hilic' or 'c18'

In [None]:
# Root folder with this chromatography's diagnostic plots and peak table.
msms_set_output_dir = 'downloads/plant_standards_diagnostic_plots_{chromatography}'.format(
    chromatography=chromatography,
)

# Sub-folders that receive the plots judged "good" / "ambiguous" during review.
msms_set_output_good_dir = msms_set_output_dir + '/good'
msms_set_output_ambiguous_dir = msms_set_output_dir + '/ambiguous'

# Peak-height / centroid table that the review cells below filter.
output_df = pd.read_csv(
    os.path.join(msms_set_output_dir, 'diagnostic_peakheight_and_centroids.csv')
)

In [None]:
# Flag kept as the strings 'False'/'True'; rows accepted while the override
# toggle is on are switched to 'True' by the "Good" handler.
output_df['filter_override'] = 'False'

# Compound group = text before the first '-' of compound_name. Computed in one
# element-wise pass instead of iterrows() + a .loc write per row, which is slow
# and writes to every row sharing a label when the index has duplicates.
output_df['group'] = output_df['compound_name'].map(lambda name: str(name).split('-')[0])

In [None]:
# Preview the first rows of the peak table.
output_df.head()

In [None]:
# glob returns matches in arbitrary order; sort so the review order (and every
# positional index into these lists) is the same on each run.
plot_files = sorted(glob.glob(os.path.join(msms_set_output_dir, '*.pdf')))

# Basenames, index-aligned with plot_files: plot_filenames[i] names plot_files[i].
plot_filenames = [os.path.basename(f) for f in plot_files]

# Trailing blank entry used as the dropdown's "nothing selected" choice; it has
# no counterpart in plot_files.
plot_filenames.append('')

In [None]:
# Spot-check the first plot filename.
plot_filenames[0]

## Adjust PDF Viewer Size

In [None]:
# Size tuple handed to PDF(...); the review handlers below reuse it.
pdf_view_size = (2000, 800)
# Render the first plot so the viewer size can be judged and adjusted.
test_pdf = PDF(plot_files[0], size=pdf_view_size)
test_pdf

## Filter Compounds into "Ambiguous" and "Good" Sets

In [None]:
# Dropdown for jumping straight to a given plot; starts on the blank entry.
compound_drop = widgets.Dropdown(
    options=plot_filenames,
    value='',
    description='Manual Select:',
    style={'description_width': 'initial'},
    disabled=False,
)

# Toggle read by the "Good" handler: when on, the adduct filter is skipped.
override_filter_button = widgets.ToggleButton(
    description='OVERRIDE FILTER',
    value=False,
    button_style='warning',  # 'success', 'info', 'warning', 'danger' or ''
    icon='ban',  # (FontAwesome names without the `fa-` prefix)
    tooltip="Don't use filtering logic for adducts, manually curate after",
    font_weight='bold',
    disabled=False,
)

# Verdict buttons; their click handlers are attached in a later cell.
good_button = widgets.Button(
    description='Good',
    style={'font_weight': 'bold', 'button_color': 'lightgreen'},
    disabled=False,
)

ambiguous_button = widgets.Button(
    description='Ambiguous',
    style={'font_weight': 'bold', 'button_color': 'lightcoral'},
    disabled=False,
)

# Alias kept from the original cell, where `button` ended up bound to the
# Ambiguous button.
button = ambiguous_button

def filter_adducts(idx, rt_diff_max = 0.2, intensity_min = 1000000):
    """Keep the rows of one compound group that agree with its strongest peak.

    The group is the text before the first '-' of ``plot_filenames[idx]``.
    Among the ``output_df`` rows of that group, a row is kept when its
    ``rt_peak`` lies within ``rt_diff_max`` (inclusive) of the ``rt_peak`` of
    the row with the highest ``peak_height`` and its ``peak_height`` is at
    least ``intensity_min``.

    Args:
        idx: Position in ``plot_filenames`` of the plot being judged.
        rt_diff_max: Largest allowed ``rt_peak`` difference from the
            strongest peak.
        intensity_min: Smallest ``peak_height`` a kept row may have.

    Returns:
        The matching rows of ``output_df`` with their original index labels
        and columns. Empty when the group has no rows or no usable
        ``peak_height``.
    """
    name = plot_filenames[idx].split('-')[0]
    adduct_df = output_df[output_df['group'] == name]

    # rt_peak of the first row carrying the maximum peak height. The selection
    # is empty for an empty group or all-NaN heights; indexing element 0
    # unconditionally used to raise IndexError in that case.
    peak_height_max = adduct_df['peak_height'].max()
    rt_at_max = adduct_df.loc[adduct_df['peak_height'] == peak_height_max, 'rt_peak']
    if rt_at_max.empty:
        return adduct_df.iloc[0:0]
    rt_set = rt_at_max.iloc[0]

    # Boolean masks select rows by label alignment, so this also works when
    # output_df does not have a default 0..n-1 index.
    within_rt = adduct_df['rt_peak'].between(rt_set - rt_diff_max, rt_set + rt_diff_max)
    strong_enough = adduct_df['peak_height'] >= intensity_min
    return adduct_df[within_rt & strong_enough]

class Events:
    """Click/selection handlers and accumulated results for the plot review.

    Attributes:
        ind: Position in ``plot_files`` of the plot currently shown; -1 before
            the first plot is shown and ``ind_max + 1`` once the review has
            moved past the last plot.
        ind_max: Last valid position, computed when the class is defined.
        filtered_good_df: Rows collected from plots judged "Good".
        ambiguous_df: Rows collected from plots judged "Ambiguous".
    """

    ind = -1
    ind_max = len(plot_files) - 1

    filtered_good_df = pd.DataFrame()
    ambiguous_df = pd.DataFrame()

    def _has_current_plot(self):
        """Return True when ``ind`` points at a real entry of ``plot_files``."""
        return 0 <= self.ind <= self.ind_max

    def _current_group_rows(self):
        """Return the ``output_df`` rows of the current plot's compound group."""
        name = plot_filenames[self.ind].split('-')[0]
        return output_df[output_df['group'] == name]

    def _copy_current_plot(self, dest_dir):
        """Copy the current plot into ``dest_dir``, creating it if needed."""
        os.makedirs(dest_dir, exist_ok=True)
        shutil.copyfile(plot_files[self.ind], os.path.join(dest_dir, plot_filenames[self.ind]))

    def _advance(self):
        """Show the next plot, or report completion after the last one."""
        if self.ind < self.ind_max:
            self.ind += 1
            display(PDF(plot_files[self.ind], size=pdf_view_size))
            print(self.ind)
        else:
            # Step past the last plot so further clicks cannot copy it and
            # append its rows a second time.
            self.ind = self.ind_max + 1
            print('Compound Evaluation Complete!')

    def good_button_click(self, event):
        """Record the current plot as good, then move to the next plot.

        Args:
            event: Value supplied by the button callback; unused.
        """
        with output:
            clear_output()

            if self._has_current_plot():
                self._copy_current_plot(msms_set_output_good_dir)

                if override_filter_button.value:
                    # Keep every row of the group and mark only the override
                    # column; a frame-wide replace('False', 'True') would also
                    # rewrite matching cells in unrelated columns.
                    accepted_df = self._current_group_rows().assign(filter_override='True')
                else:
                    accepted_df = filter_adducts(self.ind)

                self.filtered_good_df = pd.concat([self.filtered_good_df, accepted_df])

            self._advance()

    def ambiguous_button_click(self, event):
        """Record the current plot as ambiguous, then move to the next plot.

        Args:
            event: Value supplied by the button callback; unused.
        """
        with output:
            clear_output()

            if self._has_current_plot():
                self._copy_current_plot(msms_set_output_ambiguous_dir)
                self.ambiguous_df = pd.concat([self.ambiguous_df, self._current_group_rows()])

            self._advance()

    def on_select(self, event):
        """Jump to the plot whose filename was picked in the dropdown.

        Args:
            event: Selected entry of ``plot_filenames``.
        """
        selected_index = plot_filenames.index(event)
        if selected_index > self.ind_max:
            # The blank entry has no plot behind it: leave the view and the
            # current position untouched instead of indexing past the list.
            return

        with output:
            clear_output()
            self.ind = selected_index
            display(PDF(plot_files[self.ind], size=pdf_view_size))
            

In [None]:
# Output area the handlers draw into, and the object holding review state.
output = widgets.Output()
events = Events()

# Route button clicks and dropdown changes to the handlers.
good_button.on_click(events.good_button_click)
ambiguous_button.on_click(events.ambiguous_button_click)
manual_select_dropdown = interactive(events.on_select, event=compound_drop)

# One wrapping row with all four controls, followed by the output area.
button_container = HBox(
    children=[good_button, ambiguous_button, override_filter_button, compound_drop],
    layout=Layout(
        width='100%',
        display='inline-flex',
        flex_flow='row wrap',
        align_items='center',
    ),
)

display(button_container, output)

## Save 'Good' and 'Ambiguous' Observation Tables

In [None]:
# to_csv does not create missing folders, so make sure both destinations exist
# before writing.
os.makedirs(msms_set_output_ambiguous_dir, exist_ok=True)
os.makedirs(msms_set_output_good_dir, exist_ok=True)

# One table per verdict, each saved next to the plots copied for that verdict.
events.ambiguous_df.to_csv(os.path.join(msms_set_output_ambiguous_dir, 'ambiguous_observation_table.csv'))
events.filtered_good_df.to_csv(os.path.join(msms_set_output_good_dir, 'final_observation_table.csv'))

In [None]:
# Show the rows collected from the plots judged "Good".
events.filtered_good_df

## Compare "Good" Compounds from Both Chromatographies, Display List of "Ambiguous" Compounds In Both

In [None]:
chromatographies = ['hilic', 'c18'] #list of chromatographies used

# Basenames of the plots found in the good / ambiguous folders, pooled over
# all chromatographies.
good_plot_filenames = []
ambiguous_plot_filenames = []

for ch in chromatographies:
    # Loop-local folder names. Reusing msms_set_output_good_dir and
    # msms_set_output_ambiguous_dir here would leave them pointing at the last
    # chromatography in the list, redirecting any later review click or save.
    good_dir = 'downloads/plant_standards_diagnostic_plots_{chromatography}/good'.format(chromatography=ch)
    ambiguous_dir = 'downloads/plant_standards_diagnostic_plots_{chromatography}/ambiguous'.format(chromatography=ch)

    # Sorted because glob returns matches in arbitrary order.
    good_plot_filenames.extend(
        os.path.basename(f) for f in sorted(glob.glob(os.path.join(good_dir, '*.pdf')))
    )
    ambiguous_plot_filenames.extend(
        os.path.basename(f) for f in sorted(glob.glob(os.path.join(ambiguous_dir, '*.pdf')))
    )


In [None]:
# Filenames found in an ambiguous folder that appear in no good folder,
# across all chromatographies listed above.
set(ambiguous_plot_filenames) - set(good_plot_filenames)