# Automated Multiple Reaction Monitoring (MRM)-profiling and Ozone Electrospray Ionizaton (OzESI)-MRM Informatics Platform for High-throughput Lipidomics


In this jupyter notebook you will automate the data analysis of the lipidome. This is a challenging problem to perform manually due to the diverse nature of lipids and the many potential isomers. In this notebook you will analyze mzML files containing data from lipid MRMs, with ozone off and ozone on. The goal is to identify possible double-bond locations in a lipid, in this case a TAG (triacylglycerols).

In [1]:
from IPython.display import Image

![title](Figures/agilent_lcms.png)

The examples shown here were run on an Agilent 6495C Triple Quadrupole LC/MS (example shown above) that has been connected to an ozone line (not shown in picture) for ozoneolysis of lipids.

![title](Figures/TAG_example.png)
Here is an example of a TAG. Notice how many possibilities there are for locations of one double-bond there could be and how convoluted the analysis can become! This image is obtained from LipidMaps.org

Import all necessary libraries

In [2]:
#Import all the necessary libraries
import pymzml
import csv
import os
import pandas as pd
import numpy as np
import math
from matplotlib import pyplot as plt
import re
import plotly.express as px
from collections import defaultdict



No module named 'ms_deisotope._c.averagine' averagine
No module named 'ms_deisotope._c.scoring'
No module named 'ms_deisotope._c.deconvoluter_base'
No module named 'ms_deisotope._c.deconvoluter_base'
No module named 'ms_deisotope._c.deconvoluter_base'


In [3]:
###Importing Variables for all functions

data_base_name_location = 'lipid_database/Lipid_Database.xlsx'####Lipid database with Standard Carnitines
mzml_folder = './data_mzml/04-29-2023_mzml/test/'
tolerance = 0.3
remove_std = True

# Example usage:
folder_name_to_save = 'TEST_04-29-2023_v3'
file_name_to_save = 'TEST_04-29-2023_v3'
save_data= True





Lipid MRM Parsing and Matching Functions

In [6]:

###All functions

#Function to read in MRM database
#Option to remove STDs from database##Not finished need option to use another database with no qualitative ACs


def read_mrm_list(filename,remove_std = True):
    mrm_list_new = pd.read_excel(filename, sheet_name=None)
    mrm_list_new = pd.concat(mrm_list_new, ignore_index=True)
    mrm_list_offical = mrm_list_new[['Compound Name', 'Parent Ion', 'Product Ion', 'Class']]
    # Add underscore to middle of columns names
    mrm_list_offical.columns = mrm_list_offical.columns.str.replace(' ', '_')
    # Round Parent Ion and Product Ion to 1 decimal place
    mrm_list_offical['Parent_Ion'] = np.round(mrm_list_offical['Parent_Ion'],1)
    mrm_list_offical['Product_Ion'] = np.round(mrm_list_offical['Product_Ion'],1)
    # Create transition column by combining Parent Ion and Product Ion with arrow between numbers
    mrm_list_offical['Transition'] = mrm_list_offical['Parent_Ion'].astype(str) + ' -> ' + mrm_list_offical['Product_Ion'].astype(str)
    # Change column compound name to lipid
    mrm_list_offical = mrm_list_offical.rename(columns={'Compound_Name': 'Lipid'})
    # Make a column called Class match lipid column to lipid types
    if remove_std == True:
        lipid_class = mrm_list_offical['Class'].unique()
        lipid_class_to_keep = ['PS','PG','CE','PC', 'DAG', 'PE', 'TAG', 'FA', 'Cer', 'CAR', 'PI','SM']
        mrm_list_offical = mrm_list_offical[mrm_list_offical['Class'].isin(lipid_class_to_keep)]
    return mrm_list_offical

#OzESI
OzESI_time = {}
def mzml_parser(file_name):
    df = pd.DataFrame(columns=['Lipid','Parent_Ion','Product_Ion','Intensity','Transition','Class','Sample_ID'])
    data_folder = os.listdir(file_name) #Path to the mzml files
    data_folder.sort()
    path_to_mzml_files = file_name

    
    

    for file in data_folder:
            if file.endswith('.mzML'):

                    run = pymzml.run.Reader(path_to_mzml_files+file, skip_chromatogram=False) #Load the mzml file into the run object



                    df_all = pd.DataFrame(columns=['Lipid','Parent_Ion','Product_Ion','Intensity','Transition','Class','Sample_ID']) #Create empty pandas dataframe to store the data

                    #create pandas dataframe to store the data with the columns Parent Ion, Product Ion, Intensity, Transition Lipid and Class
                   
                    q1_mz = 0 #Create empty variables to store the Q1 and Q3 m/z values
                    q3_mz = 0
                    count = 0 #Create a counter to keep track of the number of transitions
                    for spectrum in run:
                        if isinstance(spectrum, pymzml.spec.Chromatogram):
                            for time, intensity in spectrum.peaks():
                                OzESI_time[time] = np.round(intensity), q1_mz, q3_mz

                            for element in spectrum.ID.split(' '):
                                    intensity_store = np.array([])
                                    if 'Q1' in element:
                                            q1 = element.split('=')
                                            q1_mz= np.round((float(q1[1])),1)

                                    if 'Q3' in element:
                                
                                            q3 = element.split('=')
  
                                            q3_mz=np.round(float(q3[1]),1)


                                            for mz,intensity in spectrum.peaks(): #Get the m/z and intensity values from the spectrum
                                                    intensity_store = np.append(intensity_store,intensity) #Store the intensity values in an array



                                    if 'Q3' in element:
                                            # print(intensity_sum)
                                            intensity_sum = np.sum(intensity_store) #Sum the intensity values
                                            df_all.loc[count,'Parent_Ion'] = q1_mz #Store the Q1 and Q3 m/z values in the pandas dataframe
                                            df_all.loc[count,'Product_Ion'] = q3_mz
                                            #round the Q1 and Q3 m/z values to 1 decimal places
                                            df_all.loc[count,'Parent_Ion'] = np.round(df_all.loc[count,'Parent_Ion'],1)
                                            df_all.loc[count,'Product_Ion'] = np.round(df_all.loc[count,'Product_Ion'],1)
                                            df_all.loc[count,'Intensity'] = intensity_sum #Store the intensity values in the pandas dataframe
                                            df_all.loc[count,'Transition'] = str(q1_mz)+ ' -> '+ str(q3_mz) #Store the transition values in the pandas dataframe
                                            #add file name to Sample_ID column without the mzmL extension
                                            df_all.loc[count,'Sample_ID'] = file[:-5]
                                            count+=1

            #append df_all to df
            df = df.append(df_all, ignore_index=True)
    return df

# Function to create an ion dictionary from an MRM database DataFrame
def create_ion_dict(mrm_database):
    ion_dict = defaultdict(list)
    # Iterate through the rows of the MRM database DataFrame
    for index, row in mrm_database.iterrows():
        # Add a tuple with Lipid and Class to the ion dictionary using Parent_Ion and Product_Ion as the key
        ion_dict[(row['Parent_Ion'], row['Product_Ion'])].append((row['Lipid'], row['Class']))
    return ion_dict

# Function to check if the absolute difference between two values is within a given tolerance
def within_tolerance(a, b, tolerance=0.1):
    return abs(a - b) <= tolerance

# Function to match the ions in a DataFrame row with the ions in an ion dictionary
def match_ions(row, ion_dict, tolerance=0.1):
    ions = (row['Parent_Ion'], row['Product_Ion'])
    matched_lipids = []
    matched_classes = []

    # Iterate through the ion dictionary
    for key, value in ion_dict.items():
        # Check if both the Parent_Ion and Product_Ion values are within the specified tolerance
        if within_tolerance(ions[0], key[0], tolerance) and within_tolerance(ions[1], key[1], tolerance):
            # If within tolerance, extend the matched_lipids and matched_classes lists with the corresponding values
            matched_lipids.extend([match[0] for match in value])
            matched_classes.extend([match[1] for match in value])

    # If any matches were found, update the Lipid and Class columns in the row
    if matched_lipids and matched_classes:
        row['Lipid'] = ' | '.join(matched_lipids)
        row['Class'] = ' | '.join(matched_classes)

    return row

####Combined functions for Matching

def match_lipids_parser(mrm_database,df, tolerance=0.3):
    ion_dict = create_ion_dict(mrm_database)
    # Assuming you have the df DataFrame to apply the match_ions function
    df_matched = df.apply(lambda row: match_ions(row, ion_dict=ion_dict, tolerance=tolerance), axis=1)


    # df_matched = df_matched.dropna()
    
    return df_matched


def save_dataframe(df, folder_name, file_name, max_attempts=5):
    folder_path = f'data_results/data/data_matching/{folder_name}'
    os.makedirs(folder_path, exist_ok=True)

    for i in range(max_attempts):
        file_path = f'{folder_path}/{file_name}.csv'
        if not os.path.isfile(file_path):
            df.to_csv(file_path, index=False)
            print(f"Saved DataFrame to {file_path}")
            break
    else:
        print(f"Failed to save DataFrame after {max_attempts} attempts.")
        return None


def full_parse(data_base_name_location,mzml_folder, folder_name_to_save, file_name_to_save,tolerance,remove_std = True,
               save_data=False):
    mrm_database = read_mrm_list(data_base_name_location,remove_std=remove_std)
    df = mzml_parser(mzml_folder)
    df_matched = match_lipids_parser(mrm_database,df, tolerance=tolerance)
    
    if save_data == True:
        
        save_dataframe(df_matched, folder_name_to_save, file_name_to_save)

    return df_matched





Run all Lipid MRM functions

In [7]:
df_matched = full_parse(data_base_name_location,mzml_folder, folder_name_to_save, 
                        file_name_to_save,tolerance, remove_std = remove_std,save_data=save_data)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mrm_list_offical['Parent_Ion'] = np.round(mrm_list_offical['Parent_Ion'],1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mrm_list_offical['Product_Ion'] = np.round(mrm_list_offical['Product_Ion'],1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mrm_list_offical['Transition'] = mrm_list_offical['

Failed to save DataFrame after 5 attempts.


  df = df.append(df_all, ignore_index=True)


LC OzESI Lipid Parsing and Retention Time Functions

In [12]:


def process_chromatogram(OzESI_time):
    # Create dataframe from OzESI_time dictionary
    OzESI_rt_df = pd.DataFrame(list(OzESI_time.items()), columns=['Retention_Time', 'intensity'])
    
    # Split intensity column into three columns intensity, Parent_Ion and Product_Ion
    OzESI_rt_df[['intensity','Parent_Ion','Product_Ion']] = pd.DataFrame(OzESI_rt_df['intensity'].tolist(), index=OzESI_rt_df.index)
    
    # Round retention Retention_Time to 1 decimal place
    OzESI_rt_df['Retention_Time'] = round(OzESI_rt_df['Retention_Time'], 2)
    
    
    # Create a column called Transition with the Parent_Ion and Product_Ion
    OzESI_rt_df['Transition'] = OzESI_rt_df['Parent_Ion'].astype(str) + ' -> ' + OzESI_rt_df['Product_Ion'].astype(str)
    
    # Sort dataframe by intensity, drop duplicates, and find the 10 largest values
    OzESI_rt_df_sorted = OzESI_rt_df.sort_values(['intensity'], ascending=False).drop_duplicates(['Parent_Ion','Product_Ion']).sort_index()
    OzESI_rt_df_top = OzESI_rt_df_sorted.nlargest(10, 'intensity')

    print('OzESI 10 largest: \n', OzESI_rt_df_top)
    
    # # Find peaks in the intensity column and plot the chromatogram
    # peaks, _ = find_peaks(OzESI_rt_df['intensity'], height=0.5e5,distance=1000)
    # plt.plot(OzESI_rt_df['Retention_Time'] ,OzESI_rt_df['intensity'])
    # plt.plot(OzESI_rt_df.iloc[peaks]['Retention_Time'], OzESI_rt_df.iloc[peaks]['intensity'], "x")
    # plt.ylabel('Intensity')
    # plt.xlabel('Retention Time')
    # plt.title('OzESI LC Chromatogram')
    # plt.show()
    
    return OzESI_rt_df_top

def add_rt_intensity(df, OzESI_rt_df_top):
    # Create a dictionary to map transitions to retention times and intensities
    transitions_to_rt = dict(zip(OzESI_rt_df_top['Transition'], OzESI_rt_df_top['Retention_Time']))
    transitions_to_intensity = dict(zip(OzESI_rt_df_top['Transition'], OzESI_rt_df_top['intensity']))
    
    # Use the map function to add retention times and intensities to the dataframe
    df['Retention_Time'] = df['Transition'].map(transitions_to_rt)
    df['Intensity_OzESI'] = df['Transition'].map(transitions_to_intensity)
    
    return df

def create_aldehyde_ion_dataframe():
    # Create a pandas dataframe with columns for DB_Position and Aldehyde_Ion
    df_OzESI = pd.DataFrame(columns=['DB_Position','Aldehyde_Ion'])

    # Loop over the range of DB_Position values and calculate the corresponding Aldehyde_Ion values
    for i in range(3, 21):
        df_OzESI.loc[i,'DB_Position'] = i
        df_OzESI.loc[i,'Aldehyde_Ion'] = 26 + (14 * (i-3))

    # Print the first 25 rows of the dataframe
    # print(df_OzESI.head(25))

    # Return the dataframe
    return df_OzESI

OzESI_list = [3,5,7,9,11]
def calculate_n_minus_values(df_matched, df_OzESI, OzESI_list=[3,5,7,9,11], starting_column=9, last_column=14):
    """
    Given a pandas dataframe df and a dataframe df_OzESI containing DB_Position and Aldehyde_Ion values,
    calculates the n-i values for each i in OzESI_list by subtracting the corresponding Aldehyde_Ion value
    from the Parent_Ion column in df and storing the result in new columns named 'n-i' in df.
    The starting_column and last_column parameters specify the range of columns in which the n-i values should be stored.
    """
    # Create new columns in df for n-i values
    for i in OzESI_list:
        df_matched[f"n-{i}"] = df_matched["Parent_Ion"] - df_OzESI.loc[df_OzESI["DB_Position"] == i, "Aldehyde_Ion"].values[0]
    
    # Print the shape of the dataframe and return it
    # print(df_matched.shape)
    return df_matched


# OzESI_rt_df_top = process_chromatogram(OzESI_time)


Pipeline to run all LC OzESI functions

In [13]:
#OzESI_list = [3,5,7,9,11]
def df_OzESI_pipeline(df, OzESI_time, OzESI_list=[3,5,7,9,11]):
    # Process chromatogram
    OzESI_rt_df_top = process_chromatogram(OzESI_time)
    
    df_OzESI = create_aldehyde_ion_dataframe()
    # Add retention time and intensity to dataframe
    df_processed = add_rt_intensity(df, OzESI_rt_df_top)
    # Calculate n-minus values
    df_processed = calculate_n_minus_values(df_processed, df_OzESI, OzESI_list, starting_column=9, last_column=14)
    
    return df_processed
df_OzESI_processed = df_OzESI_pipeline(df_matched, OzESI_time)
df_OzESI_processed.tail(10)

OzESI 10 largest: 
         Retention_Time   intensity  Parent_Ion  Product_Ion      Transition
50245            18.73  61029498.0       902.8        603.6  902.8 -> 603.6
4185             14.99    467244.0       874.8        575.6  874.8 -> 575.6
4114             14.74    322487.0       900.8        601.6  900.8 -> 601.6
4222             15.13    128296.0       848.8        549.6  848.8 -> 549.6
3846             13.78     78707.0       872.8        573.6  872.8 -> 573.6
3849             13.79     69875.0       846.8        547.6  846.8 -> 547.6
3809             13.65     55862.0       898.8        599.6  898.8 -> 599.6
4133             14.81     20192.0       766.7        549.6  766.7 -> 549.6
138890            5.11     17718.0       696.6        549.6  696.6 -> 549.6
4093             14.66     15985.0       792.7        575.6  792.7 -> 575.6


Unnamed: 0,Lipid,Parent_Ion,Product_Ion,Intensity,Transition,Class,Sample_ID,Retention_Time,Intensity_OzESI,n-3,n-5,n-7,n-9,n-11
134,,820.8,603.6,55098.6,820.8 -> 603.6,,FAD189_M2-5xFAD-cerebTG18-1_o3on,,,794.8,766.8,738.8,710.8,682.8
135,[TG(50:3)]_FA18:1,846.8,547.6,142150.4,846.8 -> 547.6,TAG,FAD189_M2-5xFAD-cerebTG18-1_o3on,13.79,69875.0,820.8,792.8,764.8,736.8,708.8
136,"[TG(51:9),TG(50:2)]_FA18:1",848.8,549.6,467584.0,848.8 -> 549.6,TAG,FAD189_M2-5xFAD-cerebTG18-1_o3on,15.13,128296.0,822.8,794.8,766.8,738.8,710.8
137,"[TG(51:8),TG(50:1)]_FA18:1",850.8,551.6,743307.9,850.8 -> 551.6,TAG,FAD189_M2-5xFAD-cerebTG18-1_o3on,,,824.8,796.8,768.8,740.8,712.8
138,[TG(52:4)]_FA18:1,872.8,573.6,83694.61,872.8 -> 573.6,TAG,FAD189_M2-5xFAD-cerebTG18-1_o3on,13.78,78707.0,846.8,818.8,790.8,762.8,734.8
139,"[TG(53:10),TG(52:3)]_FA18:1",874.8,575.6,409632.4,874.8 -> 575.6,TAG,FAD189_M2-5xFAD-cerebTG18-1_o3on,14.99,467244.0,848.8,820.8,792.8,764.8,736.8
140,"[TG(53:9),TG(52:2)]_FA18:1",876.8,577.6,1942159.0,876.8 -> 577.6,TAG,FAD189_M2-5xFAD-cerebTG18-1_o3on,,,850.8,822.8,794.8,766.8,738.8
141,[TG(54:5)]_FA18:1,898.8,599.6,90462.81,898.8 -> 599.6,TAG,FAD189_M2-5xFAD-cerebTG18-1_o3on,13.65,55862.0,872.8,844.8,816.8,788.8,760.8
142,"[TG(55:11),TG(54:4)]_FA18:1",900.8,601.6,240827.4,900.8 -> 601.6,TAG,FAD189_M2-5xFAD-cerebTG18-1_o3on,14.74,322487.0,874.8,846.8,818.8,790.8,762.8
143,"[TG(55:10),TG(54:3)]_FA18:1",902.8,603.6,961996.1,902.8 -> 603.6,TAG,FAD189_M2-5xFAD-cerebTG18-1_o3on,18.73,61029498.0,876.8,848.8,820.8,792.8,764.8


OzESI n-# lipid matching

In [30]:
### Not working 
#Supposed to add n-# and lipid name to lipid column, see github for previous working version
OzESI_list = [3,5,7,9,11]
def add_lipid_info(df_OzESI_processed, OzESI_list):
    # Create a copy of the original dataframe
    df_test = df_matched.copy()
    
    # Convert Parent_Ion column to float
    df_test['Parent_Ion'] = df_test['Parent_Ion'].astype(float)
    
    # Convert all n-# columns to float
    for i in OzESI_list:
        df_test['n-' + str(i)] = df_test['n-' + str(i)].astype(float)
    
    # Search for n-# in Parent_Ion and add the corresponding lipid to the Lipid column
    for i in range(len(df_test)):
        for j in range(len(df_test)):
            if pd.isna(df_test.loc[i,'Lipid']):
                # print(i,j)
                parent_ion = df_test.loc[i,'Parent_Ion']
                if parent_ion == df_test.loc[j,'n-3'] and isinstance(df_test.loc[j,'Lipid'], str):
                    df_test.loc[i,'Lipid'] = 'n-3 ' + (df_test.loc[j,'Lipid'])
                elif parent_ion == df_test.loc[j,'n-5'] and isinstance(df_test.loc[j,'Lipid'], str):
                    df_test.loc[i,'Lipid'] = 'n-5 ' + (df_test.loc[j,'Lipid'])
                elif parent_ion == df_test.loc[j,'n-7'] and isinstance(df_test.loc[j,'Lipid'], str):
                    print('n-7', i)
                    print(df_test.loc[j,'Lipid'],parent_ion)
                    df_test.loc[i,'Lipid'] = 'n-7 ' + (df_test.loc[j,'Lipid'])
                elif parent_ion == df_test.loc[j,'n-9'] and isinstance(df_test.loc[j,'Lipid'], str):
                    df_test.loc[i,'Lipid'] = 'n-9 ' + (df_test.loc[j,'Lipid'])
                elif parent_ion == df_test.loc[j,'n-11'] and isinstance(df_test.loc[j,'Lipid'], str):
                    df_test.loc[i,'Lipid'] = 'n-11 '+ (df_test.loc[j,'Lipid'])
                else:
                    pass
    df_test.dropna(subset=['Lipid'], inplace=True)
    return df_test

pd.set_option('display.max_rows', None)
df_test = add_lipid_info(df_OzESI_processed, OzESI_list)
df_test.head(None)

n-7 17
[TG(51:8),TG(50:1)]_FA18:1 768.8
n-7 20
[TG(52:4)]_FA18:1 790.8
n-7 24
[TG(54:5)]_FA18:1 816.8
n-7 53
[TG(51:8),TG(50:1)]_FA18:1 768.8
n-7 60
[TG(54:5)]_FA18:1 816.8
n-7 89
[TG(51:8),TG(50:1)]_FA18:1 768.8
n-7 96
[TG(54:5)]_FA18:1 816.8
n-7 125
[TG(51:8),TG(50:1)]_FA18:1 768.8
n-7 132
[TG(54:5)]_FA18:1 816.8


Unnamed: 0,Lipid,Parent_Ion,Product_Ion,Intensity,Transition,Class,Sample_ID,Retention_Time,Intensity_OzESI,n-3,n-5,n-7,n-9,n-11
8,"n-9 [TG(51:8),TG(50:1)]_FA18:1",740.8,551.6,676318.7,740.8 -> 551.6,,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,,,714.8,686.8,658.8,630.8,602.8
17,"n-7 [TG(51:8),TG(50:1)]_FA18:1",768.8,551.6,553109.0,768.8 -> 551.6,,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,,,742.8,714.8,686.8,658.8,630.8
18,n-9 [TG(54:5)]_FA18:1,788.8,599.6,197992.5,788.8 -> 599.6,,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,,,762.8,734.8,706.8,678.8,650.8
20,n-7 [TG(52:4)]_FA18:1,790.8,601.6,173825.5,790.8 -> 601.6,,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,,,764.8,736.8,708.8,680.8,652.8
22,n-5 [TG(50:3)]_FA18:1,792.8,603.6,583424.5,792.8 -> 603.6,,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,,,766.8,738.8,710.8,682.8,654.8
24,n-7 [TG(54:5)]_FA18:1,816.8,599.6,91407.59,816.8 -> 599.6,,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,,,790.8,762.8,734.8,706.8,678.8
25,n-5 [TG(52:4)]_FA18:1,818.8,601.6,143589.7,818.8 -> 601.6,,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,,,792.8,764.8,736.8,708.8,680.8
26,n-3 [TG(50:3)]_FA18:1,820.8,603.6,512147.1,820.8 -> 603.6,,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,,,794.8,766.8,738.8,710.8,682.8
27,[TG(50:3)]_FA18:1,846.8,547.6,1867125.0,846.8 -> 547.6,TAG,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,13.79,69875.0,820.8,792.8,764.8,736.8,708.8
28,"[TG(51:9),TG(50:2)]_FA18:1",848.8,549.6,9221320.0,848.8 -> 549.6,TAG,DOD93_F4-5xFAD-Cereb_TG18-1_o2only,15.13,128296.0,822.8,794.8,766.8,738.8,710.8


Plotting Functions

In [22]:
#import visualization libraries
import umap
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

  from .autonotebook import tqdm as notebook_tqdm


In [25]:
#Plotting functions

def plot_transition_vs_intensity(df):
    fig = px.bar(df, x="Transition", y="Intensity", color="Lipid", hover_data=['Lipid', 'Class'])
    fig.show()

def plot_class_vs_intensity_bar(df):
    fig = px.bar(df, x="Class", y="Intensity", color="Class", hover_data=['Lipid', 'Class'])
    fig.show()

def plot_class_vs_intensity_pie(df):
    fig = px.pie(df, values='Intensity', names='Class', title='Lipid Class')
    fig.show()

def plot_intensity_heatmap(df):
    fig = go.Figure(data=go.Heatmap(
        z=df['Intensity'],
        x=df['Lipid'],
        y=df['Class'],
        colorscale='Viridis'))
    fig.show()

# Example usage:
# Assuming you have the df_matching DataFrame
plot_transition_vs_intensity(df_matched)
plot_class_vs_intensity_bar(df_matched)
plot_class_vs_intensity_pie(df_matched)
plot_intensity_heatmap(df_matched)
