In [4]:
import pandas as pd

# Load the dataset
df = pd.read_csv('OzON_Possible.csv')

# Create the Species column by concatenating 'Chain length' and 'DB Number' with a colon
df['Species'] = df['Chain length'].astype(str) + ':' + df['DB Number'].astype(str)

# Add the Product_Ion column (all values 183.0) directly after 'Parent_Ion'.
# The position is looked up on the frame itself: the `columns` list is only
# built further down, so using it here fails with NameError on a fresh kernel.
df.insert(df.columns.get_loc('Parent_Ion') + 1, 'Product_Ion', 183.0)

# Reorder the columns to place 'Species' after 'Parent_Ion' and 'Product_Ion'
columns = df.columns.tolist()
columns.insert(columns.index('Parent_Ion') + 2, columns.pop(columns.index('Species')))
df = df[columns]

# Add the Class column after the Species column
# (`columns` still matches df's column order at this point)
df.insert(columns.index('Species') + 1, 'Class', 'FA')

# Remove the Chain length and DB Number columns; they are now encoded in 'Species'
df = df.drop(columns=['Chain length', 'DB Number'])

# Save the updated DataFrame to a CSV file
df.to_csv('OzON_Possible_Database.csv', index=False)

# Display the updated DataFrame
df


Unnamed: 0,Lipid,Parent_Ion,Product_Ion,Species,Class,DB Location,FA mass,[FA+AMP]+
0,FA(6:1)_<>_n-2,269.2,183.0,6:1,FA,<>,114.1,281.2
1,FA(6:1)_<>_n-3,255.2,183.0,6:1,FA,<>,114.1,281.2
2,FA(6:1)_<>_n-4,241.2,183.0,6:1,FA,<>,114.1,281.2
3,FA(6:2)_<B>_n-2,267.2,183.0,6:2,FA,<B>,112.1,279.2
4,FA(6:2)_<B>_n-3,253.2,183.0,6:2,FA,<B>,112.1,279.2
...,...,...,...,...,...,...,...,...
6504,FA(30:6)_<FFFFF>_n-24,297.5,183.0,30:6,FA,<FFFFF>,440.4,607.5
6505,FA(30:6)_<FFFFF>_n-25,283.5,183.0,30:6,FA,<FFFFF>,440.4,607.5
6506,FA(30:6)_<FFFFF>_n-26,269.5,183.0,30:6,FA,<FFFFF>,440.4,607.5
6507,FA(30:6)_<FFFFF>_n-27,255.5,183.0,30:6,FA,<FFFFF>,440.4,607.5


# Add the ##:0 (zero double bond) lipids to the OzON Possible database

In [2]:
import pandas as pd
import re

# Load the database written by the previous cell and the FA_0 table
# (presumably the ##:0 lipids named in the heading above; verify against the file).
df = pd.read_csv('OzON_Possible_Database.csv')
df2 = pd.read_csv('FA_0.csv')

# Stack both tables into one frame with a fresh 0..n-1 index.
df3 = pd.concat([df, df2], ignore_index=True)

# Function to extract the carbon number and double bond count
def extract_lipid_info(lipid):
    """Parse the carbon count and double-bond count from a lipid name.

    The name must start with ``FA(<carbons>:<double bonds>)``; anything after
    the closing parenthesis is ignored.

    Returns:
        ``(carbons, double_bonds)`` as ints when the prefix matches, otherwise
        ``(inf, inf)`` so that unrecognised names sort after every real entry.
    """
    parsed = re.match(r'FA\((\d+):(\d+)\)', lipid)
    if parsed is None:
        # Unknown format: push to the end of any ascending sort.
        return float('inf'), float('inf')
    carbons, double_bonds = (int(group) for group in parsed.groups())
    return carbons, double_bonds
    
# Preliminary ordering: 'Species' compared as plain text (ascending), then
# 'Parent_Ion' from highest to lowest within each species.
# NOTE(review): extract_lipid_info is not applied here and no helper columns
# are created; the numeric species ordering happens in the sort further below.

df3 = df3.sort_values(
    by=['Species', 'Parent_Ion'],
    ascending=[True, False],
)


# Custom sorting function
def extract_species_info(species):
    """Convert a ``'<carbons>:<double bonds>'`` species label into a sort key.

    Args:
        species: Label such as ``'6:1'``. A ``'d2-'`` marker in the carbon
            part (e.g. ``'d2-16:0'``) is removed before parsing.

    Returns:
        ``(carbon_number, double_bond)`` as floats. A part that is missing or
        not made up purely of digits becomes ``inf`` so it sorts last. Values
        that are not strings at all (e.g. NaN from a missing cell) yield
        ``(inf, inf)`` instead of raising.
    """
    infinity = float('inf')
    if not isinstance(species, str):
        # Missing cells arrive as float NaN, which has no .split().
        return infinity, infinity

    parts = species.split(':')
    # Strip the 'd2-' marker BEFORE the digit test; testing the raw text made
    # the strip unreachable and sent every 'd2-' label to inf.
    carbon_text = parts[0].replace('d2-', '')
    carbon_number = float(carbon_text) if carbon_text.isdigit() else infinity
    double_bond = float(parts[1]) if len(parts) > 1 and parts[1].isdigit() else infinity
    return carbon_number, double_bond

# Order rows by the numeric (carbon, double-bond) key from extract_species_info,
# breaking ties with 'Parent_Ion' from highest to lowest. The key lives in a
# temporary 'species_sort' column that is dropped again before saving.
df3 = (
    df3.assign(species_sort=df3['Species'].apply(extract_species_info))
    .sort_values(by=['species_sort', 'Parent_Ion'], ascending=[True, False])
    .drop(columns='species_sort')
)
df3.to_csv('OzON_Possible_Database_0.csv', index=False)
df3

Unnamed: 0,Lipid,Parent_Ion,Product_Ion,Species,Class,DB Location,FA mass,[FA+AMP]+
6509,FA(5:0),269.3,183.0,5:0,FA,,102.2,281.3
6510,FA(6:0),283.3,183.0,6:0,FA,,116.2,295.3
0,FA(6:1)_<>_n-2,269.2,183.0,6:1,FA,<>,114.1,281.2
1,FA(6:1)_<>_n-3,255.2,183.0,6:1,FA,<>,114.1,281.2
2,FA(6:1)_<>_n-4,241.2,183.0,6:1,FA,<>,114.1,281.2
...,...,...,...,...,...,...,...,...
6462,FA(30:6)_<BBFFF>_n-26,265.5,183.0,30:6,FA,<BBFFF>,440.4,607.5
6507,FA(30:6)_<FFFFF>_n-27,255.5,183.0,30:6,FA,<FFFFF>,440.4,607.5
6485,FA(30:6)_<BFFFF>_n-27,253.5,183.0,30:6,FA,<BFFFF>,440.4,607.5
6508,FA(30:6)_<FFFFF>_n-28,241.5,183.0,30:6,FA,<FFFFF>,440.4,607.5
