In [1]:
import numpy as np
import pandas as pd
import glob
import os

#### `1) GNPS`
Import all identifications from GNPS (from 100 files or so) and "clean up" the table

In [2]:
df= pd.read_csv("./resources/MS2_LIBRARYSEARCH_all_identifications.tsv", sep='\t', encoding='latin-1')
df.drop(df.index[df['IonMode'] == "negative"], inplace=True)
df.drop(df.index[df['MZErrorPPM'] > 20.0], inplace=True)
GNPS=df.filter(["Compound_Name", "RT_Query", "Precursor_MZ"])
GNPS=GNPS.rename(columns= {"RT_Query": "RetentionTime"})
GNPS=GNPS.drop_duplicates(subset="Compound_Name", keep='first')
GNPS

Unnamed: 0,Compound_Name,RetentionTime,Precursor_MZ
0,dehydroxynocardamine,347.70300,585.361000
2,Massbank:PR311142 Cyclo(leucylprolyl),200.44700,211.144000
6,cyclo(L-Val-L-Pro),311.23700,197.129000
8,"""(4R)-4-((3R,5R,6S,7R,9S,10R,12S,13R,17R)-3,6,...",419.38900,871.554000
9,Massbank:PR300821 Cyclo(proline-leucine),223.96100,211.144000
...,...,...,...
356761,Spectral Match to Tyr-Ala from NIST14,76.97570,253.118000
357126,epemicinA,412.72450,793.446100
357127,epemicinB,405.53937,712.420400
357128,"epemicinB, M+H",405.53937,1423.832300


In [3]:
FeatureMatrix= pd.read_csv("results/interim/analysis/Grouped_Matrix.csv", sep='\t', index_col="Unnamed: 0")
FeatureMatrix

Unnamed: 0,mz,RT,MDNAWGS11,MDNAWGS14,NBC_00843,NBC_01116,NBC_01134
0,228.195786,473.889839,0.0,0.0,0.0,6.0,0.0
1,388.263562,637.163227,0.0,0.0,0.0,4.0,0.0
2,185.092057,78.812706,0.0,0.0,0.0,4.0,0.0
3,195.112804,100.212525,0.0,0.0,0.0,4.0,0.0
4,376.263502,634.989413,0.0,0.0,0.0,4.0,0.0
...,...,...,...,...,...,...,...
1317,507.281355,383.981733,0.0,0.0,0.0,2.0,0.0
1318,1485.641561,487.603805,0.0,0.0,0.0,2.0,0.0
1319,299.163492,52.112255,0.0,0.0,0.0,2.0,0.0
1320,1089.492884,530.120348,0.0,0.0,0.0,2.0,0.0


Annotate the features detected by GNPS according to mz and RT (mz tolerance 10 ppm and RT tolerance 20 seconds)

In [4]:
FeatureMatrix.insert(0, 'GNPS_IDs', '')

for i, mz, rt in zip(FeatureMatrix.index, FeatureMatrix['mz'], FeatureMatrix['RT']):
    hits = []
    for name, GNPS_mz, GNPS_rt, in zip(GNPS['Compound_Name'], GNPS['Precursor_MZ'], GNPS['RetentionTime']):
        mass_delta = (abs(GNPS_mz-mz)/GNPS_mz)*1000000.0 if GNPS_mz != 0 else np.nan
        if (GNPS_rt >= rt-30.0) & (GNPS_rt <= rt+30.0) & (mass_delta<= 20.0):
            hit = f'{name}'
            if hit not in hits:
                hits.append(hit)
    FeatureMatrix['GNPS_IDs'][i] = ' ## '.join(hits)

FeatureMatrix.to_csv("results/data_analysis/GNPS_annotated_feature_matrix.tsv", sep='\t', index = False)
FeatureMatrix

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  FeatureMatrix['GNPS_IDs'][i] = ' ## '.join(hits)


Unnamed: 0,GNPS_IDs,mz,RT,MDNAWGS11,MDNAWGS14,NBC_00843,NBC_01116,NBC_01134
0,,228.195786,473.889839,0.0,0.0,0.0,6.0,0.0
1,,388.263562,637.163227,0.0,0.0,0.0,4.0,0.0
2,cyclo-(Ala-4-hydroxy-Pro),185.092057,78.812706,0.0,0.0,0.0,4.0,0.0
3,,195.112804,100.212525,0.0,0.0,0.0,4.0,0.0
4,,376.263502,634.989413,0.0,0.0,0.0,4.0,0.0
...,...,...,...,...,...,...,...,...
1317,,507.281355,383.981733,0.0,0.0,0.0,2.0,0.0
1318,,1485.641561,487.603805,0.0,0.0,0.0,2.0,0.0
1319,,299.163492,52.112255,0.0,0.0,0.0,2.0,0.0
1320,,1089.492884,530.120348,0.0,0.0,0.0,2.0,0.0


Keep the unannotated features only

In [5]:
FeatureMatrix= FeatureMatrix[FeatureMatrix.GNPS_IDs == '']
FeatureMatrix= FeatureMatrix.drop(columns= "GNPS_IDs")
FeatureMatrix= FeatureMatrix.set_index(["RT", "mz"])
FeatureMatrix_tocsv= FeatureMatrix.reset_index()
FeatureMatrix_tocsv.to_csv("results/data_analysis/FeatureMatrix_unknowns.tsv", sep="\t", index =None)
FeatureMatrix

Unnamed: 0_level_0,Unnamed: 1_level_0,MDNAWGS11,MDNAWGS14,NBC_00843,NBC_01116,NBC_01134
RT,mz,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
473.889839,228.195786,0.0,0.0,0.0,6.0,0.0
637.163227,388.263562,0.0,0.0,0.0,4.0,0.0
100.212525,195.112804,0.0,0.0,0.0,4.0,0.0
634.989413,376.263502,0.0,0.0,0.0,4.0,0.0
280.575491,285.134468,0.0,0.0,0.0,4.0,0.0
...,...,...,...,...,...,...
383.981733,507.281355,0.0,0.0,0.0,2.0,0.0
487.603805,1485.641561,0.0,0.0,0.0,2.0,0.0
52.112255,299.163492,0.0,0.0,0.0,2.0,0.0
530.120348,1089.492884,0.0,0.0,0.0,2.0,0.0


#### `2) SIRIUS and CSI:FingerID annotations`

Create a matrix with all SIRIUS and CSI:FingerID formula and structural predictions, only choose #1 rankings predictions and combine the dataframes to annotate formula and structural predictions according to RT and mz:

In [6]:
import glob
import pandas as pd
import numpy as np

input_formulas = glob.glob("results/formulas/formulas_*.csv")
input_structures = glob.glob("results/structures/structures_*.csv")

df_formulas will contain likely duplicate formulas that could be either isomeric, isobaric compounds, or identical compounds (with identical RT and mz)

In [7]:
DF_SIRIUS = pd.DataFrame()
list_of_df=[]
for csv in input_formulas:
    df= pd.read_csv(csv, sep=",", index_col="Unnamed: 0")
    s= df["opt_global_rank"]
    pd.to_numeric(s)
    df= df.loc[df["opt_global_rank"]==1]
    df_score=df.filter(regex=fr"Score")
    df_opt=df.filter(regex=fr"opt")
    cols_score= df_score.columns
    cols_opt= df_opt.columns
    df= df.drop(columns=cols_score)
    df= df.drop(columns= cols_opt)
    df=df.reset_index()
    list_of_df.append(df)

DF_SIRIUS= pd.concat(list_of_df,ignore_index=True)
DF_SIRIUS= DF_SIRIUS.drop(columns="index")
df_formulas= DF_SIRIUS.rename(columns= {"chemical_formula": "formulas", "exp_mass_to_charge": "mz", "retention_time": "RT"})
df_formulas = df_formulas.set_index("formulas")
df_singletons=df_formulas.reset_index().drop_duplicates(subset="formulas", keep=False)

df_singletons= df_singletons.set_index("formulas")
idx= df_singletons.index
df_sirius= df_formulas.drop(idx)
new_df= pd.DataFrame()
df= pd.DataFrame()
idx= df_sirius.index
for i, index in enumerate(idx):
    new_index= new_df.index
    if index not in new_index:
        s= df_sirius.iloc[i]
        new_df= new_df.append(s)
    else:
        #print(index)
        mz_0= df_sirius["mz"][i]
        mz_1= new_df["mz"][index]
        time_0= df_sirius["RT"][i]
        time_1= new_df["RT"][index]
        #(print(mz_0, time_0, mz_1, time_1))
        mass_delta = (abs(mz_0 - mz_1)/mz_0)*1000000
        maxdeltaRT = time_0 + 30.0
        mindeltaRT = time_0 - 30.0
        if (mindeltaRT<= time_1 <= maxdeltaRT) & (mass_delta<= 20.0):
            pass
        else:
            m= df_sirius.iloc[i]
            df= df.append(m)

DF_SIRIUS= pd.concat([new_df, df], axis=0)
DF_SIRIUS_final= pd.concat([DF_SIRIUS, df_singletons], axis=0)
DF_SIRIUS_final= DF_SIRIUS_final.reset_index()
DF_SIRIUS_final= DF_SIRIUS_final.rename(columns={"index":"formulas"})
DF_SIRIUS_final.to_csv("results/data_analysis/SIRIUS_library.csv", sep="\t", index=None)
DF_SIRIUS_final

Unnamed: 0,formulas,mz,RT
0,C12H23NO10,342.139429,47.929561
1,C12H25NO11,360.150107,47.929561
2,C12H20O10,325.112906,47.929561
3,C18H35NO16,522.203094,51.640810
4,C24H42O21,667.229504,51.640810
...,...,...,...
4318,C25H54N4O11,569.379725,634.086957
4319,C21H36O7,423.237816,643.592628
4320,C31H53N5O5,598.394986,649.534535
4321,C22H42O6,425.289790,647.154538


Repeat for structral predictions (remove duplicates with the same inchi_keys, which means they represent the same structure):

In [8]:
DF_CSI= []
for i, formulas in enumerate(input_structures):
    df= pd.read_csv(formulas, index_col="Unnamed: 0")
    df= df.loc[df["opt_global_rank"]==1]
    df_score=df.filter(regex=fr"best_search_engine_score")
    df_opt=df.filter(regex=fr"opt")
    cols_score= df_score.columns
    cols_opt= df_opt.columns
    df= df.drop(columns=cols_score)
    df= df.drop(columns= cols_opt)
    df= df.drop(columns= "identifier")
    df=df.reset_index()
    df= df.drop(columns="index")
    DF_CSI.append(df)


df_structures= pd.concat(DF_CSI, axis=0).sort_values("chemical_formula")
df_structures = df_structures.drop_duplicates(subset=['inchi_key'], keep='first')
df_structures= df_structures.drop(columns=["inchi_key"]) #leave smiles for visualisationdf_structures= df_structures.rename(columns={"chemical_formula": "formulas", "exp_mass_to_charge": "mz", "retention_time": "RT"})
df_structures= df_structures.rename(columns={"chemical_formula":"formulas"})
df_structures= df_structures.set_index("formulas")
df_singletons=df_structures.reset_index().drop_duplicates(subset="formulas", keep=False)
df_singletons= df_singletons.set_index("formulas")
idx= df_singletons.index
df_CSI= df_structures.drop(labels=idx, axis=0)
new_df= pd.DataFrame()
df= pd.DataFrame()
idx= df_CSI.index
for i, index in enumerate(idx):
    new_index= new_df.index
    if index not in new_index:
        s= df_CSI.iloc[i]
        new_df= new_df.append(s)
    else:
        #print(index)
        mz_0= df_CSI["exp_mass_to_charge"][i]
        mz_1= new_df["exp_mass_to_charge"][index]
        time_0= df_CSI["retention_time"][i]
        time_1= new_df["retention_time"][index]
        #(print(mz_0, time_0, mz_1, time_1))
        mass_delta = (abs(mz_0 - mz_1)/mz_0)*1000000
        maxdeltaRT = time_0 + 30.0
        mindeltaRT = time_0 - 30.0
        if (mindeltaRT<= time_1 <= maxdeltaRT) & (mass_delta<= 20.0):
            pass
        else:
            m= df_CSI.iloc[i]
            df= df.append(m)


DF_CSI= pd.concat([new_df, df], axis=0)
DF_CSI_final= pd.concat([DF_CSI, df_singletons], axis=0)
DF_CSI_final= DF_CSI_final.reset_index()
DF_CSI_final= DF_CSI_final.rename(columns={"index":"formulas"})
DF_CSI_final.to_csv("results/data_analysis/CSI_library.csv", sep="\t", index= None)
DF_CSI_final

Unnamed: 0,formulas,smiles,description,exp_mass_to_charge,retention_time
0,C14H27N3O5,CC(C)CC(C(=O)NC(C(C)C)C(=O)O)NC(=O)C(CO)N,2-[[2-[(2-amino-3-hydroxypropanoyl)amino]-4-me...,318.202393,232.371513
1,C15H25N3O3,CNC(=O)C(C1CC1)N2CCCC2C(=O)N3CCOCC3,,318.181296,280.683843
2,C16H26N2O6,COCC(=O)NC1COC2C1OCC2OC(=O)NC3CCCCC3,,343.187880,194.703205
3,C17H25N5O2,CN(C)CCCNCCCNC(=O)C1=NC2=CC=CC=C2C(=O)N1,N-(3-{[3-(dimethylamino)propyl]amino}propyl)-4...,349.237488,417.928308
4,C18H28O4,C(CCCCCCC(=O)O)CCCCCC#CC=CC(=O)O,(E)-octadec-2-en-4-ynedioic acid,309.203924,501.396179
...,...,...,...,...,...
719,C59H78N4O9,CC1CCC2=CCC3C(CC(C(C4C(CC#CC(CC3C2C1)C5COC(=O)...,,987.589352,437.846465
720,C59H78O14,CCC(CC1CC(CC=CC(CC(CC(=CC=CC=CC=CC2=C(C(=CC=C2...,,1011.537676,369.419101
721,C60H78N4O7,CCNCC1CC2C3CCC(=O)CC3NCC2CC1C4CCC5(C#CC(C6=CC(...,,984.625424,385.784229
722,C65H89N7O9,CCNC(CC=C(C)C)(C(=C)C=CC=C1C(CCCC2C1C(C3(C2)C4...,,1112.673108,374.674933


Annotate the formulas and structural predictions to the feature matrix according to SIRIUS and CSI:

In [9]:
DF_features= pd.read_csv("results/interim/analysis/Matrix_no_blanks.csv", sep="\t", index_col="Unnamed: 0")
DF_features

Unnamed: 0,mz,RT,20211009_UMETAB222_POS_12_FPY12_Plate-13_NBC_01116_rep1.mzML,20211009_UMETAB222_POS_24_FPY12_Plate-13_NBC_01116_rep2.mzML,20211009_UMETAB222_POS_33_FPY12_Plate-13_NBC_01134_rep3.mzML,20210826_UMETAB219_POS_ISP2_Plate-1_MDNAWGS11_rep1.mzML,20210827_UMETAB219_POS_ISP2_Plate-2_MDNAWGS14_rep1.mzML,20211006_UMETAB222_POS_28_ISP2_Plate-11_NBC_00843_rep3.mzML,20211009_UMETAB222_POS_10_ISP2_Plate-13_NBC_01116_rep1.mzML,20211009_UMETAB222_POS_22_ISP2_Plate-13_NBC_01116_rep2.mzML,20211009_UMETAB222_POS_31_ISP2_Plate-13_NBC_01134_rep3.mzML,20211009_UMETAB222_POS_11_DNPM_Plate-13_NBC_01116_rep1.mzML,20211009_UMETAB222_POS_23_DNPM_Plate-13_NBC_01116_rep2.mzML,20211009_UMETAB222_POS_32_DNPM_Plate-13_NBC_01134_rep3.mzML
0,228.195786,473.889839,1.0,1.0,,,,,1.0,1.0,,1.0,1.0,
1,388.263562,637.163227,,,,,,,1.0,1.0,,1.0,1.0,
2,185.092057,78.812706,,,,,,,1.0,1.0,,1.0,1.0,
3,195.112804,100.212525,,,,,,,1.0,1.0,,1.0,1.0,
4,376.263502,634.989413,,,,,,,1.0,1.0,,1.0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1317,507.281355,383.981733,,,,,,,,,,1.0,1.0,
1318,1485.641561,487.603805,,,,,,,,,,1.0,1.0,
1319,299.163492,52.112255,,,,,,,,,,1.0,1.0,
1320,1089.492884,530.120348,,,,,,,,,,1.0,1.0,


In [10]:
DF_features.insert(0, 'CSI_predictions_name', '')
DF_features.insert(0, 'CSI_predictions_formula', '')
DF_features.insert(0, 'CSI_predictions_smiles', '')


for i, mz, rt in zip(DF_features.index, DF_features['mz'], DF_features['RT']):
    hits1 = []
    hits2= []
    hits3=[]
    for name, smiles, formula, Pred_mz, Pred_rt, in zip(DF_CSI_final['description'], DF_CSI_final['smiles'], DF_CSI_final['formulas'], DF_CSI_final['exp_mass_to_charge'], DF_CSI_final['retention_time']):
        mass_delta = (abs(Pred_mz-mz)/Pred_mz)*1000000.0 if Pred_mz != 0 else 0
        if (Pred_rt >= rt-30.0) & (Pred_rt <= rt+30.0) & (mass_delta<= 20.0):
            hit1 = f'{name}'
            hit2 = f'{formula}'
            hit3= f'{smiles}'
            if hit1 not in hits1:
                hits1.append(hit1)
                hits2.append(hit2)
                hits3.append(hit3)
    DF_features['CSI_predictions_name'][i] = ' ## '.join(hits1)
    DF_features['CSI_predictions_formula'][i] = ' ## '.join(hits2)
    DF_features['CSI_predictions_smiles'][i] = ' ## '.join(hits3)
DF_features

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  DF_features['CSI_predictions_name'][i] = ' ## '.join(hits1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  DF_features['CSI_predictions_formula'][i] = ' ## '.join(hits2)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  DF_features['CSI_predictions_smiles'][i] = ' ## '.join(hits3)


Unnamed: 0,CSI_predictions_smiles,CSI_predictions_formula,CSI_predictions_name,mz,RT,20211009_UMETAB222_POS_12_FPY12_Plate-13_NBC_01116_rep1.mzML,20211009_UMETAB222_POS_24_FPY12_Plate-13_NBC_01116_rep2.mzML,20211009_UMETAB222_POS_33_FPY12_Plate-13_NBC_01134_rep3.mzML,20210826_UMETAB219_POS_ISP2_Plate-1_MDNAWGS11_rep1.mzML,20210827_UMETAB219_POS_ISP2_Plate-2_MDNAWGS14_rep1.mzML,20211006_UMETAB222_POS_28_ISP2_Plate-11_NBC_00843_rep3.mzML,20211009_UMETAB222_POS_10_ISP2_Plate-13_NBC_01116_rep1.mzML,20211009_UMETAB222_POS_22_ISP2_Plate-13_NBC_01116_rep2.mzML,20211009_UMETAB222_POS_31_ISP2_Plate-13_NBC_01134_rep3.mzML,20211009_UMETAB222_POS_11_DNPM_Plate-13_NBC_01116_rep1.mzML,20211009_UMETAB222_POS_23_DNPM_Plate-13_NBC_01116_rep2.mzML,20211009_UMETAB222_POS_32_DNPM_Plate-13_NBC_01134_rep3.mzML
0,,,,228.195786,473.889839,1.0,1.0,,,,,1.0,1.0,,1.0,1.0,
1,,,,388.263562,637.163227,,,,,,,1.0,1.0,,1.0,1.0,
2,,,,185.092057,78.812706,,,,,,,1.0,1.0,,1.0,1.0,
3,,,,195.112804,100.212525,,,,,,,1.0,1.0,,1.0,1.0,
4,,,,376.263502,634.989413,,,,,,,1.0,1.0,,1.0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1317,,,,507.281355,383.981733,,,,,,,,,,1.0,1.0,
1318,,,,1485.641561,487.603805,,,,,,,,,,1.0,1.0,
1319,,,,299.163492,52.112255,,,,,,,,,,1.0,1.0,
1320,,,,1089.492884,530.120348,,,,,,,,,,1.0,1.0,


In [11]:
DF_features.insert(0, 'SIRIUS_predictions', '')

for i, mz, rt in zip(DF_features.index, DF_features['mz'], DF_features['RT']):
    hits = []
    for name, Pred_mz, Pred_rt, in zip(DF_SIRIUS_final['formulas'], DF_SIRIUS_final['mz'], DF_SIRIUS_final['RT']):
        mass_delta = (abs(Pred_mz-mz)/Pred_mz)*1000000.0 if Pred_mz != 0 else 0
        if (Pred_rt >= rt-30.0) & (Pred_rt <= rt+30.0) & (mass_delta<= 20.0):
            hit = f'{name}'
            if hit not in hits:
                hits.append(hit)
    DF_features['SIRIUS_predictions'][i] = ' ## '.join(hits)
DF_features

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  DF_features['SIRIUS_predictions'][i] = ' ## '.join(hits)


Unnamed: 0,SIRIUS_predictions,CSI_predictions_smiles,CSI_predictions_formula,CSI_predictions_name,mz,RT,20211009_UMETAB222_POS_12_FPY12_Plate-13_NBC_01116_rep1.mzML,20211009_UMETAB222_POS_24_FPY12_Plate-13_NBC_01116_rep2.mzML,20211009_UMETAB222_POS_33_FPY12_Plate-13_NBC_01134_rep3.mzML,20210826_UMETAB219_POS_ISP2_Plate-1_MDNAWGS11_rep1.mzML,20210827_UMETAB219_POS_ISP2_Plate-2_MDNAWGS14_rep1.mzML,20211006_UMETAB222_POS_28_ISP2_Plate-11_NBC_00843_rep3.mzML,20211009_UMETAB222_POS_10_ISP2_Plate-13_NBC_01116_rep1.mzML,20211009_UMETAB222_POS_22_ISP2_Plate-13_NBC_01116_rep2.mzML,20211009_UMETAB222_POS_31_ISP2_Plate-13_NBC_01134_rep3.mzML,20211009_UMETAB222_POS_11_DNPM_Plate-13_NBC_01116_rep1.mzML,20211009_UMETAB222_POS_23_DNPM_Plate-13_NBC_01116_rep2.mzML,20211009_UMETAB222_POS_32_DNPM_Plate-13_NBC_01134_rep3.mzML
0,C13H25NO2,,,,228.195786,473.889839,1.0,1.0,,,,,1.0,1.0,,1.0,1.0,
1,C27H33NO ## C25H35NO,,,,388.263562,637.163227,,,,,,,1.0,1.0,,1.0,1.0,
2,C9H14N3O ## C8H12N2O3 ## C8H9NO3 ## C6H7N4O2,,,,185.092057,78.812706,,,,,,,1.0,1.0,,1.0,1.0,
3,C10H11NO2 ## C10H14N2O2,,,,195.112804,100.212525,,,,,,,1.0,1.0,,1.0,1.0,
4,C24H35NO ## C26H33NO,,,,376.263502,634.989413,,,,,,,1.0,1.0,,1.0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1317,,,,,507.281355,383.981733,,,,,,,,,,1.0,1.0,
1318,,,,,1485.641561,487.603805,,,,,,,,,,1.0,1.0,
1319,,,,,299.163492,52.112255,,,,,,,,,,1.0,1.0,
1320,,,,,1089.492884,530.120348,,,,,,,,,,1.0,1.0,


In [12]:
DF_features.to_csv("results/data_analysis/FeatureMatrix_SIRIUS_CSI.csv", sep="\t", index= None)