##### `If your files originate from a Thermo instrument:`

Copy all your `*.raw` files to the `data/raw` directory; you can then use the following script to generate a `samples.tsv` file automatically:

In [None]:
import os

import pandas as pd

# Build config/samples.tsv from the Thermo *.raw files found in data/raw.
RAW_SUFFIX = ".raw"

# os.listdir returns entries in arbitrary order; sort so the sheet is reproducible.
raw_files = sorted(
    file for file in os.listdir(os.path.join("data", "raw")) if file.endswith(RAW_SUFFIX)
)

df = pd.DataFrame()
# Strip only the trailing extension. A regex replace of ".raw" treats "." as a
# wildcard and is not anchored, so it would also delete e.g. "_raw" from inside
# a file name ("my_raw_run.raw" -> "my_run").
df["sample_name"] = [file[: -len(RAW_SUFFIX)] for file in raw_files]
# Placeholder columns to be filled in by hand afterwards.
df["comment"] = " "
df["MAPnumber"] = " "
# The row index is written as the first (unnamed) column of the TSV.
df.to_csv(os.path.join("config", "samples.tsv"), sep="\t")
df

##### `If your files originate from another instrument:`

Copy all your already converted `*.mzML` files to the `data/mzML` directory; you can then use the following script to generate a `samples.tsv` file automatically:

In [None]:
import os

import pandas as pd

# Build config/samples.tsv from the already converted *.mzML files in data/mzML.
MZML_SUFFIX = ".mzML"

# os.listdir returns entries in arbitrary order; sort so the sheet is reproducible.
mzml_files = sorted(
    file for file in os.listdir(os.path.join("data", "mzML")) if file.endswith(MZML_SUFFIX)
)

df = pd.DataFrame()
# Strip only the trailing extension. A regex replace of ".mzML" treats "." as a
# wildcard and is not anchored, so it would also delete e.g. "_mzML" from inside
# a file name.
df["sample_name"] = [file[: -len(MZML_SUFFIX)] for file in mzml_files]
# Placeholder columns to be filled in by hand afterwards.
df["comment"] = " "
df["MAPnumber"] = " "
# The row index is written as the first (unnamed) column of the TSV.
df.to_csv(os.path.join("config", "samples.tsv"), sep="\t")
df

##### `Create a GNPS metadata table:`
This is datafile-dependent, so it is preferable to do it interactively through a Jupyter notebook.

In [None]:
# Create a GNPS-compatible metadata table from the list of mzML files.
# Imported here so the cell does not depend on an earlier cell having been run.
import os

import pandas as pd

MZML_SUFFIX = ".mzML"

# os.listdir returns entries in arbitrary order; sort so that the MAP ids
# (assigned by position below) are reproducible between runs.
mzml_files = sorted(
    file for file in os.listdir(os.path.join("data", "mzML")) if file.endswith(MZML_SUFFIX)
)

metadata = pd.DataFrame()
metadata["filename"] = mzml_files
metadata["ATTRIBUTE_MAPID"] = ["MAP" + str(i) for i in range(len(metadata))]
# Strip only the trailing extension; an unanchored ".mzML" regex would also
# remove matches from inside the file name.
metadata["ATTRIBUTE_compound"] = [file[: -len(MZML_SUFFIX)] for file in mzml_files]

# Genome id: prefer an NBC-style id, fall back to an MDNA WGS-style id, and only
# then to "blank". Filling with "blank" first would leave no missing values for
# the MDNA fallback to fill, so MDNA ids would never be used.
# NOTE(review): the patterns are file-naming specific (e.g. "NBC?\d*" also
# matches a bare "NB"); adapt them to your own naming scheme.
nbc_id = metadata["filename"].str.extract(r'(NBC_?\d*|NBC?\d*)', expand=False)
mdna_id = metadata["filename"].str.extract(r'(MDNAWGS?\d*|MDNA_WGS_?\d*)', expand=False)
metadata["ATTRIBUTE_genomeID"] = nbc_id.fillna(mdna_id).fillna("blank")

# Cultivation medium parsed from the file name; stays missing when none matches.
metadata["ATTRIBUTE_media"] = metadata["filename"].str.extract(
    r'(ISP2|DNPM|FPY12|MA|soyM\d*)', expand=False
)
# astype(str) renders a missing medium as the literal text "nan" in the comment.
metadata["ATTRIBUTE_comment"] = (
    metadata["ATTRIBUTE_genomeID"].astype(str) + "_" + metadata["ATTRIBUTE_media"].astype(str)
)

# Make sure the export directory exists before writing into it.
gnps_dir = os.path.join("results", "GNPSexport")
os.makedirs(gnps_dir, exist_ok=True)
metadata.to_csv(os.path.join(gnps_dir, "metadata.tsv"), sep="\t", index=False)
metadata