In [None]:
from convertors.convert_to_json import *
from mols_calculation import *
import pandas as pd
import os
import re

In [None]:
# Case-insensitive pattern used to recognise in-silico / predicted spectra in
# free-text fields and file names. A `global` statement is only meaningful inside
# a function body, so the name is simply assigned at module level here.
In_Silico_pattern = re.compile(r"in.silico|insilico|predicted|theoretical|Annotation.level.3", flags=re.IGNORECASE)

In [None]:
def in_filename(filename):
    """
    Tell whether a file name marks its content as in-silico / predicted.

    :param filename: The file name to inspect.
    :return: False when the name contains "MSMS_Public"; otherwise True if
             In_Silico_pattern matches somewhere in the name, False if it does not.
    """
    # Names containing "MSMS_Public" are never reported, even if the pattern would match them.
    if "MSMS_Public" in filename:
        return False

    return re.search(In_Silico_pattern, filename) is not None

In [None]:
def normalize_predicted(metadata_dict):
    """
    Normalize the PREDICTED field of a metadata dictionary to "true" or "false".

    The record is flagged as predicted when at least one of these holds:
    - its COMMENT value matches In_Silico_pattern,
    - its PREDICTED value already reads "true" (case and surrounding
      whitespace are ignored),
    - in_filename() accepts its FILENAME value.

    Missing or None fields are treated as empty strings and non-string values
    are converted with str(), so incomplete records no longer raise
    KeyError / TypeError.

    :param metadata_dict: A dictionary containing metadata information; it is modified in place.
    :return: The same dictionary, with "PREDICTED" set to "true" or "false".
    """
    def _as_text(value):
        # None stands for "no value"; anything else is rendered as text for matching.
        return "" if value is None else str(value)

    comment_field = _as_text(metadata_dict.get("COMMENT"))
    predicted = _as_text(metadata_dict.get("PREDICTED")).strip().lower()
    filename = _as_text(metadata_dict.get("FILENAME"))

    # Same evaluation order as before: comment, existing flag, then file name.
    is_predicted = bool(
        re.search(In_Silico_pattern, comment_field)
        or predicted == "true"
        or in_filename(filename)
    )

    metadata_dict["PREDICTED"] = "true" if is_predicted else "false"
    return metadata_dict

In [None]:
def convert_keys(dict_list):
    """
    Rename the keys of every metadata dictionary to their normalized names.

    For each input dictionary, a key is kept only when its lowercase form is a
    key of keys_dict and the name it maps to is listed in keys_list; the value
    is then stored under the mapped name. Every name of keys_list that is still
    absent afterwards is added with an empty string as value.

    :param dict_list: An iterable of metadata dictionaries whose keys are strings.
    :return: A new list with one converted dictionary per input dictionary, in
             the same order. The input dictionaries are not modified.
    """
    output = []
    for metadata_dict in dict_list:
        converted = {}
        for key, val in metadata_dict.items():
            lowered = key.lower()  # computed once per key
            if lowered not in keys_dict:
                continue
            target = keys_dict[lowered]
            if target in keys_list:
                converted[target] = val

        # Guarantee that every expected key exists, defaulting to "".
        for key in keys_list:
            converted.setdefault(key, "")
        output.append(converted)

    return output

In [None]:
# Directory handed to convert_to_json as the location of the original database files.
# Set the FRAGHUB_INPUT_DIR environment variable to run the notebook on another
# machine; the historical hard-coded location remains the fallback.
original_db_path = os.environ.get("FRAGHUB_INPUT_DIR") or r"C:\Users\Axel\Documents\PYTHON\FragHub\INPUT"

In [None]:
# convert_to_json (star-imported from convertors.convert_to_json) returns five
# collections, unpacked here by name; the following cells treat each of them as
# a list of metadata dictionaries.
FINAL_MSP, FINAL_XML, FINAL_CSV, FINAL_JSON, FINAL_MGF = convert_to_json(original_db_path)

Normaliser les clés

In [None]:
# Replace the MSP records with copies whose keys went through convert_keys,
# so every record exposes the same normalized key set.
FINAL_MSP = convert_keys(FINAL_MSP)

In [None]:
# Replace the XML records with copies whose keys went through convert_keys,
# so every record exposes the same normalized key set.
FINAL_XML = convert_keys(FINAL_XML)

In [None]:
# Replace the CSV records with copies whose keys went through convert_keys,
# so every record exposes the same normalized key set.
FINAL_CSV = convert_keys(FINAL_CSV)

In [None]:
# Replace the JSON records with copies whose keys went through convert_keys,
# so every record exposes the same normalized key set.
FINAL_JSON = convert_keys(FINAL_JSON)

In [None]:
# Replace the MGF records with copies whose keys went through convert_keys,
# so every record exposes the same normalized key set.
FINAL_MGF = convert_keys(FINAL_MGF)

In [None]:
# Total number of records across the five FINAL_* lists.
print(sum(map(len, (FINAL_MSP, FINAL_XML, FINAL_CSV, FINAL_JSON, FINAL_MGF))))

Ajouter la colonne 'PREDICTED' et la remplir

In [None]:
compteur = 0  # not read by any visible cell; kept so the notebook namespace is unchanged
# Give every MSP record a PREDICTED entry, then let normalize_predicted settle it
# to "true" or "false".
# NOTE(review): the reset to "" discards any PREDICTED value already present on
# the record — confirm this is intended.
for idx, record in enumerate(FINAL_MSP):
    record["PREDICTED"] = ""
    FINAL_MSP[idx] = normalize_predicted(record)

In [None]:
compteur = 0  # not read by any visible cell; kept so the notebook namespace is unchanged
# Give every XML record a PREDICTED entry, then let normalize_predicted settle it
# to "true" or "false".
# NOTE(review): the reset to "" discards any PREDICTED value already present on
# the record — confirm this is intended.
for idx, record in enumerate(FINAL_XML):
    record["PREDICTED"] = ""
    FINAL_XML[idx] = normalize_predicted(record)

In [None]:
compteur = 0  # not read by any visible cell; kept so the notebook namespace is unchanged
# Give every CSV record a PREDICTED entry, then let normalize_predicted settle it
# to "true" or "false".
# NOTE(review): the reset to "" discards any PREDICTED value already present on
# the record — confirm this is intended.
for idx, record in enumerate(FINAL_CSV):
    record["PREDICTED"] = ""
    FINAL_CSV[idx] = normalize_predicted(record)

In [None]:
compteur = 0  # not read by any visible cell; kept so the notebook namespace is unchanged
# Give every JSON record a PREDICTED entry, then let normalize_predicted settle it
# to "true" or "false".
# NOTE(review): the reset to "" discards any PREDICTED value already present on
# the record — confirm this is intended.
for idx, record in enumerate(FINAL_JSON):
    record["PREDICTED"] = ""
    FINAL_JSON[idx] = normalize_predicted(record)

In [None]:
compteur = 0  # not read by any visible cell; kept so the notebook namespace is unchanged
# Give every MGF record a PREDICTED entry, then let normalize_predicted settle it
# to "true" or "false".
# NOTE(review): the reset to "" discards any PREDICTED value already present on
# the record — confirm this is intended.
for idx, record in enumerate(FINAL_MGF):
    record["PREDICTED"] = ""
    FINAL_MGF[idx] = normalize_predicted(record)

In [None]:
# Build the MSP DataFrame from the list of record dictionaries (one row per record),
# then delete the list — presumably to avoid keeping the records in memory twice.
# Because FINAL_MSP is deleted, re-running this cell on its own raises NameError.
msp_df = pd.DataFrame(FINAL_MSP)
del FINAL_MSP

In [None]:
# Build the XML DataFrame from the list of record dictionaries (one row per record),
# then delete the list — presumably to avoid keeping the records in memory twice.
# Because FINAL_XML is deleted, re-running this cell on its own raises NameError.
xml_df = pd.DataFrame(FINAL_XML)
del FINAL_XML

In [None]:
# Build the CSV DataFrame from the list of record dictionaries (one row per record),
# then delete the list — presumably to avoid keeping the records in memory twice.
# Because FINAL_CSV is deleted, re-running this cell on its own raises NameError.
csv_df = pd.DataFrame(FINAL_CSV)
del FINAL_CSV

In [None]:
# Build the JSON DataFrame from the list of record dictionaries (one row per record),
# then delete the list — presumably to avoid keeping the records in memory twice.
# Because FINAL_JSON is deleted, re-running this cell on its own raises NameError.
json_df = pd.DataFrame(FINAL_JSON)
del FINAL_JSON

In [None]:
# Build the MGF DataFrame from the list of record dictionaries (one row per record),
# then delete the list — presumably to avoid keeping the records in memory twice.
# Because FINAL_MGF is deleted, re-running this cell on its own raises NameError.
mgf_df = pd.DataFrame(FINAL_MGF)
del FINAL_MGF

On ne garde que les spectres LC expérimentaux (exclusion des spectres GC/EI et des spectres prédits)

In [None]:
# Keep only rows whose instrument type does not mention GC or EI (case-insensitive)
# and whose PREDICTED flag is "false".
# The emptiness guard matters: a DataFrame built from an empty record list has no
# columns at all, so indexing 'INSTRUMENTTYPE' on it would raise KeyError.
if not msp_df.empty:
    # na=False counts a missing instrument type as "no match", keeping the mask boolean.
    msp_df = msp_df[~msp_df['INSTRUMENTTYPE'].str.contains('GC|EI', case=False, na=False)]
    msp_df = msp_df[msp_df['PREDICTED'] == "false"]

In [None]:
# Keep only rows whose instrument type does not mention GC or EI (case-insensitive)
# and whose PREDICTED flag is "false".
# The emptiness guard matters: a DataFrame built from an empty record list has no
# columns at all, so indexing 'INSTRUMENTTYPE' on it would raise KeyError.
if not xml_df.empty:
    # na=False counts a missing instrument type as "no match", keeping the mask boolean.
    xml_df = xml_df[~xml_df['INSTRUMENTTYPE'].str.contains('GC|EI', case=False, na=False)]
    xml_df = xml_df[xml_df['PREDICTED'] == "false"]

In [ ]:
# Keep only rows whose instrument type does not mention GC or EI (case-insensitive)
# and whose PREDICTED flag is "false".
# The emptiness guard matters: a DataFrame built from an empty record list has no
# columns at all, so indexing 'INSTRUMENTTYPE' on it would raise KeyError.
if not csv_df.empty:
    # na=False counts a missing instrument type as "no match", keeping the mask boolean.
    csv_df = csv_df[~csv_df['INSTRUMENTTYPE'].str.contains('GC|EI', case=False, na=False)]
    csv_df = csv_df[csv_df['PREDICTED'] == "false"]

In [ ]:
# Keep only rows whose instrument type does not mention GC or EI (case-insensitive)
# and whose PREDICTED flag is "false".
# The emptiness guard matters: a DataFrame built from an empty record list has no
# columns at all, so indexing 'INSTRUMENTTYPE' on it would raise KeyError.
if not json_df.empty:
    # na=False counts a missing instrument type as "no match", keeping the mask boolean.
    json_df = json_df[~json_df['INSTRUMENTTYPE'].str.contains('GC|EI', case=False, na=False)]
    json_df = json_df[json_df['PREDICTED'] == "false"]

In [ ]:
# Keep only rows whose instrument type does not mention GC or EI (case-insensitive)
# and whose PREDICTED flag is "false".
# The emptiness guard matters: a DataFrame built from an empty record list has no
# columns at all, so indexing 'INSTRUMENTTYPE' on it would raise KeyError.
if not mgf_df.empty:
    # na=False counts a missing instrument type as "no match", keeping the mask boolean.
    mgf_df = mgf_df[~mgf_df['INSTRUMENTTYPE'].str.contains('GC|EI', case=False, na=False)]
    mgf_df = mgf_df[mgf_df['PREDICTED'] == "false"]

Passer RDKit sur chacun des spectres

In [None]:
# Apply mols_derivation_and_calculation only when the MSP frame still has data;
# an empty frame is left exactly as it is.
msp_df = msp_df if msp_df.empty else mols_derivation_and_calculation(msp_df)

In [None]:
# Apply mols_derivation_and_calculation only when the XML frame still has data;
# an empty frame is left exactly as it is.
xml_df = xml_df if xml_df.empty else mols_derivation_and_calculation(xml_df)

In [None]:
# Apply mols_derivation_and_calculation only when the CSV frame still has data;
# an empty frame is left exactly as it is.
csv_df = csv_df if csv_df.empty else mols_derivation_and_calculation(csv_df)

In [None]:
# Apply mols_derivation_and_calculation only when the JSON frame still has data;
# an empty frame is left exactly as it is.
json_df = json_df if json_df.empty else mols_derivation_and_calculation(json_df)

In [None]:
# Apply mols_derivation_and_calculation only when the MGF frame still has data;
# an empty frame is left exactly as it is.
mgf_df = mgf_df if mgf_df.empty else mols_derivation_and_calculation(mgf_df)

Écrire les fichiers de sortie

In [None]:
# Write the MSP table as a semicolon-separated, UTF-8 encoded CSV file
# without the DataFrame index column.
msp_df.to_csv(
    r"C:\Users\Axel\Documents\PYTHON\FragHub\TOOLS\Generate_csv_from_original_files\output\msp_df.csv",
    sep=";",
    quotechar='"',
    index=False,
    encoding="UTF-8",
)

In [None]:
# Write the XML table as a semicolon-separated, UTF-8 encoded CSV file
# without the DataFrame index column.
xml_df.to_csv(
    r"C:\Users\Axel\Documents\PYTHON\FragHub\TOOLS\Generate_csv_from_original_files\output\xml_df.csv",
    sep=";",
    quotechar='"',
    index=False,
    encoding="UTF-8",
)

In [None]:
# Write the CSV-derived table as a semicolon-separated, UTF-8 encoded CSV file
# without the DataFrame index column.
csv_df.to_csv(
    r"C:\Users\Axel\Documents\PYTHON\FragHub\TOOLS\Generate_csv_from_original_files\output\csv_df.csv",
    sep=";",
    quotechar='"',
    index=False,
    encoding="UTF-8",
)

In [None]:
# Write the JSON-derived table as a semicolon-separated, UTF-8 encoded CSV file
# without the DataFrame index column.
json_df.to_csv(
    r"C:\Users\Axel\Documents\PYTHON\FragHub\TOOLS\Generate_csv_from_original_files\output\json_df.csv",
    sep=";",
    quotechar='"',
    index=False,
    encoding="UTF-8",
)

In [None]:
# Write the MGF table as a semicolon-separated, UTF-8 encoded CSV file
# without the DataFrame index column.
mgf_df.to_csv(
    r"C:\Users\Axel\Documents\PYTHON\FragHub\TOOLS\Generate_csv_from_original_files\output\mgf_df.csv",
    sep=";",
    quotechar='"',
    index=False,
    encoding="UTF-8",
)