In [3]:
import os
import random

def choose_random_file(folder_path):
    """Return the full path of a randomly chosen regular file in *folder_path*.

    Only direct children of the folder are considered, and sub-directories
    are ignored.

    Args:
        folder_path: Path of the folder to pick a file from.

    Returns:
        ``folder_path`` joined with the name of the chosen file.

    Raises:
        IndexError: If the folder contains no regular files.
        OSError: If the folder cannot be listed (propagated from
            ``os.listdir``).
    """
    candidates = [
        name
        for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    ]

    if not candidates:
        # random.choice() would raise IndexError here as well, but with a
        # generic message that does not say which folder was empty.
        raise IndexError(f"No files found in folder: {folder_path!r}")

    return os.path.join(folder_path, random.choice(candidates))

# Pick one file at random from the data folder and report which one was chosen.
# NOTE(review): the folder is a hard-coded absolute path on one machine.
folder_path = 'C:/Users/genev/projet-prog/AAFC'
random_file = choose_random_file(folder_path)
print("Random file chosen:", random_file)

def get_theoretical_data(file_path, marker):
    """Return the text that follows *marker* on the first line containing it.

    The file is scanned line by line and reading stops at the first match.
    Only surrounding whitespace is stripped from the result, so any separator
    between the marker and the value (for example a leading ``": "``) is kept.

    Args:
        file_path: Path of the text file to search.
        marker: Substring that introduces the wanted value, e.g.
            ``"CH$FORMULA"``.

    Returns:
        The stripped text after the last occurrence of ``marker`` on the
        matching line, or ``None`` if the marker is absent or the file could
        not be read. In both failure cases a message is printed.
    """
    try:
        with open(file_path, 'r') as file:
            for line in file:
                if marker in line:
                    # Nothing after the first match is needed, so return
                    # straight away instead of reading the rest of the file.
                    return line.rpartition(marker)[2].strip()
    except (OSError, ValueError) as e:
        # OSError: missing or unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError) or an empty marker string.
        print(f"Error reading file: {e}")
        return None

    print(f"The marker '{marker}' was not found in the file.")
    return None

# Field markers for the two values looked up in the chosen file.
marker1 = "CH$FORMULA"
marker2 = "CH$EXACT_MASS"
molecular_formula = get_theoretical_data(random_file, marker1)
molecular_weight = get_theoretical_data(random_file, marker2)
# get_theoretical_data() returns None on failure, so test both results
# explicitly against None. The previous `a and b is not None` parsed as
# `a and (b is not None)`, mixing a truthiness test with a None test.
if molecular_formula is not None and molecular_weight is not None:
    print(f"Chemical formula {molecular_formula} \nMolecular weight {molecular_weight}")

def extract_spectral_data(file_path, start_marker, stop_marker):
    """Parse the numeric rows found between two marker lines of a text file.

    Lines before the first line containing ``start_marker`` are ignored, and
    so is the marker line itself. After that, each line is split on
    whitespace and every column is converted to ``float``, until a line
    containing ``stop_marker`` is reached. Blank lines inside the section
    are skipped.

    Args:
        file_path: Path of the text file to read.
        start_marker: Substring marking the line that precedes the data rows.
        stop_marker: Substring marking the line that ends the data rows.

    Returns:
        A list with one list of floats per data row. If the file cannot be
        read or a row contains a non-numeric column, a message is printed and
        the rows parsed so far (possibly none) are returned.
    """
    spectra = []

    try:
        with open(file_path, 'r') as file:
            inside_section = False

            for line in file:
                if start_marker in line:
                    inside_section = True
                    continue  # The marker line itself carries no data row

                if not inside_section:
                    continue

                if stop_marker in line:
                    break

                columns = line.split()
                if not columns:
                    # A blank line is not a data row; do not record it as an
                    # empty spectrum.
                    continue

                spectra.append([float(column) for column in columns])

    except (OSError, ValueError) as e:
        # OSError: missing or unreadable file. ValueError: undecodable bytes
        # or a column that is not a number.
        print(f"Error reading file: {e}")

    return spectra

# Example usage: read the rows between the 'PK$PEAK' line and the '//' line
# of the randomly chosen file and print them as a list of float lists.
start_marker = 'PK$PEAK'
stop_marker = '//'
spectra = extract_spectral_data(random_file, start_marker, stop_marker)
print("Spectral data:", spectra)

Random file chosen: C:/Users/genev/projet-prog/AAFC\MSBNK-AAFC-AC000665.txt
Chemical formula : C27H27N5O4 
Molecular weight : 485.20629
Spectral data: [[53.3553, 4406.47216796875, 82.0], [53.3676, 4821.74365234375, 90.0], [55.7543, 5401.318359375, 101.0], [55.9308, 4852.1611328125, 90.0], [57.4046, 4489.50146484375, 84.0], [58.0664, 5518.47119140625, 103.0], [65.664, 4685.1171875, 87.0], [136.649, 5466.6953125, 102.0], [153.2299, 4834.400390625, 90.0], [276.0767, 10183.853515625, 192.0], [278.0924, 9739.6396484375, 183.0], [315.8497, 5709.896484375, 107.0], [324.2758, 6635.5869140625, 124.0], [342.8102, 6143.84033203125, 115.0], [405.1322, 52761.07421875, 999.0], [423.1428, 35506.7265625, 671.0], [463.1639, 7053.05908203125, 132.0], [483.3773, 6233.22119140625, 117.0], [490.1921, 19244.27734375, 363.0], [508.1955, 45410.6015625, 859.0]]
