In [17]:
def calculate_formula(mass, tolerance):
    """Enumerate C/H/O/N/S formulas whose mass lies within ``tolerance`` of ``mass``.

    Every candidate contains at least one carbon and one hydrogen atom;
    oxygen, nitrogen and sulfur counts start at zero.  A candidate is kept
    when ``abs(total_mass - mass) <= tolerance`` and, for every element, its
    share of the total atom count lies inside the ``min_ratio``/``max_ratio``
    window defined below.

    Args:
        mass: Target mass, in the same units as ``atomic_masses``.
        tolerance: Maximum allowed absolute deviation from ``mass``.

    Returns:
        A list of strings of the form ``'C{c}H{h}O{o}N{n}S{s}'`` (zero counts
        are written out), ordered by increasing C, then H, O, N and S count.
    """
    # Atomic masses (in atomic mass units).
    atomic_masses = {'C': 12.01, 'H': 1.008, 'O': 16.00, 'N': 14.01, 'S': 32.07}

    # Allowed share of each element, expressed as count / total atom count.
    # NOTE: such a share can never exceed 1.0, so the upper bounds for H (2.0)
    # and O (1.0) never reject anything as written.
    min_ratio = {'C': 0.1, 'H': 0.1, 'O': 0.0, 'N': 0.0, 'S': 0.0}
    max_ratio = {'C': 0.9, 'H': 2.0, 'O': 1.0, 'N': 0.5, 'S': 0.2}

    def max_count(element):
        # Largest count of one element that could fit in the mass window alone.
        return int((mass + tolerance) / atomic_masses[element])

    def too_heavy(partial_mass):
        # Masses only grow as atoms are added, so once a partial sum is past
        # the upper edge of the window no larger count can match either.
        return partial_mass - mass > tolerance

    possible_formulas = []

    # The partial sums are accumulated left to right (C, H, O, N, S) so the
    # final total is the same floating-point value as a single summed
    # expression in that order.
    for num_C in range(1, max_count('C') + 1):
        mass_C = num_C * atomic_masses['C']
        for num_H in range(1, max_count('H') + 1):
            mass_CH = mass_C + num_H * atomic_masses['H']
            if too_heavy(mass_CH):
                break
            for num_O in range(max_count('O') + 1):
                mass_CHO = mass_CH + num_O * atomic_masses['O']
                if too_heavy(mass_CHO):
                    break
                for num_N in range(max_count('N') + 1):
                    mass_CHON = mass_CHO + num_N * atomic_masses['N']
                    if too_heavy(mass_CHON):
                        break
                    for num_S in range(max_count('S') + 1):
                        total_mass = mass_CHON + num_S * atomic_masses['S']
                        if too_heavy(total_mass):
                            break
                        # Still too light: keep adding sulfur.
                        if abs(total_mass - mass) > tolerance:
                            continue

                        counts = {'C': num_C, 'H': num_H, 'O': num_O,
                                  'N': num_N, 'S': num_S}
                        # num_C >= 1, so the atom total is never zero.
                        total_atoms = sum(counts.values())
                        ratios_ok = all(
                            min_ratio[element] <= count / total_atoms <= max_ratio[element]
                            for element, count in counts.items()
                        )
                        if ratios_ok:
                            possible_formulas.append(
                                f'C{num_C}H{num_H}O{num_O}N{num_N}S{num_S}'
                            )

    return possible_formulas

def refine_formulas(possible_formulas, mass_spectrum):
    """Keep the formulas whose nominal mass appears in ``mass_spectrum``.

    Each formula is split into ``<element><count>`` tokens.  An element that
    does not appear counts as zero atoms and a missing count means one atom,
    so both ``'C2H4O2N3S0'`` and ``'C5H8O2'`` are accepted.  The mass is
    computed from integer nominal masses (C=12, H=1, O=16, N=14, S=32) and
    looked up with ``in``, so ``mass_spectrum`` may be any container that
    supports membership tests on those integers (for a dict: its keys).

    Args:
        possible_formulas: Iterable of formula strings built from C, H, O, N
            and S.
        mass_spectrum: Container of observed masses.

    Returns:
        The matching formulas, in their original order.

    Raises:
        ValueError: If a formula is empty, malformed, or contains an element
            other than C, H, O, N or S.
    """
    import re  # local import: no module-level import block is visible here

    nominal_masses = {'C': 12, 'H': 1, 'O': 16, 'N': 14, 'S': 32}
    token_pattern = re.compile(r'([A-Z][a-z]?)([0-9]*)')
    formula_pattern = re.compile(r'(?:[A-Z][a-z]?[0-9]*)+')

    refined_formulas = []
    for formula in possible_formulas:
        # Validate the whole string first so stray characters are reported
        # instead of being silently skipped by findall().
        if not formula_pattern.fullmatch(formula):
            raise ValueError(f'Malformed formula: {formula!r}')

        total_mass = 0
        for element, count in token_pattern.findall(formula):
            if element not in nominal_masses:
                raise ValueError(
                    f'Unsupported element {element!r} in formula {formula!r}'
                )
            total_mass += nominal_masses[element] * (int(count) if count else 1)

        # Keep the formula only when its nominal mass is an observed peak.
        if total_mass in mass_spectrum:
            refined_formulas.append(formula)
    return refined_formulas

# Example mass spectrum, mapping each observed mass to its intensity:
#   100 -> peak for a compound with molecular weight 100
#   101 -> isotopic peak, or another compound with molecular weight 101
#   102 -> peak for a compound with molecular weight 102
mass_spectrum = {100: 50, 101: 100, 102: 80}

# Step 1: enumerate every candidate formula within 5 mass units of 100.
possible_formulas = calculate_formula(100, 5)
print(f"Possible Formulas After Calculation: {possible_formulas}")

# Step 2: keep only the candidates that match an observed peak.
refined_formulas = refine_formulas(possible_formulas, mass_spectrum)

# Report the surviving formulas, one per line.
print("Refined Formulas After Refinement:")
for formula in refined_formulas:
    print(formula)

Possible Formulas After Calculation: ['C1H1O1N3S1', 'C1H1O2N4S0', 'C1H1O3N3S0', 'C1H2O0N4S1', 'C1H2O1N3S1', 'C1H2O2N4S0', 'C1H2O3N3S0', 'C1H3O0N4S1', 'C1H3O1N5S0', 'C1H3O2N4S0', 'C1H3O3N0S1', 'C1H3O5N0S0', 'C1H4O0N4S1', 'C1H4O3N0S1', 'C1H4O5N0S0', 'C1H5O2N1S1', 'C1H5O3N0S1', 'C1H6O0N1S2', 'C1H6O1N0S2', 'C2H1O0N3S1', 'C2H1O1N2S1', 'C2H1O1N4S0', 'C2H1O2N1S1', 'C2H1O2N3S0', 'C2H1O3N2S0', 'C2H1O4N1S0', 'C2H2O0N3S1', 'C2H2O1N2S1', 'C2H2O1N4S0', 'C2H2O2N1S1', 'C2H2O2N3S0', 'C2H2O3N2S0', 'C2H2O4N1S0', 'C2H3O0N3S1', 'C2H3O0N5S0', 'C2H3O1N2S1', 'C2H3O1N4S0', 'C2H3O2N3S0', 'C2H3O3N2S0', 'C2H4O0N3S1', 'C2H4O0N5S0', 'C2H4O1N2S1', 'C2H4O1N4S0', 'C2H4O2N3S0', 'C2H4O3N2S0', 'C2H5O0N3S1', 'C2H5O0N5S0', 'C2H5O1N4S0', 'C2H5O2N3S0', 'C2H6O0N3S1', 'C2H6O0N5S0', 'C2H6O1N4S0', 'C2H6O2N3S0', 'C2H7O0N0S2', 'C2H7O0N5S0', 'C2H7O1N4S0', 'C2H7O2N0S1', 'C2H7O4N0S0', 'C2H8O0N0S2', 'C2H8O0N5S0', 'C2H8O1N4S0', 'C2H8O2N0S1', 'C2H8O4N0S0', 'C2H9O0N0S2', 'C2H9O0N5S0', 'C2H9O1N1S1', 'C2H9O2N0S1', 'C2H9O3N1S0', 'C2H9O4N0S