In [86]:
from m2_functions import *
import numpy as np
import os
import re
from collections import defaultdict

In [87]:
def parse_data(file_path):
    """Parse a text file made of blank-line-separated data points.

    Each data point may contain single-line, comma-separated fields
    (``Variables:``, ``Measured Variables:``, ``Constants:``,
    ``Non Measured Variables:``, ``Derivatives:``) and a multi-line
    ``Equations:`` section.

    NOTE: a later cell in this notebook defines another ``parse_data``; once
    that cell has run, it replaces this definition.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict with the keys ``variables``, ``measured_variables``, ``constants``,
        ``non_measured_variables``, ``equations`` and ``derivatives``. Each value
        is a list holding one list of strings per data point in which the
        corresponding field was found.
    """
    with open(file_path, 'r') as file:
        content = file.read()

    # Data points are separated by an empty line.
    data_points = content.split('\n\n')

    def field_pattern(label):
        # Anchor the label at the start of a line so that 'Variables:' cannot
        # match inside 'Measured Variables:' / 'Non Measured Variables:', and
        # capture up to the end of that line only, so a field on the last line
        # of a data point (no trailing newline) is still found.
        return re.compile(r'^[ \t]*' + re.escape(label) + r'[ \t]*(.*)$', re.MULTILINE)

    list_field_patterns = {
        'variables': field_pattern('Variables:'),
        'measured_variables': field_pattern('Measured Variables:'),
        'constants': field_pattern('Constants:'),
        'non_measured_variables': field_pattern('Non Measured Variables:'),
        'derivatives': field_pattern('Derivatives:'),
    }
    # The equations section runs until a line of the form '<digits>:' or the
    # end of the data point.
    equations_pattern = re.compile(r'Equations:\s*(.*?)(?=\n\s*\d+:|\Z)', re.DOTALL)

    parsed = {
        'variables': [],
        'measured_variables': [],
        'constants': [],
        'non_measured_variables': [],
        'equations': [],
        'derivatives': [],
    }

    for data_point in data_points:
        for key, pattern in list_field_patterns.items():
            match = pattern.search(data_point)
            if match:
                # Strip each item so 'a, b' yields ['a', 'b']; an empty field
                # yields an empty list.
                items = [item.strip() for item in match.group(1).split(',')]
                parsed[key].append([item for item in items if item])

        equations_match = equations_pattern.search(data_point)
        if equations_match:
            equation_lines = equations_match.group(1).strip().split('\n')
            # Drop blank lines inside the section.
            parsed['equations'].append([eq.strip() for eq in equation_lines if eq.strip()])

    return parsed


In [88]:
def parse_data(file_path):
    """Parse one benchmark system file into its variables, constants, derivatives and equations.

    Lines starting with ``Variables:``, ``Constants:`` or ``Derivatives:`` are
    expected to carry a Python list literal (e.g. ``['d1', 'd2']``) after the
    label. Non-empty lines following an ``Equations:`` line are collected as
    equations until another recognised header is reached.

    Args:
        file_path: Path of the system file to read.

    Returns:
        dict with the keys ``variables``, ``constants``, ``derivatives`` and
        ``equations``. Sections that do not occur in the file stay empty lists.

    Raises:
        ValueError, SyntaxError: if the text after a list label is not a valid
            Python literal.
    """
    # Local import so this cell does not depend on the notebook's import cell.
    import ast

    data = {
        'variables': [],
        'constants': [],
        'derivatives': [],
        'equations': []
    }
    list_headers = {
        "Variables:": 'variables',
        "Constants:": 'constants',
        "Derivatives:": 'derivatives',
    }
    # Headers that follow the equations; reaching one ends the equations
    # section so the lines after it are not collected as equations.
    section_enders = ("Units of measure:", "Target Polynomial:")

    with open(file_path, 'r') as file:
        lines = file.readlines()

    in_equations = False
    for raw_line in lines:
        line = raw_line.strip()

        header = next((h for h in list_headers if line.startswith(h)), None)
        if header is not None:
            in_equations = False
            # SECURITY: the value comes from a file on disk, so parse it as a
            # literal instead of executing it with eval().
            data[list_headers[header]] = ast.literal_eval(line[len(header):].strip())
        elif line.startswith("Equations:"):
            in_equations = True
            data['equations'] = []
        elif line.startswith(section_enders):
            in_equations = False
        elif in_equations and line:
            data['equations'].append(line)

    return data


In [89]:
def extract_variables(equation):
    """Return the distinct identifier-like names that occur in ``equation``.

    A name is a run of letters, digits and underscores that starts with a
    letter or an underscore, so standalone numbers are not matched. Any such
    token is returned, whatever it stands for.

    Args:
        equation: Equation or polynomial as a string.

    Returns:
        List of unique names in order of first occurrence. The order is
        deterministic, unlike a round-trip through ``set``, whose iteration
        order for strings can change from one interpreter run to the next.
    """
    names = re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', equation)

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(names))

def get_variables_from_equations(equations):
    """Return, for every equation in ``equations``, the names found by ``extract_variables``.

    The result has one entry per equation, in input order.
    """
    names_per_equation = []
    for equation in equations:
        names_per_equation.append(extract_variables(equation))
    return names_per_equation

In [104]:
# Iterate through all the data points and run the projection function.
#
# For every system file: shuffle its symbols, then try growing prefixes of the
# shuffled list as the "measured" set. A candidate is skipped when it is
# contained in, or contains, the symbol set of a single equation. Otherwise
# `projection` (from m2_functions) is run and its output file inspected; the
# first candidate whose output does not contain "matrix {}" is written to
# target_polynomials/system{i}_processed.txt.
#
# NOTE(review): the shuffles use NumPy's global RNG and no seed is set in this
# cell, so the chosen measured sets presumably differ between runs.
# NOTE(review): for systems with more than one symbol, j is reset whenever the
# end of the list is reached, so the while loop only ends through `break`; a
# system with no acceptable candidate would loop forever.

n = 82

for i in range(n):
    file_path = f'target_polynomial_benchmark/system{i}.txt'
    parsed_data = parse_data(file_path)
    print(parsed_data)
    variables = parsed_data['variables'] + parsed_data['constants'] + parsed_data['derivatives']
    equations = parsed_data['equations']
    axiom_variables = get_variables_from_equations(equations)

    np.random.shuffle(variables)
    j = 0
    while j < len(variables):
        temp_measured_variables = variables[:j+1]
        non_measured_variables = list(set(variables) - set(temp_measured_variables))

        measured_set = set(temp_measured_variables)
        inside_one_axiom = any(measured_set.issubset(set(axiom_vars)) for axiom_vars in axiom_variables)
        covers_one_axiom = any(set(axiom_vars).issubset(measured_set) for axiom_vars in axiom_variables)
        if inside_one_axiom or covers_one_axiom:
            # Candidate rejected without calling projection; after the last
            # prefix, reshuffle and start over.
            if j == len(variables) - 1:
                j = 0
                np.random.shuffle(variables)
            j += 1
            continue

        projection(variables, equations, temp_measured_variables, non_measured_variables, filename='temp.txt')

        # Read the whole result and close the handle BEFORE deleting the file:
        # removing a file that is still open fails on Windows.
        with open('temp.txt', 'r') as temp_file:
            content = temp_file.read()
        os.remove('temp.txt')

        if "matrix {}" not in content:
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs('target_polynomials', exist_ok=True)
            with open('target_polynomials/system{}_processed.txt'.format(i), 'w') as file:

                file.write("System number {}\n".format(i))
                file.write(f"Measured Variables: {temp_measured_variables}\n")
                file.write(f"Non-Measured Variables: {non_measured_variables}\n")
                file.write("Equations:\n")
                for eq in equations:
                    file.write(eq)
                    file.write("\n")
                file.write("\n")
                file.write("Target Polynomial:\n")
                # Copy everything that follows the Gröbner-basis header line;
                # the lines are written back-to-back without separators.
                write = False
                for line in content.split("\n"):
                    if write:
                        file.write(line)
                    if "Polynomials of the Gröbner basis of the eliminated ideal:" in line:
                        print(line)
                        write = True
            break
        else:
            # Output contained "matrix {}": try the next prefix, reshuffling
            # after the last one.
            if j == len(variables) - 1:
                j = 0
                np.random.shuffle(variables)

        j += 1


{'variables': ['d1', 'd2', 'm1', 'm2', 'w', 'Fg'], 'constants': ['G'], 'derivatives': ['dx1dt', 'd2x1dt2', 'dx2dt', 'd2x2dt2'], 'equations': ['-d2x1dt2 + d2x2dt2 + d2*w^2', '2*dx1dt*d1*m1^2*w^2 - dx2dt*Fg*m2 + dx2dt*d1*m2^2*w^2 - Fg*d1*m1*w', '-G*m1 + dx1dt*dx2dt*d2 + dx2dt^2*d2', '-dx1dt - 2*dx2dt + d1*w + d2*w', 'd2x1dt2*m1 + d2x2dt2*m1 - Fg']}
Output from Macaulay2:
Ring defined: R
Axioms defined: {d2*w^2+d2x2dt2-d2x1dt2, 2*dx1dt*d1*m1^2*w^2+d1*dx2dt*m2^2*w^2-d1*m1*w*Fg-dx2dt*m2*Fg, dx1dt*dx2dt*d2+dx2dt^2*d2-G*m1, -dx1dt+d1*w-2*dx2dt+d2*w, d2x2dt2*m1+m1*d2x1dt2-Fg}
Measured variables defined: {dx1dt, d1, dx2dt, G}
Non-measured variables defined: {Fg, m2, d2x2dt2, m1, d2, d2x1dt2, w}
Ideal defined: ideal(d2*w^2+d2x2dt2-d2x1dt2,2*dx1dt*d1*m1^2*w^2+d1*dx2dt*m2^2*w^2-d1*m1*w*Fg-dx2dt*m2*Fg,dx1dt*dx2dt*d2+dx2dt^2*d2-G*m1,-dx1dt+d1*w-2*dx2dt+d2*w,d2x2dt2*m1+m1*d2x1dt2-Fg)
Gröbner basis: matrix {{d2x2dt2*m1+m1*d2x1dt2-Fg, d2*w^2+d2x2dt2-d2x1dt2, 4*dx2dt^4*m2^2*m1^2*d2x1dt2^2-4*dx2dt^4*m2^2

In [11]:
def extract_target_polynomial(file_path):
    """Return the target polynomial stored in a system file.

    The first line starting with ``Target Polynomial:`` is located. If text
    follows the label on that line it is returned (stripped). If the label
    stands alone, the polynomial is taken from the remaining lines of the file,
    joined and stripped, which is how another cell of this notebook reads the
    same section.

    Args:
        file_path: Path of the system file to read.

    Returns:
        The target polynomial as a string.

    Raises:
        ValueError: if no line starts with ``Target Polynomial:``.
    """
    label = 'Target Polynomial:'

    with open(file_path, 'r') as file:
        lines = file.readlines()

    for index, line in enumerate(lines):
        if not line.startswith(label):
            continue
        same_line = line.split(label)[1].strip()
        if same_line:
            return same_line
        # Label on its own line: the polynomial is on the following line(s).
        return ''.join(lines[index + 1:]).strip()

    # A bare StopIteration from next() carried no message for the caller.
    raise ValueError(f"No '{label}' line found in {file_path}")

def extract_variables(equation):
    """Return the distinct identifier-like names that occur in ``equation``.

    A name is a run of letters, digits and underscores that starts with a
    letter or an underscore, so standalone numbers are not matched.

    Args:
        equation: Equation or polynomial as a string.

    Returns:
        List of unique names in order of first occurrence. The order is
        deterministic, unlike a round-trip through ``set``.
    """
    names = re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', equation)

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(names))

def get_variable_degrees(polynomial_str):
    """Return the degree of every variable in an expanded polynomial string.

    The polynomial is expected as a sum/difference of terms whose factors are
    joined by ``*`` and whose powers are written ``name^k`` with a non-negative
    integer ``k`` (e.g. ``4*x^2*y - z``). A variable's degree is the largest
    total exponent it has in any single term.

    Args:
        polynomial_str: Polynomial as a string.

    Returns:
        dict mapping each variable name to its degree. Keys are in order of
        first appearance in ``polynomial_str``.

    Raises:
        ValueError: if the text after ``^`` on a variable is not an integer.
    """
    # Collect the names directly in first-appearance order so the resulting
    # dict (and list(degrees.values())) is the same on every run.
    names = re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', polynomial_str)
    degrees = dict.fromkeys(names, 0)

    # Split the polynomial into terms.
    terms = re.split(r'[+-]', polynomial_str.replace(' ', ''))

    for term in terms:
        if not term:
            continue
        # Exponents of a variable repeated inside one term add up (x*x^2 -> 3).
        term_degrees = {}
        for factor in term.split('*'):
            base, caret, power = factor.partition('^')
            if base not in degrees:
                # Numeric coefficient (possibly powered, e.g. 2^3): not a variable.
                continue
            exponent = int(power) if caret else 1
            term_degrees[base] = term_degrees.get(base, 0) + exponent
        for name, degree in term_degrees.items():
            degrees[name] = max(degrees[name], degree)

    return degrees

def process_directory(directory, n, start=70):
    """Print the variable degrees of the target polynomial of each system file.

    Processes ``system{i}.txt`` in ``directory`` for ``i`` in ``range(start, n)``.
    Missing files are reported and skipped; any error while processing a file
    is reported and the loop moves on to the next file.

    Args:
        directory: Directory containing the ``system{i}.txt`` files.
        n: Exclusive upper bound of the system indices.
        start: First system index to process. Defaults to 70, the value that
            was previously hard-coded.
    """
    for i in range(start, n):
        file_name = f"system{i}.txt"
        file_path = os.path.join(directory, file_name)

        if not os.path.exists(file_path):
            print(f"File {file_name} does not exist. Skipping...")
            continue

        try:
            # Extract the target polynomial
            target_polynomial = extract_target_polynomial(file_path)
            # Get degrees of variables
            degrees = get_variable_degrees(target_polynomial)
            # Output the degrees as an array
            print(f"System {i}: {list(degrees.values())}")
        except Exception as e:
            # Keep going so one bad file does not stop the whole batch.
            print(f"Error processing {file_name}: {e}")


In [37]:
# Directory containing the benchmark system files (system{i}.txt).
# NOTE(review): the processing loop at the end of this cell builds its paths
# from a literal 'target_polynomial_benchmark/...' string and does not read
# this variable.
directory = 'target_polynomial_benchmark/'

# Function to extract variables from the target polynomial
def extract_variables_from_polynomial(equation, constants, derivatives):
    """Split the names occurring in ``equation`` into variables, constants and derivatives.

    Args:
        equation: Polynomial as a string.
        constants: Names to treat as constants.
        derivatives: Names to treat as derivatives.

    Returns:
        Tuple ``(variables_filtered, observed_constants, observed_derivatives)``:
        the names in ``equation`` that are neither constants nor derivatives
        (in order of first appearance), the entries of ``constants`` that occur
        in ``equation``, and the entries of ``derivatives`` that occur in
        ``equation`` (both in the order of the input lists).

    Side effects:
        Prints the list of distinct names found in ``equation``.
    """
    names = re.findall(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', equation)

    # De-duplicate while keeping first-appearance order; going through a set
    # would make the order, and anything sorted with ties from it, vary between runs.
    unique_variables = list(dict.fromkeys(names))
    print(unique_variables)

    # Identify observed constants and derivatives
    observed_constants = [const for const in constants if const in unique_variables]
    observed_derivatives = [deriv for deriv in derivatives if deriv in unique_variables]

    # Remove constants and derivatives from the variables list
    variables_filtered = [
        var for var in unique_variables
        if var not in constants and var not in derivatives
    ]

    return variables_filtered, observed_constants, observed_derivatives

# Function to calculate the degree of each variable in the target polynomial
def calculate_variable_degrees(polynomial, variables):
    """Compute a degree score for each variable from its ``name^k`` occurrences.

    For every variable, the exponents of all its ``name^k`` occurrences in
    ``polynomial`` are summed; a variable with no explicit exponent anywhere
    gets 1.

    NOTE(review): the score is the SUM of the explicit exponents over the whole
    polynomial, not the maximum per term, and occurrences without ``^`` are
    ignored once any powered occurrence exists. Confirm this is the intended
    ordering criterion.

    Args:
        polynomial: Polynomial as a string.
        variables: Iterable of variable names to score.

    Returns:
        ``defaultdict(int)`` mapping each name in ``variables`` to its score.
    """
    degree_dict = defaultdict(int)
    for var in variables:
        # Match the name only as a whole identifier: the lookbehind stops 'm1'
        # from matching the tail of 'dm1', and re.escape keeps any regex
        # metacharacters in the name literal.
        pattern = r'(?<![A-Za-z0-9_])' + re.escape(var) + r'\^(\d+)'
        matches = re.findall(pattern, polynomial)
        if matches:
            # Sum all exponents for the variable
            degree_dict[var] = sum(int(exp) for exp in matches)
        else:
            # If no exponent, assume degree 1
            degree_dict[var] = 1
    return degree_dict

# Iterate through each selected system file and write a reordered copy of it
# to system{i}_processed.txt, listing the measured variables sorted by their
# degree score in the target polynomial.
for i in range(70,71):
    filepath = f'target_polynomial_benchmark/system{i}.txt'

    if not os.path.exists(filepath):
        print(f"File system{i}.txt does not exist. Skipping...")
        continue

    # Parse THIS file. The previous code passed `file_path`, a variable left
    # over from an earlier cell, so the variables/constants/derivatives came
    # from a different system (or raised NameError on a fresh kernel).
    parsed_data = parse_data(filepath)
    variables = parsed_data['variables']
    constants = parsed_data['constants']
    derivatives = parsed_data['derivatives']

    # Read everything up front so the input handle is closed before the
    # output file is written.
    with open(filepath, 'r') as file:
        content = file.readlines()

    # Extract the system number
    system_number = next(line for line in content if line.startswith('System number'))

    # Extract measured variables (not used below; the lookup still requires the line to exist)
    measured_variables = next(line for line in content if line.startswith('Measured Variables:'))

    # Extract equations: every line from 'Equations:' up to, not including, 'Units of measure:'
    equations_start = content.index(next(line for line in content if line.startswith('Equations:')))
    units_of_measure_start = content.index(next(line for line in content if line.startswith('Units of measure:')))
    equations = ''.join(content[equations_start:units_of_measure_start])

    units_of_measure = content[units_of_measure_start]

    # Extract target polynomial: everything after the label, to the end of the file
    target_polynomial_start = content.index(next(line for line in content if line.startswith('Target Polynomial:')))
    target_polynomial = ''.join(content[target_polynomial_start:]).split('Target Polynomial:')[1].strip()

    # Extract variables, observed constants, and observed derivatives
    target_variables, observed_constants, observed_derivatives = extract_variables_from_polynomial(target_polynomial, constants, derivatives)

    # Calculate the degree of each variable in the target polynomial
    variable_degrees = calculate_variable_degrees(target_polynomial, target_variables)

    # Sort variables by degree in descending order
    sorted_variables = sorted(target_variables, key=lambda x: variable_degrees[x], reverse=True)

    # Reconstruct the content in the desired order
    new_content = (
        system_number +
        f"Variables: {variables}\n" +
        f"Measured Variables: {sorted_variables}\n" +
        f"Non Measured Variables: {[var for var in variables if var not in sorted_variables]}\n" +
        f"Constants: {constants}\n" +
        f"Observed Constants: {observed_constants}\n" +
        f"Derivatives: {derivatives}\n" +
        f"Observed Derivatives: {observed_derivatives}\n" +
        f"{units_of_measure}" +
        equations +
        f"Target Polynomial: {target_polynomial}"
    )

    output_filepath = f'target_polynomial_benchmark/system{i}_processed.txt'
    # Write the reordered content to the processed file
    with open(output_filepath, 'w') as out_file:
        out_file.write(new_content)

print("All files have been updated successfully.")

['m2', 'm1', 'Fg', 'd2', 'd1', 'd2x2dt2']
All files have been updated successfully.
