## Recreating digest2 output for all the XML files 
We want to recreate the digest2 output for every XML file from the NEO Surveyor mission. Each row of the digest2 output needs to contain all of the available score values.

#### Setting up files and directories 

In [1]:
# Import
import subprocess
import time
import os

In [2]:
# --- Input data -------------------------------------------------------------
# Folder holding the NEO Surveyor XML files (trailing slash included)
xml_dir = "NEOSurveyordata-20241021/"

# Off-ecliptic and on-ecliptic trksub/designation files (located inside xml_dir)
on_ecliptic_file = "on_ecl_tracklet2desig3.dat"
off_ecliptic_file = "off_ecl_tracklet2desig3.dat"

# --- Tools ------------------------------------------------------------------
# Path to the digest2 executable
digest2_exec = "digest2/digest2/digest2"

# --- Files to process -------------------------------------------------------
# Uncomment an entry to include that XML file in the run
xml_files = [
    "2024-10-21T16_54_54.398_000000R8.xml",
    # "2024-10-21T16_56_11.525_000000R9.xml",
    # "2024-10-21T16_56_29.541_000000RA.xml",
    # "2024-10-21T16_57_05.908_000000RB.xml",
    # "2024-10-21T16_58_16.314_000000RD.xml",
    # "2024-10-21T16_59_02.987_000000RE.xml",
    # "2024-10-21T16_59_20.442_000000RF.xml",
    # "2024-10-21T16_59_38.838_000000RG.xml",
    # "2024-10-21T17_00_12.204_000000RH.xml",
    # "2024-10-21T17_01_32.492_000000RI.xml",
    # "2024-10-21T17_05_41.455_000000RJ.xml",
    # "2024-10-21T17_07_16.131_000000RK.xml",
    # "2024-10-21T17_07_31.399_000000RL.xml",
    # "2024-10-21T17_07_48.380_000000RM.xml",
    # "2024-10-21T17_08_19.703_000000RN.xml",
    # "2024-10-21T17_08_46.472_000000RO.xml",
    # "2024-10-21T17_09_12.251_000000RP.xml"
]

In [3]:
# Match trksubs with designations from NEO Surveyor 
def match_desig_trksub(offecliptic_file:str, onecliptic_file:str) -> "dict | None":
    """
    Create a dictionary that matches the trksubs to the designations given by the NEO Surveyor Team.

    Both files are read as whitespace-separated text. On every line the first
    field is used as the trksub and the second field as the designation; any
    further fields are ignored.

    Parameters
    ----------
    offecliptic_file : str
        Path to the first trksub/designation file.
    onecliptic_file : str
        Path to the second trksub/designation file. It is read after the first
        one, so its entries win when both files contain the same trksub.

    Returns
    -------
    dict or None
        ``{trksub: [desig, flag]}`` where ``flag`` is the string ``'0'`` when the
        fourth character of the designation is ``"0"`` and ``'1'`` otherwise.
        ``None`` is returned (after printing a message) as soon as one of the
        files does not exist.
    """
    trksub_desig = {}

    for path in (offecliptic_file, onecliptic_file):
        # Check if the file exists
        if not os.path.isfile(path):
            print(f"File {path} not found.")
            return None

        with open(path, 'r', encoding='utf-8') as handle:
            # Iterate the handle directly instead of loading the whole file
            for line_number, line in enumerate(handle, start=1):
                fields = line.split()
                if not fields:
                    # Blank line (e.g. trailing newline at end of file)
                    continue
                if len(fields) < 2 or len(fields[1]) < 4:
                    # The flag is read from the 4th character of the designation,
                    # so shorter or missing designations cannot be classified.
                    print(f"Skipping malformed line {line_number} in {path}: {line.rstrip()!r}")
                    continue

                trksub, desig = fields[0], fields[1]
                # NOTE(review): the original comments label the "0" branch as the
                # NEA case (flag '0') and the other branch as the MBA case
                # (flag '1') -- confirm this encoding is what the consumer expects.
                flag = '0' if desig[3] == "0" else '1'
                trksub_desig[trksub] = [desig, flag]

    return trksub_desig
        

In [4]:
# Create the output needed by the machine learning code 
def create_output_format(xml_file: str, digest2_result: str, trksub_desig: dict, label_column: str = None) -> None:
    """
    Create the output format needed by the machine learning code.

    Writes a comma-separated file next to ``xml_file`` (same name with ".xml"
    replaced by ".digest2"). Every kept digest2 row is written as its
    whitespace-separated fields joined by commas, followed by the class flag
    looked up in ``trksub_desig``.

    Parameters
    ----------
    xml_file : str
        Name/path of the XML file the digest2 output belongs to; only used to
        derive the output file name.
    digest2_result : str
        Text captured from digest2's standard output. The first two lines are
        skipped; each remaining non-blank line is treated as one object whose
        first field is the trksub.
    trksub_desig : dict
        Mapping ``{trksub: [desig, flag]}``; ``flag`` is appended to each row.
        Objects missing from the mapping are reported and skipped.
    label_column : str, optional
        Name to append to the header for the class-flag column (e.g. 'orbtype'
        for ML purposes or 'class' for filtering purposes). When omitted the
        header is written without a name for that column.
        NOTE(review): each row carries one more field (the flag) than digest2
        produced -- confirm whether the header should always name it.
    """
    output_path = xml_file.replace(".xml", ".digest2")
    if output_path == xml_file:
        # No ".xml" in the name: never overwrite the input file itself
        output_path = xml_file + ".digest2"

    header = "trksub,Int1,Int2,Neo1,Neo2,MC1,MC2,Hun1,Hun2,Pho1,Pho2,MB1_1,MB1_2,Pal1,Pal2,Han1,Han2,MB2_1,MB2_2,MB3_1,MB3_2,Hil1,Hil2,JTr1,JTr2,JFC1,JFC2"
    if label_column:
        header += "," + label_column

    # Context manager guarantees the file is closed even if a row raises
    with open(output_path, "w", encoding='utf-8') as output:
        output.write(header + "\n")

        # Read digest2 results, skipping the two leading lines
        for line in digest2_result.splitlines()[2:]:
            sp = line.split()
            if not sp:
                # Blank line in the digest2 output
                continue
            if sp[0] not in trksub_desig:
                # If the object is not in the dictionary, skip it
                print(f"Object {sp[0]} not found in trksub_desig dictionary.")
                continue
            NEAclass = trksub_desig[sp[0]][1]
            output.write(','.join(sp) + ',' + NEAclass + "\n")

    print(f"Output file {output_path} created.")
        
    

In [5]:
# Create trksub dictionary (built once and reused for every XML file)
trksub_desig = match_desig_trksub(xml_dir + off_ecliptic_file, xml_dir + on_ecliptic_file)
if trksub_desig is None:
    # match_desig_trksub has already printed which file is missing; stop here
    # instead of failing later inside the loop with an obscure TypeError.
    raise FileNotFoundError("trksub/designation files not found; cannot label the digest2 output.")

# Run digest2 on single files
for xml_file in xml_files:
    file_path = xml_dir + xml_file
    print(f"Processing: {file_path}")

    # perf_counter is monotonic, so the elapsed time cannot be skewed by clock changes
    start_time = time.perf_counter()

    try:
        # Argument list without a shell: paths containing spaces or shell
        # metacharacters are passed to digest2 unchanged.
        result = subprocess.run(
            [digest2_exec, "-c", "MPC.config", file_path],
            capture_output=True, text=True, check=True
        )
    except FileNotFoundError as e:
        # Raised when the digest2 executable itself cannot be found
        print(f"Error processing {xml_file}: {e}")
    except subprocess.CalledProcessError as e:
        print(f"Error processing {xml_file}: {e}")
        print(f"Command output: {e.output}")
        print(f"Command stderr: {e.stderr}")
    else:
        if result.stderr:
            print(f"Error (if any): {result.stderr}")
        # Convert the captured digest2 text into the labelled CSV-style file
        create_output_format(xml_file, result.stdout, trksub_desig)

    elapsed_time = time.perf_counter() - start_time

    print(f"Time taken for {xml_file}: {elapsed_time:.4f} seconds\n")
    

Processing: NEOSurveyordata-20241021/2024-10-21T16_54_54.398_000000R8.xml
Error (if any): 
Object N0JM0002 not found in trksub_desig dictionary.
Object N0JM00p1 not found in trksub_desig dictionary.
Object N0JM00kG not found in trksub_desig dictionary.
Object N0JM0003 not found in trksub_desig dictionary.
Object N0JM015c not found in trksub_desig dictionary.
Object N0JM01IT not found in trksub_desig dictionary.
Object N0JM016j not found in trksub_desig dictionary.
Object N0JM01Ec not found in trksub_desig dictionary.
Object N0JM01CX not found in trksub_desig dictionary.
Object N0JM01WW not found in trksub_desig dictionary.
Object N0JM02Q3 not found in trksub_desig dictionary.
Object N0JM02bo not found in trksub_desig dictionary.
Object N0JM02J8 not found in trksub_desig dictionary.
Object N0JM02h1 not found in trksub_desig dictionary.
Object N0JM031S not found in trksub_desig dictionary.
Object N0JM02eZ not found in trksub_desig dictionary.
Object N0JM03Bi not found in trksub_desig dic