In [None]:
import subprocess
import os
import re

# --- 1. CONFIGURATION ---
# Raw search-mode data file, expected in the current working directory.
raw_filename = "bpsr100613_052818_beam08.sf"

# Base name shared by every output file rfifind creates.
output_basename = "bpsr100613_beam08"

# --- 2. DEFINE EXPECTED OUTPUT & PRE-EXECUTION CHECK ---
# rfifind writes several files; the .mask file is the one used here as the
# "already done" marker, so its presence decides whether rfifind is run.
mask_filename = output_basename + "_rfifind.mask"


def build_rfifind_command(raw_file, basename):
    """Return the rfifind argument list for *raw_file*, writing to *basename*.

    Args:
        raw_file: Path of the raw data file to analyse.
        basename: Base name passed to ``-o``; rfifind derives its output
            file names from it.

    Returns:
        A list of strings suitable for ``subprocess.run`` (no shell involved).
    """
    return [
        "rfifind",

        # --- PHYSICAL PARAMETERS ---
        #"-psrfits",          # Force the format (Safety for .sf files)
        "-time", "2.0",      # Integrate statistics over 2.0 s intervals

        # --- THRESHOLD TUNING ---
        "-timesig", "25.0",  # Time-domain rejection cutoff: 25 sigma
        "-freqsig", "8.0",   # Frequency-domain rejection cutoff: 8 sigma

        # --- SIFTING PARAMETERS ---
        "-chanfrac", "0.7",  # Mask a whole interval once 70% of its channels are bad
        "-intfrac", "0.5",   # Mask a whole channel once 50% of its intervals are bad

        # --- FORMAT FLAGS (Legacy Parkes Safe-Mode) ---
        "-noweights",
        "-noscales",
        "-nooffsets",

        # --- INPUT/OUTPUT ---
        "-o", basename,
        raw_file,
    ]


print(f"Checking for existing output file: '{mask_filename}'")

if os.path.exists(mask_filename):
    # The mask already exists, so the (slow) rfifind run is skipped.
    print("✅ Files found. Skipping rfifind execution.")

elif not os.path.exists(raw_filename):
    # Fail early with a clear message instead of a generic rfifind exit code.
    print(f"\n❌ ERROR: Raw data file '{raw_filename}' not found in '{os.getcwd()}'.")

else:
    # --- 3. EXECUTION ---
    print(f"\nFile not found. Starting RFI analysis on: {raw_filename}")
    try:
        # check=True turns a non-zero rfifind exit status into CalledProcessError.
        subprocess.run(build_rfifind_command(raw_filename, output_basename), check=True)
    # The 'rfifind' executable itself could not be located.
    except FileNotFoundError:
        print("\n❌ ERROR: 'rfifind' command not found.")
        print("   Please ensure that your PRESTO environment is activated/sourced correctly.")
    # rfifind ran but returned an error code.
    except subprocess.CalledProcessError as e:
        print(f"\n❌ An error occurred during rfifind execution: {e}")
    # Anything else is reported instead of aborting the cell.
    except Exception as e:
        print(f"\n❌ An unexpected error occurred: {e}")
    else:
        print("\n✅ rfifind command finished executing.")

# --- 4. FINAL VERIFICATION ---
# Confirms that the mask either existed from the start or was created above.
print("\n--- Verifying Output ---")
if os.path.exists(mask_filename):
    print(f"✅ SUCCESS! The mask file '{mask_filename}' is present and ready.")
else:
    print(f"❌ VALIDATION FAILED: The mask file '{mask_filename}' was not found.")

In [None]:
import os
import glob
import subprocess

# --- 1. CONFIGURATION ---
# In WSL, the Windows D: drive is usually mounted at /mnt/d (E: at /mnt/e).
usb_path = "/mnt/d/Presto_Project/thrd_psr"

# Abort early if the output directory is missing.
if not os.path.exists(usb_path):
    print(f"❌ Error: USB path '{usb_path}' does not exist. Please create it or check the path.")
    # exit() is an interactive helper injected by the `site` module and reports
    # success (status 0); raise SystemExit with a failure status instead.
    raise SystemExit(1)

# Input data, resolved relative to the current working directory.
# raw_filename is defined here as well so this cell does not depend on the
# RFI-masking cell having been run in the same session.
raw_filename = "bpsr100613_052818_beam08.sf"
mask_filename = "bpsr100613_beam08_rfifind.mask"

# Number of samples in the raw file (hard-coded from the .sf header).
total_samples_original = 8732672

# Output base name: every time series is written under the USB path.
output_basename = os.path.join(usb_path, "bpsr100613_beam08_topo")

# The de-dispersion plan. In every stage num_dms == (high_dm - low_dm) / ddm.
ddplan_stages = [
    # Row 1: Covers DM 0 to 187.2 (Includes your target DM ~129)
    {'low_dm': 0.000,   'high_dm': 187.200, 'ddm': 0.10, 'downsamp': 1, 'num_dms': 1872},

    # Row 2
    {'low_dm': 187.200, 'high_dm': 332.800, 'ddm': 0.20, 'downsamp': 2, 'num_dms': 728},

    # Row 3
    {'low_dm': 332.800, 'high_dm': 508.800, 'ddm': 0.50, 'downsamp': 4, 'num_dms': 352},
]

total_expected_dms = sum(stage['num_dms'] for stage in ddplan_stages)

# The child environment is the same for every stage, so build it once.
# NOTE(review): this replaces any PRESTO value inherited from the shell with
# the relative path "presto" — confirm that is intended for this setup.
custom_env = os.environ.copy()
custom_env["PRESTO"] = "presto"

# --- 2. EXECUTION LOOP ---
print("--- Step 2: Multi-DM De-dispersion ---")
# NOTE(review): "J1048-45832" looks like it may be a typo in the pulsar name — confirm.
print("Target: PSR J1048-45832 (NP)")
print(f"Reading from: {raw_filename}")
print(f"Writing to:   {usb_path}")

for i, stage in enumerate(ddplan_stages):

    stage_basename = f"{output_basename}_stage{i}"
    print(f"\n--- Processing Stage {i+1}/{len(ddplan_stages)} ---")

    # A stage counts as done once it has at least one .inf file per DM trial.
    expected_files_for_stage = stage['num_dms']
    existing_files = glob.glob(f"{stage_basename}*.inf")

    if len(existing_files) >= expected_files_for_stage:
        print(f"✅ Stage {i+1} already complete on USB. Skipping.")
        continue

    # --- CALCULATE NUMOUT ---
    # Output length = original samples // downsampling factor, using integer
    # division so no float rounding is involved, then forced down to an even
    # number of samples.
    calculated_numout = total_samples_original // stage['downsamp']
    calculated_numout -= calculated_numout % 2

    print(f"   -> Downsample: {stage['downsamp']}")
    print(f"   -> Calculated numout: {calculated_numout}")

    command = [
        "prepsubband",
        # --- LAPTOP OPTIMIZATION ---
        "-ncpus", "4",           # Use 4 cores

        # --- PHYSICS & SIGNAL PROCESSING ---
        "-psrfits",              # Explicit format
        "-zerodm",               # Zero-DM subtraction against terrestrial RFI
        "-nobary",               # Stay topocentric (no barycentering)
        "-numout", str(calculated_numout),  # Enforce file length

        # --- DDPLAN PARAMETERS ---
        "-lodm", str(stage['low_dm']),
        "-dmstep", str(stage['ddm']),
        "-numdms", str(stage['num_dms']),
        "-downsamp", str(stage['downsamp']),
        "-nsub", "32",           # Number of subbands

        # --- IO ---
        "-o", stage_basename,    # Writes to the USB path
        "-mask", mask_filename,
        raw_filename,
    ]

    try:
        # Output is not captured, so prepsubband's progress is shown live.
        subprocess.run(command, check=True, env=custom_env)
    except FileNotFoundError:
        print("\n❌ ERROR: 'prepsubband' command not found.")
        print("   Please ensure that your PRESTO environment is activated/sourced correctly.")
        break
    except subprocess.CalledProcessError as e:
        print(f"\n❌ PRESTO Error in stage {i+1}: {e}")
        break
    except Exception as e:
        print(f"\n❌ General Error: {e}")
        break
    else:
        print(f"✅ Stage {i+1} finished successfully.")

# --- 3. VERIFICATION ---
print("\n--- Verifying Output on USB Drive ---")
# Count everything produced under the shared output base name, across stages.
all_inf_files = glob.glob(f"{output_basename}*.inf")
all_dat_files = glob.glob(f"{output_basename}*.dat")

print(f"Expected: {total_expected_dms}")
print(f"Found .inf: {len(all_inf_files)}")
print(f"Found .dat: {len(all_dat_files)}")

if len(all_dat_files) == total_expected_dms:
    print("\n✅ SUCCESS! All data is safely stored on the USB drive.")
else:
    print("\n⚠️  WARNING: File count mismatch. Check drive space or errors.")

In [None]:
import os
import glob
import subprocess
import re

print("\n--- Step 3 : Acceleration Search (Grid Search Mode) ---")

# --- 1. CONFIGURATION ---
# Directory holding the de-dispersed time series and the glob that selects them.
DATA_DIRECTORY = "/mnt/d/Presto_Project/thrd_psr"
DAT_GLOB_PATTERN = os.path.join(DATA_DIRECTORY, "bpsr100613_beam08_topo_*.dat")

# --- GRID SEARCH STRATEGIES ---
# ALL of these are run on every single file.
SEARCH_STRATEGIES = [
    # Strategy 1: The "Deep & Slow" (Standard Pulsars)
    # No acceleration search (zmax=0), low frequency floor.
    {"zmax": 0,   "numharm": 8,  "flo": 0.1, "suffix": "Iso_Slow"},

    # Strategy 2: The "Binary" (Standard Binaries)
    {"zmax": 100, "numharm": 8,  "flo": 1.0, "suffix": "Binary"},

    # Strategy 3: The "MSP/Extreme" (Fast & Tight Orbits)
    {"zmax": 200, "numharm": 16, "flo": 1.0, "suffix": "MSP_Fast"},
]

# Compiled once; matches the "DM<digits>.<digits>" tag in a .dat file name.
DM_PATTERN = re.compile(r"DM(\d+\.\d+)")


def parse_dm_from_filename(filename):
    """Return the DM encoded in *filename* as a float, or None if absent.

    Args:
        filename: File name (or path) expected to contain e.g. ``DM12.30``.

    Returns:
        The DM value as a float, or ``None`` when no DM tag is present.
    """
    match = DM_PATTERN.search(filename)
    return float(match.group(1)) if match else None


# --- 2. PRE-FLIGHT CHECK ---
dat_files_to_search = sorted(glob.glob(DAT_GLOB_PATTERN))

if not dat_files_to_search:
    print(f"❌ ERROR: No .dat files found in {DATA_DIRECTORY}.")
    print("   Please wait for Step 2 to finish completely.")
else:
    print(f"✅ Found {len(dat_files_to_search)} .dat files.")
    print(f"⚙️  Each file will undergo {len(SEARCH_STRATEGIES)} distinct search strategies.")

    # The child environment is identical for every search, so build it once.
    # NOTE(review): this replaces any inherited PRESTO value with the relative
    # path "presto" — confirm that is intended for this setup.
    custom_env = os.environ.copy()
    custom_env["PRESTO"] = "presto"

    failed_searches = 0

    # --- 3. EXECUTION LOOP (Per File) ---
    for dat_filename in dat_files_to_search:

        filename_only = os.path.basename(dat_filename)
        print("\n==================================================")
        print(f"📂 Processing: {filename_only}")

        # The DM is only used for logging; a file name without a DM tag
        # falls back to 0.0 and is searched like any other.
        current_dm = parse_dm_from_filename(filename_only)
        if current_dm is None:
            current_dm = 0.0
        else:
            print(f"   DM: {current_dm:.2f}")

        base_no_ext = os.path.splitext(dat_filename)[0]

        # --- GRID SEARCH LOOP (Per Strategy) ---
        for strategy in SEARCH_STRATEGIES:
            zmax = strategy['zmax']
            numharm = strategy['numharm']
            flo = strategy['flo']
            name = strategy['suffix']

            print(f"   --> Strategy: {name} (zmax={zmax}, numharm={numharm}, flo={flo})")

            try:
                # --- CHECK EXISTENCE ---
                # Expected output name: <dat base>_ACCEL_<zmax>.cand
                expected_cand_file = f"{base_no_ext}_ACCEL_{zmax}.cand"

                if os.path.exists(expected_cand_file) and os.path.getsize(expected_cand_file) > 0:
                    print(f"       ✅ Output exists ({name}). Skipping.")
                    continue

                # --- RUN ACCELSEARCH ---
                command = [
                    "accelsearch",
                    "-ncpus", "4",       # Use 4 cores
                    #"-inmem",           # Keep commented out if RAM is tight
                    "-numharm", str(numharm),
                    "-zmax", str(zmax),
                    "-flo", str(flo),
                    "-sigma", "2.0",
                    dat_filename,
                ]

                # Output is captured so a successful run stays quiet; stderr
                # is shown only when accelsearch fails.
                subprocess.run(
                    command, check=True, env=custom_env, capture_output=True, text=True
                )
                print("       ✅ Search Complete.")

            except subprocess.CalledProcessError as e:
                failed_searches += 1
                print(f"       ❌ PRESTO Error in strategy {name}:\n{e.stderr}")
            except Exception as e:
                failed_searches += 1
                print(f"       ❌ System Error: {e}")

        # --- FILE CLEANUP (only after ALL strategies are done for this file) ---
        # NOTE: Only enable this if you are sure you don't need the .dat file
        # anymore. It must stay outside the strategy loop, because every
        # strategy reads the same .dat file.
        # if os.path.exists(dat_filename):
        #     os.remove(dat_filename)
        #     print(f"🗑️  Deleted .dat file to free space.")

    print("\n🎉 --- All Searches Complete! --- 🎉")
    if failed_searches:
        # Make failures visible next to the completion banner.
        print(f"⚠️  {failed_searches} search(es) reported an error; see messages above.")

In [None]:
import os
import glob
import subprocess
import sys

print("\n--- Step 4: Phase-Specific Sifting ---")

# --- 1. CONFIGURATION ---
# Directory holding the candidate files; sifted lists are written here too.
DATA_DIR = "/mnt/d/Presto_Project/thrd_psr"
SIFTING_COMMAND = "ACCEL_sift.py"

# errno reported when a command line is too large to execute
# (the value of errno.E2BIG on Linux).
ARG_LIST_TOO_LONG = 7

# Check Input Directory
if not os.path.exists(DATA_DIR):
    print(f"❌ ERROR: Data directory '{DATA_DIR}' not found.")
    sys.exit(1)

# --- MAPPING FOR GRID SEARCH ---
# Output label -> glob pattern, one entry per zmax value used in Step 3.
phase_mapping = {
    # Strategy 1: Standard/Slow (zmax=0)
    "Iso_Slow_Search": "*_ACCEL_0.cand",

    # Strategy 2: Binary (zmax=100)
    "Binary_Search":   "*_ACCEL_100.cand",

    # Strategy 3: MSP/Extreme (zmax=200)
    "MSP_Fast_Search": "*_ACCEL_200.cand",
}

# The child environment is identical for every phase, so build it once.
# NOTE(review): this replaces any inherited PRESTO value with the relative
# path "presto" — confirm that is intended for this setup.
custom_env = os.environ.copy()
custom_env["PRESTO"] = "presto"

# --- 2. EXECUTION LOOP ---
for phase_name, file_pattern in phase_mapping.items():
    # Reset per phase so the error handlers below never see an unbound name
    # or the previous phase's file list.
    cand_files_for_phase = []
    try:
        print(f"\n--- Processing Sifting Phase: '{phase_name}' ---")

        # The relative paths below (existence check, glob, output file) are
        # resolved against DATA_DIR. This changes the working directory of
        # the whole Python process, not just of this loop.
        os.chdir(DATA_DIR)

        # Define output file
        sifted_output_file = f"Sifted_Candidates_{phase_name}.txt"

        # Skip phases whose non-empty result already exists.
        if os.path.exists(sifted_output_file) and os.path.getsize(sifted_output_file) > 0:
            print(f"✅ Sifted file '{sifted_output_file}' already exists. Skipping.")
            continue

        # Sorted so the argument order is reproducible between runs.
        cand_files_for_phase = sorted(glob.glob(file_pattern))

        if not cand_files_for_phase:
            print(f"⚠️  WARNING: No files found for pattern '{file_pattern}'.")
            print("    If you didn't run this specific Step 3 strategy, this is normal.")
            continue

        print(f"    Found {len(cand_files_for_phase)} candidate files. Running Sift...")

        # --- EXECUTION ---
        command = [SIFTING_COMMAND] + cand_files_for_phase

        # cwd=DATA_DIR lets the file names in 'command' stay relative (shorter),
        # which helps avoid the "Argument list too long" error.
        result = subprocess.run(
            command,
            check=True,
            capture_output=True,  # The sift output is read from stdout below
            text=True,
            env=custom_env,
            cwd=DATA_DIR,
        )

        sifted_candidates_output = result.stdout

        # --- PARSING RESULTS ---
        # Count the surviving candidates: non-blank lines that do not start
        # with '#' and contain "DM".
        candidate_lines = [
            line for line in sifted_candidates_output.strip().split('\n')
            if line.strip() and not line.startswith('#') and "DM" in line
        ]
        num_cands = len(candidate_lines)

        print(f"    ✅ Sift complete. Found {num_cands} unique candidates.")

        # The full output is saved even when no candidate line was counted.
        with open(sifted_output_file, 'w') as f:
            f.write(sifted_candidates_output)

        if num_cands > 0:
            print(f"    🎉 RESULTS SAVED to: {sifted_output_file}")
        else:
            print("    📉 No candidates passed the filter.")

    except subprocess.CalledProcessError as e:
        print(f"\n  ❌ Error during sifting phase '{phase_name}'.")
        print(f"  STDERR: {e.stderr.strip()}")

    except OSError as e:
        if e.errno == ARG_LIST_TOO_LONG:
            print("\n  ❌ ERROR: Argument list too long.")
            print(f"     You have {len(cand_files_for_phase)} files, which exceeds WSL limits.")
            print("     Advice: Step 3 might have produced too many candidates, or you need to split this batch manually.")
        else:
            print(f"\n  ❌ OS Error: {e}")

    except Exception as e:
        print(f"\n  ❌ Unexpected error: {e}")

print("\n🎉 --- All sifting phases complete! --- 🎉")

In [None]:
import os
import subprocess
import glob
import re
import shutil
import sys

print("--- Step 5: Folding Candidates ---")

# --- 1. CONFIGURATION ---
# A. Parent folder holding the raw data and the mask
RAW_DATA_DIR = "/mnt/d/Presto_Project/thrd_psr"

# B. Folder holding the sifted lists and .cand files (currently the same folder)
CANDIDATE_DIR = RAW_DATA_DIR

# C. Output folder for the final PDF plots
OUTPUT_DIRECTORY = os.path.join(RAW_DATA_DIR, "CANDIDATE_PLOTS")
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)

# FILENAMES
RAW_FILENAME = "bpsr100613_052818_beam08.sf"
MASK_FILENAME = "bpsr100613_beam08_rfifind.mask"

# COMMANDS
PREPFOLD_COMMAND = "prepfold"
PS2PDF_COMMAND = "ps2pdf"

# --- Match the keys from Step 4 Grid Search ---
phase_suffixes = ["Iso_Slow_Search", "Binary_Search", "MSP_Fast_Search"]

# Compiled once; matches the "DM<digits>.<digits>" tag in a candidate file name.
DM_PATTERN = re.compile(r'DM(\d+\.\d+)')


def parse_sifted_candidates(sifted_list_path, candidate_dir):
    """Return the candidates listed in a sifted-candidates text file.

    Only lines whose first field has the form ``<file>:<number>`` are kept;
    comment lines, blank lines and header rows are skipped.

    Args:
        sifted_list_path: Path of the sifted list to read.
        candidate_dir: Directory the referenced .cand files live in.

    Returns:
        A list of dicts with keys ``path`` (full .cand path), ``num``
        (candidate number, as a string) and ``shortname`` (.cand file name).
    """
    candidates = []
    with open(sifted_list_path, 'r') as f:
        for line in f:
            if line.startswith('#') or not line.strip():
                continue

            address = line.split()[0]

            # Skip header rows.
            if "File:Candidate" in address or address == "DM":
                continue

            # Anything other than exactly one "<file>:<number>" pair is not
            # a candidate line.
            if address.count(':') != 1:
                continue
            cand_file_name, cand_num = address.split(':')

            # The list names the file without its extension; add it back.
            if not cand_file_name.endswith(".cand"):
                cand_file_name += ".cand"

            candidates.append({
                'path': os.path.join(candidate_dir, cand_file_name),
                'num': cand_num,
                'shortname': cand_file_name,
            })
    return candidates


def find_plot_file(output_basename):
    """Return the .ps file produced for *output_basename*, or None.

    A plain ``<basename>*.ps`` glob would also match a longer candidate
    number (``..._Cand1*`` matches ``..._Cand12...``), so any match whose
    next character after the base name is a digit is rejected.
    """
    prefix_len = len(output_basename)
    for ps_file in sorted(glob.glob(f"{glob.escape(output_basename)}*.ps")):
        if not ps_file[prefix_len:prefix_len + 1].isdigit():
            return ps_file
    return None


# --- 2. CHECK RAW DATA EXISTENCE ---
raw_file_path = os.path.join(RAW_DATA_DIR, RAW_FILENAME)
mask_file_path = os.path.join(RAW_DATA_DIR, MASK_FILENAME)

if not os.path.exists(raw_file_path):
    print(f"❌ ERROR: Raw data not found: {raw_file_path}")
    sys.exit(1)
if not os.path.exists(mask_file_path):
    print(f"❌ ERROR: Mask not found: {mask_file_path}")
    sys.exit(1)

print(f"📂 Candidates: {CANDIDATE_DIR}")
print(f"📂 Output:     {OUTPUT_DIRECTORY}")

# The child environment is identical for every fold, so build it once.
# NOTE(review): this replaces any inherited PRESTO value with the relative
# path "presto" — confirm that is intended for this setup.
custom_env = os.environ.copy()
custom_env["PRESTO"] = "presto"

# --- 3. EXECUTION LOOP ---
for suffix in phase_suffixes:

    sifted_list_path = os.path.join(CANDIDATE_DIR, f"Sifted_Candidates_{suffix}.txt")
    print(f"\n--- Processing Sifted File: '{os.path.basename(sifted_list_path)}' ---")

    if not os.path.exists(sifted_list_path):
        print("  ⚠️  File not found. (Did Step 4 generate candidates for this strategy?)")
        continue

    # --- PARSING THE FILE ---
    candidates_to_fold = parse_sifted_candidates(sifted_list_path, CANDIDATE_DIR)

    num_cands = len(candidates_to_fold)
    print(f"  Found {num_cands} candidates to fold.")

    # --- FOLDING LOOP ---
    for i, cand in enumerate(candidates_to_fold):
        cand_path = cand['path']
        cand_num = cand['num']

        dm_match = DM_PATTERN.search(cand['shortname'])
        dm_val = dm_match.group(1) if dm_match else "0.00"

        output_basename = f"Fold_{suffix}_DM{dm_val}_Cand{cand_num}"
        final_pdf_path = os.path.join(OUTPUT_DIRECTORY, f"{output_basename}.pdf")

        if os.path.exists(final_pdf_path):
            print(f"    ✅ Plot exists for Cand {cand_num} (DM {dm_val}). Skipping.")
            continue

        if not os.path.exists(cand_path):
            # Without the .cand file prepfold cannot look the candidate up.
            print(f"    ❌ Candidate file not found: {cand_path}. Skipping.")
            continue

        print(f"    ⚙️  [{i+1}/{num_cands}] Folding Cand {cand_num} (DM {dm_val})...")

        command = [
            PREPFOLD_COMMAND,
            "-topo",
            "-noxwin",
            "-mask", mask_file_path,

            # -nosearch is deliberately NOT passed, so prepfold can optimise
            # the candidate parameters. Zero-DM filtering IS enabled.
            "-zerodm",
            "-n", "128",
            "-nsub", "64",
            "-accelfile", cand_path,
            "-accelcand", cand_num,
            "-o", output_basename,
            raw_file_path,
        ]

        # --- Run PRESTO (output files land in the current working directory) ---
        try:
            subprocess.run(command, check=True, text=True, env=custom_env)
        except subprocess.CalledProcessError:
            print(f"    ❌ Prepfold failed for Cand {cand_num}.")
            continue
        except Exception as e:
            print(f"    ❌ System Error: {e}")
            continue

        # --- Convert to PDF ---
        ps_file = find_plot_file(output_basename)
        if ps_file is None:
            print("    ❌ Error: No .ps file generated.")
            continue

        # Swap only the final extension, and hand ps2pdf the output name
        # explicitly instead of relying on its default naming.
        generated_pdf = os.path.splitext(ps_file)[0] + ".pdf"
        try:
            subprocess.run([PS2PDF_COMMAND, ps_file, generated_pdf], check=True)
            if os.path.exists(generated_pdf):
                # Move only a finished PDF into place, so a failed conversion
                # is never mistaken for an existing plot on the next run.
                shutil.move(generated_pdf, final_pdf_path)
                print(f"       -> Saved: {os.path.basename(final_pdf_path)}")
                os.remove(ps_file)
        except subprocess.CalledProcessError:
            print(f"    ❌ ps2pdf failed for Cand {cand_num}.")
        except Exception as e:
            print(f"    ❌ System Error: {e}")

print("\n🎉 --- Pipeline Finished! Check the 'CANDIDATE_PLOTS' folder. --- 🎉")