# Prototyping notebook for future version

In [11]:
:dep astrors
:dep polars
:dep numpy
:dep plotters
:dep glob

In [15]:
:dep regex

In [124]:
use std::fs::File;
use std::path::Path;

use astrors::io;
use glob::glob;
use polars::prelude::*;
use regex::Regex;

/// Builds the file index for the hard-coded XRR data directory.
///
/// The directory is scanned with `scan_directory`, the resulting paths are
/// split into sample name / scan id / file id columns by `parse_files`, and
/// those columns are assembled by `create_dataframe`.
///
/// # Errors
/// Returns the `PolarsError` produced by `create_dataframe`, if any.
fn main() -> Result<DataFrame, PolarsError> {
    let data_root = "C:/Users/hduva/Washington State University (email.wsu.edu)/Carbon Lab Research Group - Documents/Synchrotron Logistics and Data/ALS - Berkeley/Data/BL1101/2024Jun/XRR";

    let (names, scans, frames) = parse_files(scan_directory(data_root));
    create_dataframe(names, scans, frames)
}

/// Recursively collects the `.fits` files below `path_str`.
///
/// The search pattern is `{path_str}/**/*.fits`. Any match whose full path
/// contains the text `Captured Image` is left out of the result.
///
/// # Panics
/// Panics if the glob pattern is rejected, if a directory entry cannot be
/// read, or if a matched path is not valid UTF-8.
fn scan_directory(path_str: &str) -> Vec<String> {
    let pattern = format!("{}/**/*.fits", path_str);

    glob(&pattern)
        .expect("Failed to read glob pattern")
        .filter_map(|entry| {
            let path = match entry {
                Ok(found) => found,
                Err(e) => panic!("{:?}", e),
            };
            let as_text = path.to_str().unwrap();
            // Keep only paths that are not captured-image files.
            (!as_text.contains("Captured Image")).then(|| as_text.to_string())
        })
        .collect()
}

/// Splits `.fits` file names into sample name, scan id and file id.
///
/// A file name is expected to end in `<5 digits>-<digits>.fits`. The five
/// digits become the scan id, the digits after the dash become the file id,
/// and everything in front of the scan id (with trailing `-` removed) becomes
/// the sample name. Names that do not match are reported on stdout and
/// skipped, so the three returned vectors always have equal length.
///
/// `get_header` is called for every path before its name is parsed, including
/// paths whose name does not match.
///
/// # Panics
/// Panics if a path has no final file-name component, or if `get_header`
/// panics for one of the files.
fn parse_files(files: Vec<String>) -> (Vec<String>, Vec<String>, Vec<String>) {
    let re = Regex::new(r"([0-9]{5})-(\d+)\.fits$").unwrap();
    // At most one row is produced per input path.
    let mut sample_names = Vec::with_capacity(files.len());
    let mut scan_ids = Vec::with_capacity(files.len());
    let mut file_ids = Vec::with_capacity(files.len());

    for file_path in files {
        get_header(&file_path);
        let file_name = Path::new(&file_path).file_name().unwrap().to_str().unwrap();

        if let Some(caps) = re.captures(file_name) {
            let scan = caps.get(1).unwrap();
            let file_id = caps.get(2).unwrap().as_str();
            // Cut at the offset where the scan id actually matched. Splitting
            // the name on the scan-id text would cut at its FIRST occurrence,
            // truncating sample names that happen to contain the same digits.
            let sample_name = file_name[..scan.start()].trim_end_matches('-');

            sample_names.push(sample_name.to_string());
            scan_ids.push(scan.as_str().to_string());
            file_ids.push(file_id.to_string());
        } else {
            println!("No match for file: {}", file_name);
        }
    }
    (sample_names, scan_ids, file_ids)
}

/// Assembles the three parsed columns into a `DataFrame`.
///
/// The columns are named `SampleName`, `ScanID` and `FileID`, in that order.
///
/// # Errors
/// Returns whatever `PolarsError` `DataFrame::new` reports for the columns.
fn create_dataframe(sample_names: Vec<String>, scan_ids: Vec<String>, file_ids: Vec<String>) -> Result<DataFrame, PolarsError> {
    let columns = vec![
        Series::new("SampleName", &sample_names),
        Series::new("ScanID", &scan_ids),
        Series::new("FileID", &file_ids),
    ];
    DataFrame::new(columns)
}

/// Reads the header of the file at `file_path` and pretty-prints it.
///
/// The file is opened with `File::open`, loaded into a fresh `io::Header`
/// via `read_from_file`, and printed with `pretty_print_advanced`. Nothing is
/// returned to the caller.
///
/// # Panics
/// Panics if the file cannot be opened or if reading the header fails.
fn get_header(file_path: &str) {
    let mut handle = File::open(file_path).unwrap();
    let mut hdr = io::Header::new();
    hdr.read_from_file(&mut handle).unwrap();
    hdr.pretty_print_advanced();
}

// Run the pipeline and keep the outcome in the notebook-level binding `df`.
// `main` returns a `Result`, so the Debug print below shows either the
// DataFrame wrapped in `Ok(..)` or the error wrapped in `Err(..)`.
let df = main();
println!("{:?}", df);