In [8]:
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# import re  # For handling space-aligned labels correctly

# def getModtranData(run):
#     with open(run + '/tape7.scn') as fp:
#         step1 = fp.readlines()
#         activate = False
#         data = []
        
#         for line in step1:
#             if "WAVLEN MCRN" in line:  # Ensure full label match
#                 activate = True
          
#                 labels = re.findall(r'\S+(?: \S+)?', line.strip())
#                 labels = [label for label in labels if label not in ['THRML SCT', 'DEPTH']]
                
#             elif activate:
#                 if line.strip() == '-9999.':
#                     activate = False
#                     continue
#                 else:
#                     data.append(line.strip().split())

#         # Convert list to numpy array (handle empty case)
#         if data:
#             data2 = np.array(data, dtype=np.float32)
#             runData = {label: data2[:, i] for i, label in enumerate(labels)}
#         else:
#             runData = {}

#     return runData

# # Usage 
# runData = getModtranData('MODTRAL_models_2025/MLS_ALB0_WAT0.2')


In [7]:
# import numpy as np
# import os
# import re

# def getModtranData(run):
#     file_path = os.path.join(run, 'tape7.scn')
#     with open(file_path) as fp:
#         step1 = fp.readlines()
#         activate = False
#         data = []
#         labels = []
        
#         for line in step1:
#             if "WAVLEN" in line:
#                 activate = True
#                 labels = re.split(r'\s{2,}', line.strip())  # Split on 2+ spaces
                
#                 # Remove unwanted labels safely
#                 for unwanted in ['THRML SCT', 'DEPTH']:
#                     if unwanted in labels:
#                         labels.remove(unwanted)
                
#             elif activate:
#                 if line.strip() == '-9999.':
#                     activate = False
#                     continue
#                 else:
#                     data.append(line.strip().split())

#         data2 = np.float32(data)
#         runData = {label: data2[:, i] for i, label in enumerate(labels)}
        
#     return runData

# # base_dir = os.getcwd()  
# # modtran_path = os.path.join(base_dir, 'MODTRAL_models_2025', 'MLS_ALB0_WAT0.2')
# runData = getModtranData('MODTRAL_models_2025/MLS_ALB0_WAT0.2')

In [None]:
# def tape7scn(file_path):
#     data = {'WAVLEN MCRN': [], 'GRND RFLT': [], 'TOTAL RAD': []}
    
#     with open(file_path, 'r') as f:
#         lines = f.readlines()
        
#         for line in lines[11:]:  # Start from line 12 (index 11)
#             wavlen_mcrn = line[4:13].strip()  # Columns 5-13
#             grnd_rflt = line[75:86].strip()   # Columns 76-86
#             total_rad = line[97:108].strip()  # Columns 98-108
            
#             if wavlen_mcrn and grnd_rflt and total_rad:
#                 data['WAVLEN MCRN'].append(wavlen_mcrn)
#                 data['GRND RFLT'].append(grnd_rflt)
#                 data['TOTAL RAD'].append(total_rad)
    
#     return data

In [3]:
import numpy as np
import os
import re
import pandas as pd
from pathlib import Path

def getModtranData(run):
    file_path = os.path.join(run, 'tape7.scn')
    with open(file_path) as fp:
        step1 = fp.readlines()
        activate = False
        data = []
        labels = []
        
        for line in step1:
            if "WAVLEN" in line:
                activate = True
                labels = re.split(r'\s{2,}', line.strip()) # Handle multi-word entries
                
                for unwanted in ['THRML SCT', 'DEPTH']: # Remove unwanted labels safely
                    if unwanted in labels:
                        labels.remove(unwanted)
                
            elif activate:
                if line.strip() == '-9999.':
                    activate = False
                    continue
                else:
                    data.append(line.strip().split())

        data2 = np.float32(data)
        runData = {label: data2[:, i] for i, label in enumerate(labels)}
        
    return runData

def process_all_profiles(main_dir):
    # Dictionary to hold DataFrames keyed by profile name
    profile_dfs = {}
    
    # Group directories by profile and water vapor
    profile_groups = {}
    
    # Walk through directory tree
    for root, dirs, files in os.walk(main_dir):
        for dir_name in dirs:
            # Match pattern: PROFILE_ALBx_WATy.y (e.g., "MLS_ALB0_WAT0.2")
            match = re.match(r"^([A-Za-z]+)_ALB(\d+)_WAT([\d.]+)$", dir_name)
            if match:
                profile, alb, wvp = match.groups()
                wvp = float(wvp)  # Convert to numeric
                
                # Create grouping key
                key = (profile, wvp)
                
                # Initialize group if new
                if key not in profile_groups:
                    profile_groups[key] = {'ALB0': None, 'ALB1': None}
                
                # Store path by albedo type
                run_path = os.path.join(root, dir_name)
                profile_groups[key][f'ALB{alb}'] = run_path

    # Process each profile group
    for (profile, wvp), paths in profile_groups.items():
        
        missing = []
        if not paths['ALB0']:
            missing.append(f"{profile}_ALB0_WAT{wvp:.1f}")
        if not paths['ALB1']:
            missing.append(f"{profile}_ALB1_WAT{wvp:.1f}")
        
        if missing:
            error_msg = f"Skipping incomplete pair: Missing {', '.join(missing)}"
            if paths['ALB0'] or paths['ALB1']:
                existing = [p for p in [paths['ALB0'], paths['ALB1']] if p]
                error_msg += f" (Only found: {', '.join(os.path.basename(p) for p in existing)})"
            print(error_msg)
            continue            
        try:
            # Load ALB0 data (TOTAL_RAD)
            alb0_data = getModtranData(paths['ALB0'])
            alb0_df = pd.DataFrame({
                'WAVLEN_MCRN': alb0_data['WAVLEN MCRN'],
                'TOTAL_RAD': alb0_data['TOTAL RAD']
            })
            
            # Load ALB1 data (GRND_RFLCT)
            alb1_data = getModtranData(paths['ALB1'])
            alb1_df = pd.DataFrame({
                'WAVLEN_MCRN': alb1_data['WAVLEN MCRN'],
                'GRND_RFLCT': alb1_data['GRND RFLT']
            })
            
            # Merge on wavelength
            merged = pd.merge(alb0_df, alb1_df, on='WAVLEN_MCRN', how='inner')
            
            # Add metadata
            merged['PROFILE'] = profile
            merged['WATER'] = wvp
            
            # Add to final DataFrames
            if profile not in profile_dfs:
                profile_dfs[profile] = merged
            else:
                profile_dfs[profile] = pd.concat([profile_dfs[profile], merged])
                
        except Exception as e:
            dir_names = [os.path.basename(p) for p in paths.values()]
            print(f"Error processing {profile} pair: {', '.join(dir_names)}")
            print(f"Error details: {str(e)}")
    
    return profile_dfs

# Example usage
main_dir = 'MODTRAL_models_2025'
profile_dataframes = process_all_profiles(main_dir)

# Report here
if not profile_dataframes:
    print("No valid atmospheric profiles found!")
else:
    # Process all profiles
    for profile_name, df in profile_dataframes.items():
        # 1. Save to CSV
        filename = f"{profile_name}_atmospheric_profile.csv"
        df.to_csv(filename, index=False)
        
        # 2. Access DataFrame for analysis
        print(f"\nProfile: {profile_name}")
        print(f"Data Shape: {df.shape}")
        print(df)
        

Error processing SAW pair: SAW_ALB0_WAT0.5, SAW_ALB1_WAT0.5
Error details: too many indices for array: array is 1-dimensional, but 2 were indexed
Error processing SAW pair: SAW_ALB0_WAT1.0, SAW_ALB1_WAT1.0
Error details: too many indices for array: array is 1-dimensional, but 2 were indexed

Profile: MLS
Data Shape: (11255, 5)
      WAVLEN_MCRN     TOTAL_RAD  GRND_RFLCT PROFILE  WATER
0           0.350  6.004700e-03    0.009890     MLS    0.2
1           0.351  5.866200e-03    0.009821     MLS    0.2
2           0.352  6.039800e-03    0.010291     MLS    0.2
3           0.353  5.603100e-03    0.009723     MLS    0.2
4           0.354  6.348600e-03    0.011199     MLS    0.2
...           ...           ...         ...     ...    ...
2246        2.596  3.719000e-08    0.000000     MLS    2.0
2247        2.597  4.941700e-08    0.000000     MLS    2.0
2248        2.598  5.588200e-08    0.000000     MLS    2.0
2249        2.599  7.014900e-08    0.000000     MLS    2.0
2250        2.600  6.3