In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re  # For handling space-aligned labels correctly

def getModtranData(run):
    with open(run + '/tape7.scn') as fp:
        step1 = fp.readlines()
        activate = False
        data = []
        
        for line in step1:
            if "WAVLEN MCRN" in line:  # Ensure full label match
                activate = True
          
                labels = re.findall(r'\S+(?: \S+)?', line.strip())
                labels = [label for label in labels if label not in ['THRML SCT', 'DEPTH']]
                
            elif activate:
                if line.strip() == '-9999.':
                    activate = False
                    continue
                else:
                    data.append(line.strip().split())

        # Convert list to numpy array (handle empty case)
        if data:
            data2 = np.array(data, dtype=np.float32)
            runData = {label: data2[:, i] for i, label in enumerate(labels)}
        else:
            runData = {}

    return runData

# Usage 
runData = getModtranData('MODTRAL_models_2025/MLS_ALB0_WAT0.2')


In [7]:
import numpy as np
import os
import re

def getModtranData(run):
    file_path = os.path.join(run, 'tape7.scn')
    with open(file_path) as fp:
        step1 = fp.readlines()
        activate = False
        data = []
        labels = []
        
        for line in step1:
            if "WAVLEN" in line:
                activate = True
                labels = re.split(r'\s{2,}', line.strip())  # Split on 2+ spaces
                
                # Remove unwanted labels safely
                for unwanted in ['THRML SCT', 'DEPTH']:
                    if unwanted in labels:
                        labels.remove(unwanted)
                
            elif activate:
                if line.strip() == '-9999.':
                    activate = False
                    continue
                else:
                    data.append(line.strip().split())

        data2 = np.float32(data)
        runData = {label: data2[:, i] for i, label in enumerate(labels)}
        
    return runData

# base_dir = os.getcwd()  
# modtran_path = os.path.join(base_dir, 'MODTRAL_models_2025', 'MLS_ALB0_WAT0.2')
runData = getModtranData('MODTRAL_models_2025/MLS_ALB0_WAT0.2')

In [None]:
# def tape7scn(file_path):
#     data = {'WAVLEN MCRN': [], 'GRND RFLT': [], 'TOTAL RAD': []}
    
#     with open(file_path, 'r') as f:
#         lines = f.readlines()
        
#         for line in lines[11:]:  # Start from line 12 (index 11)
#             wavlen_mcrn = line[4:13].strip()  # Columns 5-13
#             grnd_rflt = line[75:86].strip()   # Columns 76-86
#             total_rad = line[97:108].strip()  # Columns 98-108
            
#             if wavlen_mcrn and grnd_rflt and total_rad:
#                 data['WAVLEN MCRN'].append(wavlen_mcrn)
#                 data['GRND RFLT'].append(grnd_rflt)
#                 data['TOTAL RAD'].append(total_rad)
    
#     return data

In [None]:
import numpy as np
import os
import re
import pandas as pd
from pathlib import Path

def process_all_runs(main_dir):
    all_dfs = []
    for root, dirs, files in os.walk(main_dir):
        for dir_name in dirs:
            if re.match(r".*_ALB\d+_WAT[\d.]+", dir_name):
                run_path = os.path.join(root, dir_name)
                tape7_path = os.path.join(run_path, "tape7.scn")
                if not os.path.exists(tape7_path):
                    continue  # Skip directories without tape7.scn
                
                try:
                    df = process_run(run_path)
                    all_dfs.append(df)
                except Exception as e:
                    print(f"Skipped {dir_name} due to error: {str(e)}")
    data_ = pd.concat(all_dfs, ignore_index=True)
    return data_

def process_run(run_path):
    run_data = getModtranData(run_path)
    dir_name = os.path.basename(run_path)

    alb_match = re.search(r"ALB(\d+)", dir_name)
    wvp_match = re.search(r"WAT([\d.]+)", dir_name)
    
    if not alb_match or not wvp_match:
        raise ValueError(f"Invalid directory name format: {dir_name}")
    
    alb_value = alb_match.group(1)  # "0" or "1"
    wvp_value = float(wvp_match.group(1))  # e.g., 0.2 → 0.2

    if 'ALB0' in dir_name:
        df = pd.DataFrame({
            'WAVLEN_MCRN': run_data['WAVLEN MCRN'],
            'TOTAL_RAD': run_data['TOTAL RAD'],
            'ALBEDO': alb_value,
            'WVP_COL': wvp_value
        })
    elif 'ALB1' in dir_name:
        df = pd.DataFrame({
            'WAVLEN_MCRN': run_data['WAVLEN MCRN'],
            'GRND_RFLT': run_data['GRND RFLT'],
            'ALBEDO': alb_value,
            'WVP_COL': wvp_value
        })
    else:
        raise ValueError(f"Unsupported ALB type in {dir_name}")
    
    return df

def getModtranData(run):
    file_path = os.path.join(run, 'tape7.scn')
    with open(file_path) as fp:
        step1 = fp.readlines()
        activate = False
        data = []
        labels = []
        
        for line in step1:
            if "WAVLEN" in line:
                activate = True
                labels = re.split(r'\s{2,}', line.strip()) # Handle multi-word entries
                
                for unwanted in ['THRML SCT', 'DEPTH']: # Remove unwanted labels safely
                    if unwanted in labels:
                        labels.remove(unwanted)
                
            elif activate:
                if line.strip() == '-9999.':
                    activate = False
                    continue
                else:
                    data.append(line.strip().split())

        data2 = np.float32(data)
        runData = {label: data2[:, i] for i, label in enumerate(labels)}
        
    return runData

# Usage
main_dir = os.path.join(os.getcwd(), 'MODTRAL_models_2025')
combined_df = process_all_runs(main_dir)

# Save or analyze the combined data
combined_df.to_csv('combined_results.csv', index=False)
print(combined_df)

Skipped SAW_ALB0_WAT1.0 due to error: too many indices for array: array is 1-dimensional, but 2 were indexed
Skipped SAW_ALB1_WAT0.5 due to error: too many indices for array: array is 1-dimensional, but 2 were indexed
        WAVLEN_MCRN  TOTAL_RAD ALBEDO  WVP_COL  GRND_RFLT
0             0.350   0.006005      0      0.2        NaN
1             0.351   0.005866      0      0.2        NaN
2             0.352   0.006040      0      0.2        NaN
3             0.353   0.005603      0      0.2        NaN
4             0.354   0.006349      0      0.2        NaN
...             ...        ...    ...      ...        ...
108043        2.596        NaN      1      2.0        0.0
108044        2.597        NaN      1      2.0        0.0
108045        2.598        NaN      1      2.0        0.0
108046        2.599        NaN      1      2.0        0.0
108047        2.600        NaN      1      2.0        0.0

[108048 rows x 5 columns]


In [32]:
import numpy as np
import os
import re
import pandas as pd
from pathlib import Path

def getModtranData(run):
    file_path = os.path.join(run, 'tape7.scn')
    with open(file_path) as fp:
        step1 = fp.readlines()
        activate = False
        data = []
        labels = []
        
        for line in step1:
            if "WAVLEN" in line:
                activate = True
                labels = re.split(r'\s{2,}', line.strip()) # Handle multi-word entries
                
                for unwanted in ['THRML SCT', 'DEPTH']: # Remove unwanted labels safely
                    if unwanted in labels:
                        labels.remove(unwanted)
                
            elif activate:
                if line.strip() == '-9999.':
                    activate = False
                    continue
                else:
                    data.append(line.strip().split())

        data2 = np.float32(data)
        runData = {label: data2[:, i] for i, label in enumerate(labels)}
        
    return runData

def process_all_profiles(main_dir):
    # Dictionary to hold DataFrames keyed by profile name
    profile_dfs = {}
    
    # Group directories by profile and water vapor
    profile_groups = {}
    
    # Walk through directory tree
    for root, dirs, files in os.walk(main_dir):
        for dir_name in dirs:
            # Match pattern: PROFILE_ALBx_WATy.y (e.g., "MLS_ALB0_WAT0.2")
            match = re.match(r"^([A-Za-z]+)_ALB(\d+)_WAT([\d.]+)$", dir_name)
            if match:
                profile, alb, wvp = match.groups()
                wvp = float(wvp)  # Convert to numeric
                
                # Create grouping key
                key = (profile, wvp)
                
                # Initialize group if new
                if key not in profile_groups:
                    profile_groups[key] = {'ALB0': None, 'ALB1': None}
                
                # Store path by albedo type
                run_path = os.path.join(root, dir_name)
                profile_groups[key][f'ALB{alb}'] = run_path

    # Process each profile group
    for (profile, wvp), paths in profile_groups.items():
        # Skip incomplete pairs
        if not paths['ALB0'] or not paths['ALB1']:
            print(f"Skipping incomplete pair: {profile}_WAT{wvp}")
            continue
            
        try:
            # Load ALB0 data (TOTAL_RAD)
            alb0_data = getModtranData(paths['ALB0'])
            alb0_df = pd.DataFrame({
                'WAVLEN_MCRN': alb0_data['WAVLEN MCRN'],
                'TOTAL_RAD': alb0_data['TOTAL RAD']
            })
            
            # Load ALB1 data (GRND_RFLCT)
            alb1_data = getModtranData(paths['ALB1'])
            alb1_df = pd.DataFrame({
                'WAVLEN_MCRN': alb1_data['WAVLEN MCRN'],
                'GRND_RFLCT': alb1_data['GRND RFLT']
            })
            
            # Merge on wavelength
            merged = pd.merge(alb0_df, alb1_df, on='WAVLEN_MCRN', how='inner')
            
            # Add metadata
            merged['PROFILE'] = profile
            merged['WATER'] = wvp
            
            # Add to final DataFrames
            if profile not in profile_dfs:
                profile_dfs[profile] = merged
            else:
                profile_dfs[profile] = pd.concat([profile_dfs[profile], merged])
                
        except Exception as e:
            print(f"Failed to process {profile} WAT{wvp}: {str(e)}")
    
    # Finalize DataFrames
    for profile in profile_dfs:
        # Reorder columns and sort
        profile_dfs[profile] = profile_dfs[profile][
            ['PROFILE', 'WAVLEN_MCRN', 'TOTAL_RAD', 'GRND_RFLCT', 'WATER']
        ].sort_values(by=['WAVLEN_MCRN', 'WATER'])
        
    return profile_dfs

# Example usage
main_dir = 'MODTRAL_models_2025'
profile_dataframes = process_all_profiles(main_dir)

# Access individual profiles
mls_df = profile_dataframes.get('MLS')
desert_df = profile_dataframes.get('DESERT')

# Save to CSV
mls_df.to_csv('MLS_atmospheric_profile.csv', index=False)

Failed to process SAW WAT0.5: too many indices for array: array is 1-dimensional, but 2 were indexed
Skipping incomplete pair: SAW_WAT1.0
