## PCR-GLOBWB logging

In [1]:
import re
import pandas as pd



In [2]:
# Load the PCR-GLOBWB variable definitions as raw text
with open('variable_list.py', 'r') as handle:
    text = handle.read()

# A blank line separates one chunk of definitions from the next
blocks = text.split('\n\n')


def _capture(pattern, chunk):
    """Return group 1 of the first match of `pattern` in `chunk`, or None."""
    hit = re.search(pattern, chunk)
    return hit.group(1) if hit else None


# Parallel lists: one entry per chunk that defines a variable name
variables = []
short_names = []
comments = []

for chunk in blocks:
    name = _capture(r"pcrglobwb_variable_name\s*=\s*'([^']+)'", chunk)
    if name is None:
        # Chunk does not assign pcrglobwb_variable_name; nothing to record
        continue

    variables.append(name)
    # Only single-quoted short names and double-quoted comments are recognised here
    short_names.append(
        _capture(r"netcdf_short_name\[pcrglobwb_variable_name\]\s*=\s*'([^']+)'", chunk)
    )
    comments.append(
        _capture(r"comment\[pcrglobwb_variable_name\]\s*=\s*\"([^\"]*)\"", chunk)
    )

# Assemble the table, one row per variable
df = pd.DataFrame(
    {
        'pcrglobwb_variable_name': variables,
        'netcdf_short_name': short_names,
        'comment': comments,
    }
)

# Persist the table and show it in the cell output
df.to_csv('output_table.csv', index=False)
print(df)

    pcrglobwb_variable_name                   netcdf_short_name comment
0                  actualET            land_surface_evaporation    None
1             precipitation                       precipitation    None
2               temperature                         temperature    None
3            referencePotET     reference_potential_evaporation    None
4     totalLandSurfacePotET  land_surface_potential_evaporation    None
..                      ...                                 ...     ...
157            ulyssesSMUpp                               UppSM    None
158            ulyssesSMLow                               LowSM    None
159          ulyssessRefPET                              RefPET    None
160         ulyssessCropPET                             CropPET    None
161              ulyssesTWS                                 TWS    None

[162 rows x 3 columns]


In [3]:
# Write the current table to CSV without the row index column
df.to_csv(path_or_buf='pcrglobwb_variables.csv', index=False)

In [4]:
import re
import pandas as pd

# Read the PCR-GLOBWB variable definitions as raw text
with open('variable_list.py', 'r') as f:
    text = f.read()

# Split into blocks; a blank line is taken as the separator between definitions
blocks = text.split('\n\n')

# Patterns are compiled once instead of being re-looked-up on every iteration.
_VAR_NAME_RE = re.compile(r"pcrglobwb_variable_name\s*=\s*'([^']+)'")
# Short name: either a single-quoted literal (group 1) or the bare
# identifier pcrglobwb_variable_name (group 1 stays unmatched).
_SHORT_NAME_RE = re.compile(
    r"netcdf_short_name\[pcrglobwb_variable_name\]\s*=\s*"
    r"(?:'([^']+)'|pcrglobwb_variable_name)"
)
# Comment: single-quoted (group 1) or double-quoted (group 2) literal.
_COMMENT_RE = re.compile(
    r"comment\[pcrglobwb_variable_name\]\s*=\s*(?:'([^']*)'|\"([^\"]*)\")"
)


def _parse_block(block):
    """Extract one table row from a block of variable_list.py text.

    Args:
        block: text of a single blank-line-delimited block.

    Returns:
        A ``(variable name, short name, comment)`` tuple, or ``None`` when
        the block does not assign ``pcrglobwb_variable_name``. The short
        name falls back to the variable name when it is assigned from the
        bare ``pcrglobwb_variable_name`` identifier, and is ``None`` when no
        assignment is found. The comment is ``None`` when no quoted comment
        is found; an empty quoted comment is returned as ``''``.
    """
    var_match = _VAR_NAME_RE.search(block)
    if var_match is None:
        return None
    var_name = var_match.group(1)

    short_name = None
    sn_match = _SHORT_NAME_RE.search(block)
    if sn_match is not None:
        quoted = sn_match.group(1)
        short_name = quoted if quoted is not None else var_name

    comment_text = None
    comment_match = _COMMENT_RE.search(block)
    if comment_match is not None:
        single_quoted, double_quoted = comment_match.groups()
        # Test against None, not truthiness: an empty single-quoted comment
        # ('') must not fall through to the unmatched double-quote group.
        comment_text = single_quoted if single_quoted is not None else double_quoted

    return var_name, short_name, comment_text


variables = []
short_names = []
comments = []

for block in blocks:
    parsed = _parse_block(block)
    if parsed is None:
        continue
    var_name, short_name, comment_text = parsed
    variables.append(var_name)
    short_names.append(short_name)
    comments.append(comment_text)

# Create DataFrame
df = pd.DataFrame({
    'pcrglobwb_variable_name': variables,
    'netcdf_short_name': short_names,
    'comment': comments
})

# Export to CSV and Excel
df.to_csv('pcrglobwb_variables.csv', index=False)
df.to_excel('pcrglobwb_variables.xlsx', index=False)

print(df)


    pcrglobwb_variable_name                   netcdf_short_name  \
0                  actualET            land_surface_evaporation   
1             precipitation                       precipitation   
2               temperature                         temperature   
3            referencePotET     reference_potential_evaporation   
4     totalLandSurfacePotET  land_surface_potential_evaporation   
..                      ...                                 ...   
157            ulyssesSMUpp                               UppSM   
158            ulyssesSMLow                               LowSM   
159          ulyssessRefPET                              RefPET   
160         ulyssessCropPET                             CropPET   
161              ulyssesTWS                                 TWS   

                                               comment  
0                                                 None  
1                                                 None  
2                       

In [5]:


# Read the PCR-GLOBWB variable definitions as raw text
with open('variable_list.py', 'r') as f:
    text = f.read()

# Split into blocks; a blank line is taken as the separator between definitions
blocks = text.split('\n\n')

# Compile the patterns once, outside the loop.
var_name_re = re.compile(r"pcrglobwb_variable_name\s*=\s*'([^']+)'")
# Group 1 is set only when the short name is a single-quoted literal.
short_name_re = re.compile(
    r"netcdf_short_name\[pcrglobwb_variable_name\]\s*=\s*(?:'([^']+)'|pcrglobwb_variable_name)"
)
# Group 1: single-quoted comment; group 2: double-quoted comment.
comment_re = re.compile(
    r"comment\[pcrglobwb_variable_name\]\s*=\s*(?:'([^']*)'|\"([^\"]*)\")"
)

variables = []
short_names = []
comments = []

for block in blocks:
    # PCR-GLOBWB variable name; blocks without one are skipped
    var_match = var_name_re.search(block)
    if var_match is None:
        continue
    var_name = var_match.group(1)

    # netcdf_short_name
    sn_match = short_name_re.search(block)
    if sn_match is None:
        short_name = None
    elif sn_match.group(1) is not None:
        short_name = sn_match.group(1)
    else:
        # Assigned to the variable itself → flag as warning
        short_name = 'will generate error'

    # comment
    comment_match = comment_re.search(block)
    if comment_match is None:
        comment_text = None
    else:
        single_quoted, double_quoted = comment_match.groups()
        # Compare with None rather than truthiness so that an empty
        # single-quoted comment ('') is kept as '' instead of becoming None.
        comment_text = single_quoted if single_quoted is not None else double_quoted

    variables.append(var_name)
    short_names.append(short_name)
    comments.append(comment_text)

# Create DataFrame
df = pd.DataFrame({
    'pcrglobwb_variable_name': variables,
    'netcdf_short_name': short_names,
    'comment': comments
})

# Export to CSV and Excel
df.to_csv('pcrglobwb_variables.csv', index=False)
df.to_excel('pcrglobwb_variables.xlsx', index=False)

print(df)


    pcrglobwb_variable_name                   netcdf_short_name  \
0                  actualET            land_surface_evaporation   
1             precipitation                       precipitation   
2               temperature                         temperature   
3            referencePotET     reference_potential_evaporation   
4     totalLandSurfacePotET  land_surface_potential_evaporation   
..                      ...                                 ...   
157            ulyssesSMUpp                               UppSM   
158            ulyssesSMLow                               LowSM   
159          ulyssessRefPET                              RefPET   
160         ulyssessCropPET                             CropPET   
161              ulyssesTWS                                 TWS   

                                               comment  
0                                                 None  
1                                                 None  
2                       

In [6]:
import re
import pandas as pd

# Read the PCR-GLOBWB variable definitions as raw text
with open('variable_list.py', 'r') as f:
    text = f.read()

# Split into blocks; a blank line is taken as the separator between definitions
blocks = text.split('\n\n')

# Compile the patterns once, outside the loop.
var_name_re = re.compile(r"pcrglobwb_variable_name\s*=\s*'([^']+)'")
# Group 1 is set only when the short name is a single-quoted literal.
short_name_re = re.compile(
    r"netcdf_short_name\[pcrglobwb_variable_name\]\s*=\s*(?:'([^']+)'|pcrglobwb_variable_name)"
)
# Group 1: single-quoted comment; group 2: double-quoted comment.
comment_re = re.compile(
    r"comment\[pcrglobwb_variable_name\]\s*=\s*(?:'([^']*)'|\"([^\"]*)\")"
)

variables = []
short_names = []
comments = []

for block in blocks:
    # PCR-GLOBWB variable name; blocks without one are skipped
    var_match = var_name_re.search(block)
    if var_match is None:
        continue
    var_name = var_match.group(1)

    # netcdf_short_name
    short_name = None
    warning = ""
    sn_match = short_name_re.search(block)
    if sn_match is not None:
        if sn_match.group(1) is not None:
            short_name = sn_match.group(1)
        else:
            # Assigned to the variable itself → leave empty, add warning to comment
            warning = "netcdf_short_name assigned to variable itself, will generate error"

    # comment
    comment_text = None
    comment_match = comment_re.search(block)
    if comment_match is not None:
        single_quoted, double_quoted = comment_match.groups()
        # Compare with None rather than truthiness so that an empty
        # single-quoted comment ('') is kept as '' instead of becoming None.
        comment_text = single_quoted if single_quoted is not None else double_quoted

    # Combine comment with warning if needed; a missing or empty comment
    # is replaced by the warning alone.
    if warning:
        if comment_text:
            comment_text = comment_text + " | " + warning
        else:
            comment_text = warning

    variables.append(var_name)
    short_names.append(short_name)
    comments.append(comment_text)

# Create DataFrame
df = pd.DataFrame({
    'pcrglobwb_variable_name': variables,
    'netcdf_short_name': short_names,
    'comment': comments
})

# Export to CSV and Excel
df.to_csv('pcrglobwb_variables.csv', index=False)
df.to_excel('pcrglobwb_variables.xlsx', index=False)

print(df)


    pcrglobwb_variable_name                   netcdf_short_name  \
0                  actualET            land_surface_evaporation   
1             precipitation                       precipitation   
2               temperature                         temperature   
3            referencePotET     reference_potential_evaporation   
4     totalLandSurfacePotET  land_surface_potential_evaporation   
..                      ...                                 ...   
157            ulyssesSMUpp                               UppSM   
158            ulyssesSMLow                               LowSM   
159          ulyssessRefPET                              RefPET   
160         ulyssessCropPET                             CropPET   
161              ulyssesTWS                                 TWS   

                                               comment  
0                                                 None  
1                                                 None  
2                       

In [11]:
import ewatercycle.models

# Print the attribute names of the ewatercycle.models namespace, as returned by dir()
print(dir(ewatercycle.models))

['HBV', 'HBVLocal', 'Hype', 'LeakyBucket', 'Lisflood', 'MarrmotM01', 'MarrmotM14', 'ModelSources', 'PCRGlobWB', 'Wflow', 'WflowJl', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '_model', '_model_entrypoints', '_models', 'eWaterCycleModel', 'entry_points', 'get_package_name', 'packages_distributions', 'shared', 'sources']
