# prompt
Write a python script to do the following:

1. Identify the main_called subroutine or function: the one whose block (delimited by 'subroutine' ... 'end subroutine' or 'function' ... 'end function') contains the label 'Description: MAIN_CALL'.
2. For each subroutine or function, identify all uses of 'variable => struct%variable, & label' or 'variable => struct%variable & label'.
3. Inside the identified main_called function or subroutine, identify every call in the format 'call sub1(var1,var2)' or 'res=my_func(var1,var2)'. Note that not all function names follow the 'xx_func' pattern.
4. Now print the calling sequence in the following order
   subroutine main_called 
   alias use in the main_called subroutine

   call sub1(var1,var2)
   alias use in sub1

   call sub2(var1,var2)
   alias use in sub2

   res=my_func(var1,var2)
   alias use in my_func

   end main_called subroutine
5. make sure the print of alias is of the format
   var1 => structure%var1,  & !label
      

In [46]:
import os
import re

def preprocess_continuations(content):
    """Merge Fortran continuation lines into single logical lines.

    Every line is right-stripped. A line whose last character is then '&'
    has that '&' replaced by a single space and is glued to the line that
    follows it. Lines ending in anything else (including '& !comment') are
    left as their own logical line.

    Returns the logical lines joined with '\\n'.
    """
    logical_lines = []
    pending = []  # fragments of the logical line currently being assembled
    for raw in content.splitlines():
        text = raw.rstrip()
        if text.endswith('&'):
            # Drop the continuation marker and keep collecting.
            pending.append(text[:-1] + ' ')
            continue
        pending.append(text)
        logical_lines.append(''.join(pending))
        pending = []
    # A trailing continuation with nothing after it is still emitted.
    leftover = ''.join(pending)
    if leftover:
        logical_lines.append(leftover)
    return '\n'.join(logical_lines)

def get_blocks(content):
    """Split Fortran source text into subroutine/function blocks.

    Each block runs from a ``subroutine``/``function`` statement to the next
    ``end subroutine``/``end function`` statement. Matching is
    case-insensitive.

    Returns a list of ``(kind, name, start, end, text)`` tuples where
    ``kind`` is ``'subroutine'`` or ``'function'`` (lower-cased), ``name`` is
    the procedure name as written, and ``text == content[start:end]``.

    Limitations: procedures nested under ``contains`` are not tracked (the
    first ``end`` statement closes the block), and function statements that
    start with a type specifier (e.g. ``real function f(x)``) are not
    recognised.
    """
    # Horizontal whitespace only ([ \t]) is used instead of \s: \s also
    # matches newlines, which let an unnamed "end subroutine" swallow the
    # first word of the following line as its "name" and made the scanner
    # skip the next procedure entirely.
    block_start_pat = re.compile(
        r'^[ \t]*(?:(?:recursive|pure|elemental|impure|module)[ \t]+)*'
        r'(subroutine|function)[ \t]+(\w+)'
        # The argument list is optional: "subroutine foo" is valid Fortran.
        r'(?:[ \t]*\([^)\n]*\))?',
        re.IGNORECASE | re.MULTILINE)
    block_end_pat = re.compile(
        r'^[ \t]*end[ \t]*(subroutine|function)\b(?:[ \t]+(\w+))?',
        re.IGNORECASE | re.MULTILINE)

    blocks = []
    pos = 0
    while True:
        m = block_start_pat.search(content, pos)
        if not m:
            break
        end_match = block_end_pat.search(content, m.end())
        if not end_match:
            # Unterminated procedure: nothing more can be delimited.
            break
        start, end = m.start(), end_match.end()
        blocks.append((m.group(1).lower(), m.group(2), start, end, content[start:end]))
        pos = end
    return blocks

def find_main_called(blocks):
    """Return the first block tagged as the main entry point.

    ``blocks`` is an iterable of ``(kind, name, start, end, text)`` tuples.
    A block qualifies when its text contains ``Description: MAIN_CALL``
    (case-insensitive, any whitespace after the colon).

    Returns ``(kind, name, text)`` for the first qualifying block, or
    ``None`` when no block carries the marker.
    """
    marker = re.compile(r'Description:\s*MAIN_CALL', re.IGNORECASE)
    tagged = (
        (kind, name, text)
        for kind, name, _start, _end, text in blocks
        if marker.search(text)
    )
    return next(tagged, None)

def extract_aliases(block_content):
    """Collect ``alias => struct%member [,] & label`` associations.

    Only associations followed by a continuation '&' are matched. The
    optional comma before '&' is consumed by the pattern and is not part of
    any returned field.

    Returns a list of ``(alias, target, after)`` tuples in order of
    appearance, where ``after`` is the rest of the line following '&'
    (typically the trailing comment) with trailing whitespace removed.
    """
    alias_pat = re.compile(
        r'([a-zA-Z0-9_]+)\s*=>\s*([a-zA-Z0-9_]+%[a-zA-Z0-9_]+)\s*,?\s*&([^\n]*)', re.IGNORECASE)
    return [
        (found.group(1), found.group(2), found.group(3).rstrip())
        for found in alias_pat.finditer(block_content)
    ]

def _balanced_args(line, open_idx):
    """Return the text inside the parenthesis opened at ``line[open_idx]``.

    Nested parentheses are honoured. If the parenthesis is never closed,
    fall back to the text up to the first ')' after it, or ``None`` when
    there is no ')' at all. Parentheses inside string literals are not
    treated specially.
    """
    depth = 0
    for idx in range(open_idx, len(line)):
        ch = line[idx]
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                return line[open_idx + 1:idx]
    first_close = line.find(')', open_idx)
    return None if first_close == -1 else line[open_idx + 1:first_close]


def _first_invocation(pattern, line):
    """Return ``(match, args)`` for the first usable ``pattern`` hit in ``line``.

    ``pattern`` must end at an opening parenthesis. Hits whose argument list
    cannot be delimited are skipped. Returns ``None`` when nothing matches.
    """
    for m in pattern.finditer(line):
        args = _balanced_args(line, m.end() - 1)
        if args is not None:
            return m, args
    return None


def extract_call_sequence(block_content):
    """List the procedure invocations found in a block, in source order.

    Two forms are recognised, at most one per line, with ``call`` taking
    precedence:

    * ``call name(args)``
    * ``var = name_func(args)``; only function names ending in ``_func``
      are detected, because a bare ``x = name(...)`` cannot be told apart
      from array indexing using this block's text alone.

    Full-line comments (first non-blank character ``!``) are skipped so
    that commented-out calls are not reported. Argument lists are taken up
    to the matching closing parenthesis, so ``call foo(a(i), b)`` yields
    ``'a(i), b'``.

    Returns a list of ``(type, name, args, line)`` tuples where ``type`` is
    ``'call'`` or ``'func'``, ``args`` is the stripped argument text, and
    ``line`` is the right-stripped source line.
    """
    # call sub1(var1,var2)
    call_pat = re.compile(r'\bcall\s+([a-zA-Z0-9_]+)\s*\(', re.IGNORECASE)
    # NN = CanopyEnergyH2OIter_func(var1,var2)
    assign_func_pat = re.compile(
        r'\b([\w]+)\s*=\s*([a-zA-Z0-9_]+_func)\s*\(', re.IGNORECASE)

    calls = []
    for line in block_content.splitlines():
        if line.lstrip().startswith('!'):
            continue
        hit = _first_invocation(call_pat, line)
        if hit:
            m, args = hit
            calls.append(('call', m.group(1), args.strip(), line.rstrip()))
            continue
        hit = _first_invocation(assign_func_pat, line)
        if hit:
            m, args = hit
            calls.append(('func', m.group(2), args.strip(), line.rstrip()))
    return calls


def format_call_multiline(header, args, indent=8, width=80, is_func=False):
    """Format a Fortran call as one or more '&'-continued lines.

    header: text placed before the opening parenthesis, e.g.
        'call UpdatePlantWaterVars' or 'NN=CanopyEnergyH2OIter_func'.
        Leading whitespace in the header is preserved.
    args: comma-separated argument string; newlines are removed and empty
        items are dropped. Items are re-joined with ',' (no spaces).
    indent: number of spaces that prefix each continuation line.
    width: target maximum line width. A single argument longer than the
        remaining room is still emitted, so a line may exceed ``width``.
    is_func: accepted for call-site compatibility; it does not affect the
        output.

    Returns the formatted string; continued lines end with ', &' and the
    last line ends with ')'.
    """
    args = args.replace('\n', '').replace('\r', '')
    args_list = [a.strip() for a in args.split(',') if a.strip()]

    opening = header + '('
    lines = []
    current = opening
    for arg in args_list:
        # Never break right after the opening parenthesis. Comparing
        # against the exact opening text (not a stripped copy) keeps this
        # true for headers with leading spaces, which otherwise produced an
        # invalid "header(, &" first line.
        # The +3 reserves room for the ', &' continuation suffix.
        if current != opening and len(current) + len(arg) + 3 > width:
            lines.append(current.rstrip(', ') + ', &')
            current = ' ' * indent + arg
        elif current == opening:
            current += arg
        else:
            current += ',' + arg
    lines.append(current.rstrip(', ') + ')')
    return '\n'.join(lines)


def _format_alias_line(alias, target, after):
    """Return one newline-terminated, column-aligned alias line.

    NOTE(review): ``after`` is the text that followed '&' in the source
    (extract_aliases consumes any comma before '&'), so the ``',' in after``
    test below inspects the label text, not the original separator. The
    test is kept as it was; confirm whether the comma should be emitted
    unconditionally.
    """
    separator = ', &' if ',' in after else '&'
    return f"  {alias:<25} => {target:<30} {separator}{after}\n"


def call_seq_analyzer(filepath):
    """Write the call sequence of the MAIN_CALL procedure in a Fortran file.

    The file is read, continuation lines are merged, and the procedure
    whose block contains 'Description: MAIN_CALL' is located. Its aliases,
    then each call it makes followed by the aliases of the called procedure
    (when that procedure is defined in the same file), are appended to
    'call_seq_.txt' in the current directory.

    NOTE(review): the report is opened in append mode, so repeated runs
    accumulate in the same file; confirm that is intended.

    filepath: path (str or path-like) of the Fortran source file.

    Returns a list of ``(alias, target, after)`` tuples for the aliases of
    the called procedures (the main procedure's own aliases are written to
    the report but not returned). Returns an empty list, after printing a
    message, when the file does not exist or no MAIN_CALL procedure is
    found, so callers can always iterate the result.
    """
    filepath = str(filepath)
    if not os.path.isfile(filepath):
        print(f"File '{filepath}' does not exist.")
        return []

    with open(filepath, 'r') as f:
        content = f.read()

    blocks = get_blocks(preprocess_continuations(content))
    main_block = find_main_called(blocks)
    if not main_block:
        print("No MAIN_CALL subroutine/function found.")
        return []

    _main_kind, main_name, main_content = main_block
    # Case-insensitive lookup of procedure text by name (Fortran names are
    # case-insensitive). For duplicate names the last definition wins.
    block_text = {name.lower(): text for _kind, name, _start, _end, text in blocks}
    # Recovers the left-hand-side variable of "lhs = name_func(...)".
    assign_pat = re.compile(r'(\w+)\s*=\s*([a-zA-Z0-9_]+_func)\s*\((.*)', re.IGNORECASE)

    results = []
    with open('call_seq_.txt', 'a') as f:
        f.write(f"subroutine {main_name}\n")
        for alias, target, after in extract_aliases(main_content):
            f.write(_format_alias_line(alias, target, after))

        for call_type, sub, args, full_line in extract_call_sequence(main_content):
            if call_type == 'call':
                header = f"call {sub}"
            else:
                # The assignment must start the line; otherwise fall back to
                # the bare function name.
                m = assign_pat.match(full_line.strip())
                header = f"  {m.group(1)}={m.group(2)}" if m else sub
            f.write(format_call_multiline(
                header, args, indent=8, width=80, is_func=(call_type != 'call')))
            f.write("\n")

            sub_text = block_text.get(sub.lower())
            if sub_text is not None:
                for alias, target, after in extract_aliases(sub_text):
                    f.write(_format_alias_line(alias, target, after))
                    results.append((alias, target, after))

        f.write(f"\nend {main_name} subroutine\n")
        print(f"\nend {main_name} subroutine\n")
    return results

#use of the analyzer
from pathlib import Path

filepath = Path('../../f90src/Plant_bgc/UptakesMod.F90')

alias_struct = call_seq_analyzer(filepath)

# Build parallel lists holding the first occurrence of every alias name;
# later duplicates are only reported.
alias_list, target_list, label_list = [], [], []
# call_seq_analyzer returns None when the file is missing or has no
# MAIN_CALL procedure, so fall back to an empty sequence.
for alias, target, after in alias_struct or []:
    try:
        position = alias_list.index(alias)
    except ValueError:
        alias_list.append(alias)
        target_list.append(target)
        label_list.append(after)
    else:
        # Previously referenced the undefined name `value`, which raised
        # NameError on the first duplicate alias.
        print(f"{alias} found at position {position}")
    
#    print(f"  {alias:<25} => {target:<30} , &{after}" if ',' in after else f"  {alias:<25} => {target:<30} &{after}\n")


end RootUptakes subroutine

  ALT                       => plt_site%ALT                   & !input

  CanopyLeafArea_pft        => plt_morph%CanopyLeafArea_pft   & !input

  CanopyStemArea_pft        => plt_morph%CanopyStemArea_pft   & !input

  ElvAdjstedSoilH2OPSIMPa_vr => plt_ew%ElvAdjstedSoilH2OPSIMPa_vr & !input

  MY_pft                    => plt_morph%MY_pft               & !input

  MaxNumRootLays            => plt_site%MaxNumRootLays        & !input

  NK                        => plt_site%NK                    & !input

  NP                        => plt_site%NP                    & !input

  NP0                       => plt_site%NP0                   & !input

  NU                        => plt_site%NU                    & !input

  PopuRootMycoC_pvr         => plt_biom%PopuRootMycoC_pvr     & !input

  SoilBulkDensity_vr        => plt_soilchem%SoilBulkDensity_vr & !input

  SoilWatAirDry_vr          => plt_soilchem%SoilWatAirDry_vr  & !input

  VLMicP_vr                 =>