In [1]:
# File: report_lookups.py

import csv

# ---------------------------------------------------------------------------
# ADJUST THESE IMPORTS TO MATCH YOUR OWN FOLDER/FILE STRUCTURE
# ---------------------------------------------------------------------------
# Example (assuming you have your code in packages named exactly "DHW", "Elec", etc.):
# from DHW.dhw_lookup import dhw_lookup
# from Elec.lighting_lookup import lighting_lookup
# from ventilation.ventilation_lookup import ventilation_lookup
# from setzone.zone_sizing_lookup import zone_sizing_lookup
# from tempground.groundtemp_lookup import groundtemp_lookup

# If your code is not set up as Python packages, you might do:
# from .dhw_lookup import dhw_lookup
# or
# import sys
# sys.path.append("D:/Documents/E_Plus_2026_py/DHW")
# from dhw_lookup import dhw_lookup
#
# For now, let's assume you can just import them directly:
try:
    from DHW.dhw_lookup import dhw_lookup
    from Elec.lighting_lookup import lighting_lookup
    from ventilation.ventilation_lookup import ventilation_lookup
    from setzone.zone_sizing_lookup import zone_sizing_lookup
    from tempground.groundtemp_lookup import groundtemp_lookup
except ImportError:
    # If you can't import them like this, comment these out and adapt as needed
    print("[WARNING] Could not import from your modules. Please adjust imports!")
    dhw_lookup = {}
    lighting_lookup = {}
    ventilation_lookup = {}
    zone_sizing_lookup = {}
    groundtemp_lookup = {}

# ---------------------------------------------------------------------------
# REPORTING FUNCTIONS - each function focuses on one dictionary structure
# ---------------------------------------------------------------------------

def report_dhw_lookup(writer):
    """
    Example for dhw_lookup, which might have the structure:
      dhw_lookup = {
        "pre_calibration": {
            "Residential_SingleFamily_Small": {
                "occupant_density_m2_per_person_range": (None, None),
                "liters_per_person_per_day_range": (45.0, 55.0),
                ...
            },
            "Office": { ... }
        },
        "post_calibration": { ... }
      }

    We'll write rows with columns:
      [lookup_name, stage, bldg_key, param_name, min_val, max_val]
    """
    lookup_name = "dhw_lookup"
    for stage, stage_dict in dhw_lookup.items():
        for bldg_key, param_dict in stage_dict.items():
            for param_name, val_range in param_dict.items():
                # Some param ranges may be (None, None), or a single number/schedule
                if isinstance(val_range, tuple) and len(val_range) == 2:
                    min_val, max_val = val_range
                    writer.writerow([lookup_name, stage, bldg_key, param_name, min_val, max_val])
                else:
                    # If it's not a tuple or is a single fixed value
                    writer.writerow([lookup_name, stage, bldg_key, param_name, val_range, val_range])


def report_lighting_lookup(writer):
    """
    Example for lighting_lookup, with structure:
      lighting_lookup = {
        "pre_calibration": {
          "residential": {
            "LIGHTS_WM2_range": (0.0, 0.0),
            "PARASITIC_WM2_range": (0.0, 0.0),
            ...
          },
          "Meeting Function": {...}
        },
        "post_calibration": {...}
      }
    """
    lookup_name = "lighting_lookup"
    for stage, stage_dict in lighting_lookup.items():
        for bldg_subkey, param_dict in stage_dict.items():
            for param_name, val_range in param_dict.items():
                if isinstance(val_range, tuple) and len(val_range) == 2:
                    writer.writerow([lookup_name, stage, bldg_subkey, param_name, val_range[0], val_range[1]])
                else:
                    writer.writerow([lookup_name, stage, bldg_subkey, param_name, val_range, val_range])


def report_ventilation_lookup(writer):
    """
    ventilation_lookup often has a deeper nested structure, e.g.:
      ventilation_lookup = {
        "pre_calibration": {
          "residential_infiltration_range": { "A_corner": (1.0,1.2), ... },
          "non_res_infiltration_range": {...},
          "year_factor_range": {...},
          "system_control_range_res": {
              "A": {"f_ctrl_range": (0.9,1.0)}, ...
          },
          ...
        },
        "post_calibration": {...}
      }
    We'll walk each sub-dict carefully.
    """
    lookup_name = "ventilation_lookup"
    for stage, stage_dict in ventilation_lookup.items():
        # top-level keys (e.g. "residential_infiltration_range", "year_factor_range", etc.)
        for cat_name, cat_data in stage_dict.items():
            # cat_data might be a dict of subkeys or a direct tuple
            if isinstance(cat_data, dict):
                for subkey, val in cat_data.items():
                    # if 'val' is another dict, e.g. "system_control_range_res": { "A": {"f_ctrl_range": (0.9,1.0)}}
                    if isinstance(val, dict):
                        for param_name, param_val in val.items():
                            if isinstance(param_val, tuple) and len(param_val) == 2:
                                writer.writerow([lookup_name, stage, f"{cat_name}:{subkey}",
                                                 param_name, param_val[0], param_val[1]])
                            else:
                                writer.writerow([lookup_name, stage, f"{cat_name}:{subkey}",
                                                 param_name, param_val, param_val])
                    else:
                        # If val is a tuple for infiltration range, etc.
                        if isinstance(val, tuple) and len(val) == 2:
                            writer.writerow([lookup_name, stage, cat_name, subkey, val[0], val[1]])
                        else:
                            writer.writerow([lookup_name, stage, cat_name, subkey, val, val])
            else:
                # cat_data is not a dict => might be a single tuple or number
                if isinstance(cat_data, tuple) and len(cat_data) == 2:
                    writer.writerow([lookup_name, stage, cat_name, "", cat_data[0], cat_data[1]])
                else:
                    writer.writerow([lookup_name, stage, cat_name, "", cat_data, cat_data])


def report_zone_sizing_lookup(writer):
    """
    zone_sizing_lookup = {
      "pre_calibration": {
        "residential": {
          "cooling_supply_air_temp_range": (13.5,14.5),
          "heating_supply_air_temp_range": (48.0,52.0),
          ...
        },
        "non_residential": {...}
      },
      "post_calibration": {...}
    }
    """
    lookup_name = "zone_sizing_lookup"
    for stage, stage_dict in zone_sizing_lookup.items():
        for bldg_func, param_dict in stage_dict.items():
            for param_name, val_range in param_dict.items():
                if isinstance(val_range, tuple) and len(val_range) == 2:
                    writer.writerow([lookup_name, stage, bldg_func, param_name,
                                     val_range[0], val_range[1]])
                else:
                    writer.writerow([lookup_name, stage, bldg_func, param_name,
                                     val_range, val_range])


def report_groundtemp_lookup(writer):
    """
    groundtemp_lookup = {
      "pre_calibration": {
        "January": (2.0,3.0),
        "February": (3.5,5.0),
        ...
      },
      "post_calibration": {...}
    }
    """
    lookup_name = "groundtemp_lookup"
    for stage, month_dict in groundtemp_lookup.items():
        for month_name, val_range in month_dict.items():
            # usually a (min,max) pair
            if isinstance(val_range, tuple) and len(val_range) == 2:
                writer.writerow([lookup_name, stage, month_name, "", val_range[0], val_range[1]])
            else:
                writer.writerow([lookup_name, stage, month_name, "", val_range, val_range])


# ---------------------------------------------------------------------------
# MAIN REPORT FUNCTION
# ---------------------------------------------------------------------------
def main():
    """
    Creates a CSV file `lookup_report.csv` that merges info from multiple lookups.
    Adjust or add more 'report_*' calls as needed.
    """
    with open("lookup_report.csv", "w", newline="") as f:
        writer = csv.writer(f)
        # Write header row
        writer.writerow(["LookupName", "CalibrationStage", "KeyOrCategory", "ParamName", "MinValue", "MaxValue"])

        # Call each specialized reporting function
        report_dhw_lookup(writer)
        report_lighting_lookup(writer)
        report_ventilation_lookup(writer)
        report_zone_sizing_lookup(writer)
        report_groundtemp_lookup(writer)
        # ... Add more if you have additional lookups

    print("[INFO] Finished creating 'lookup_report.csv' with min/max (or fixed) values from all lookups.")


if __name__ == "__main__":
    main()


AttributeError: 'float' object has no attribute 'items'

In [3]:
import os

def export_structure_and_content(base_directory, output_file, extensions=( '.py', '.csv')):   # extensions=('.py', '.csv')
    """
    Recursively walks through base_directory, finds all files with the given extensions,
    and writes directory structure, filenames, and contents to output_file.
    
    :param base_directory: The path of the directory to walk through.
    :param output_file: The path of the text file where results will be written.
    :param extensions: A tuple of file extensions to include.
    """
    with open(output_file, 'w', encoding='utf-8') as out:
        # Walk through the directory structure
        for root, dirs, files in os.walk(base_directory):
            # Write out the current folder name
            out.write(f"Folder: {root}\n")
            
            # Filter out files by the desired extensions
            for file_name in files:
                if file_name.lower().endswith(extensions):
                    file_path = os.path.join(root, file_name)
                    
                    # Write out the file name
                    out.write(f"\n  File: {file_name}\n")
                    
                    # Read and write the file contents
                    try:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            content = f.read()
                    except UnicodeDecodeError:
                        # If there's an encoding issue, try ignoring errors or switch to a different encoding
                        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                            content = f.read()
                    
                    out.write("  --- File Contents Start ---\n")
                    out.write(content)
                    out.write("\n  --- File Contents End ---\n")
            
            out.write("\n" + "="*80 + "\n\n")


if __name__ == "__main__":
    # Example usage:
    # Change 'your_directory_path' to the path you want to scan
    # Change 'output.txt' to the desired output file name
    base_dir = r"D:\Documents\E_Plus_2030_py/ventilation"
    output_txt = "output.txt"
    
    export_structure_and_content(base_dir, output_txt)
    print(f"All .py and .csv files from '{base_dir}' have been exported to '{output_txt}'.")


All .py and .csv files from 'D:\Documents\E_Plus_2030_py/ventilation' have been exported to 'output.txt'.


In [2]:
import os

def export_structure_and_content(
    base_directory,
    output_file,
    extensions=('.py', '.csv'),
    excluded_dirs=None
):
    """
    Recursively walks through base_directory, finds all files with the given
    extensions, and writes directory structure, filenames, and contents
    to output_file, excluding any directories specified in excluded_dirs.
    
    :param base_directory: The path of the directory to walk through.
    :param output_file: The path of the text file where results will be written.
    :param extensions: A tuple of file extensions to include.
    :param excluded_dirs: A list of folder names to exclude from the walk.
    """
    if excluded_dirs is None:
        excluded_dirs = []  # Default to empty list if not provided

    with open(output_file, 'w', encoding='utf-8') as out:
        # Walk through the directory structure
        for root, dirs, files in os.walk(base_directory):
            # In-place removal of excluded directories
            dirs[:] = [d for d in dirs if d not in excluded_dirs]

            # Write out the current folder name
            out.write(f"Folder: {root}\n")
            
            # Filter out files by the desired extensions
            for file_name in files:
                if file_name.lower().endswith(extensions):
                    file_path = os.path.join(root, file_name)
                    
                    # Write out the file name
                    out.write(f"\n  File: {file_name}\n")
                    
                    # Read and write the file contents
                    try:
                        with open(file_path, 'r', encoding='utf-8') as f:
                            content = f.read()
                    except UnicodeDecodeError:
                        # If there's an encoding issue, try ignoring errors or switch to a different encoding
                        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                            content = f.read()
                    
                    out.write("  --- File Contents Start ---\n")
                    out.write(content)
                    out.write("\n  --- File Contents End ---\n")
            
            out.write("\n" + "="*80 + "\n\n")


if __name__ == "__main__":
    # Example usage:
    base_dir = r"D:\Documents\E_Plus_2030_py\cal"  # idf_objects\
    output_txt = "output.txt"
    # Add any directories you'd like to exclude here
    exclude_list = ["_pycache_", "aiaia", "calibration", "ZZZz", "shading", "output"]
    
    export_structure_and_content(
        base_dir,
        output_txt,
        extensions=('.py', '.csv', '.json'),
        excluded_dirs=exclude_list
    )
    print(f"All .py and .csv files from '{base_dir}' have been exported to '{output_txt}', excluding folders: {exclude_list}.")


All .py and .csv files from 'D:\Documents\E_Plus_2030_py\cal' have been exported to 'output.txt', excluding folders: ['_pycache_', 'aiaia', 'calibration', 'ZZZz', 'shading', 'output'].


i have the following code that i have, i first made it as piot project with test data. now i want to refine the lookup tables and  input dta. so together step by step we will woork on that. my projectis mainly based on archtetype of buildings. it can be different from object to object thatwe will work on it together.

i firasst provide how my input data will be, and te code i have. you first have ann understanding and a view of what we have and will go to next step later. 



these columns from my data can be :


for functions and types and age range it can be:
1.
building_function	residential_type	non_residential_type
residential	Corner House	null
non_residential	null	Meeting Function
residential	Apartment	null
residential	Terrace or Semi-detached House
non_residential	null	Healthcare Function
non_residential	null	Sport Function
non_residential	null	Cell Function
non_residential	null	Retail Function
residential	Detached House	null
non_residential	null	Industrial Function
residential	Two-and-a-half-story House
non_residential	null	Accommodation Function
non_residential	null	Office Function
non_residential	null	Education Function
non_residential	null	Other Use Function
2.
age_range
2015 and later
1992 - 2005
1945 - 1964
1975 - 1991
1965 - 1974
2006 - 2014
< 1945




ok, first lets fo gor geomz. as yyou can see, we have:
1. default values in geometry_lookup
2. an excel file and overriding that cantotally change this lookup table at beginnging after imports. 
3. a partially temporary overridings that can happen by user config


so, if py works as i want, then lets work on Lookup expansions. 

so,, how the look up table should be. 

also, how i need to provide my excel file. 

and also how the user configs can be 







Loads defaults from dictionaries,
Optionally overrides from an Excel file once at startup,
Optionally overrides on a per-building basis via user_config_XXX lists,
Then uses that final set of parameters to build an EnergyPlus model.

In [2]:
import os

def generate_tree_report(root_dir, indent=""):
    items = os.listdir(root_dir)
    # Sort to ensure consistent order (folders first, then files)
    items.sort(key=lambda x: (not os.path.isdir(os.path.join(root_dir, x)), x.lower()))

    for i, item in enumerate(items):
        path = os.path.join(root_dir, item)
        is_last_item = (i == len(items) - 1)

        # Choose prefix depending on whether this is the last item in the list
        tree_prefix = "└── " if is_last_item else "├── "
        # Print current item
        print(indent + tree_prefix + item)

        if os.path.isdir(path):
            # For child items, increase indentation
            new_indent = indent + ("    " if is_last_item else "│   ")
            generate_tree_report(path, new_indent)

if __name__ == "__main__":
    root_directory = r"D:\Documents\E_Plus_2030_py\aiaia"
    print(root_directory)
    generate_tree_report(root_directory)


D:\Documents\E_Plus_2030_py
├── .git
│   ├── hooks
│   │   ├── applypatch-msg.sample
│   │   ├── commit-msg.sample
│   │   ├── fsmonitor-watchman.sample
│   │   ├── post-update.sample
│   │   ├── pre-applypatch.sample
│   │   ├── pre-commit.sample
│   │   ├── pre-merge-commit.sample
│   │   ├── pre-push.sample
│   │   ├── pre-rebase.sample
│   │   ├── pre-receive.sample
│   │   ├── prepare-commit-msg.sample
│   │   ├── push-to-checkout.sample
│   │   ├── sendemail-validate.sample
│   │   └── update.sample
│   ├── info
│   │   └── exclude
│   ├── logs
│   │   ├── refs
│   │   │   ├── heads
│   │   │   │   └── main
│   │   │   └── remotes
│   │   │       └── origin
│   │   │           └── main
│   │   └── HEAD
│   ├── objects
│   │   ├── 01
│   │   │   └── 7a694b51a029189d21272cc10432ba641f2fad
│   │   ├── 02
│   │   │   └── 3f0df67ac93b04c6e91a5613757ba46545faec
│   │   ├── 03
│   │   │   ├── a9e515917f82b88a5d4dfdb7857740bccc2745
│   │   │   └── e59397c65382f5cef44eb637c61433bd9b7676
│

In [3]:
import os
import ast

def parse_python_file(file_path):
    """
    Parse a Python file using ast and return a dictionary of:
      - imports
      - from_imports
      - function names
      - class names
    """
    info = {
        "imports": [],
        "from_imports": [],
        "functions": [],
        "classes": []
    }
    
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            code = f.read()
        tree = ast.parse(code, file_path)
    except (SyntaxError, UnicodeDecodeError) as e:
        # In case there's a problematic file or encoding issue, skip gracefully
        info["error"] = str(e)
        return info

    # Walk the AST nodes to find imports, functions, and classes
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # e.g. import os, import sys
            for alias in node.names:
                info["imports"].append(alias.name)
        elif isinstance(node, ast.ImportFrom):
            # e.g. from sys import argv
            module_name = node.module
            for alias in node.names:
                info["from_imports"].append((module_name, alias.name))
        elif isinstance(node, ast.FunctionDef):
            info["functions"].append(node.name)
        elif isinstance(node, ast.ClassDef):
            info["classes"].append(node.name)

    return info


def generate_tree_and_analysis(root_dir, indent=""):
    """
    Recursively walk through the folder structure starting at root_dir.
    Print a tree structure, and for each .py file, parse and report its contents.
    """
    items = sorted(os.listdir(root_dir), key=lambda x: (not os.path.isdir(os.path.join(root_dir, x)), x.lower()))
    for i, item in enumerate(items):
        path = os.path.join(root_dir, item)
        is_last_item = (i == len(items) - 1)

        # Tree icon prefix
        tree_prefix = "└── " if is_last_item else "├── "
        print(indent + tree_prefix + item)

        if os.path.isdir(path):
            # Recursively explore subfolders
            new_indent = indent + ("    " if is_last_item else "│   ")
            generate_tree_and_analysis(path, new_indent)
        else:
            # If it's a Python file, parse and show metadata
            if item.endswith(".py"):
                parse_info = parse_python_file(path)
                show_file_analysis(parse_info, indent + ("    " if is_last_item else "│   "))


def show_file_analysis(parse_info, indent):
    """
    Print the parse results (imports, from_imports, functions, classes)
    with indentation for a tree-like display.
    """
    # If there was an error parsing, report it
    if "error" in parse_info:
        print(f"{indent}└── [Error parsing file: {parse_info['error']}]")
        return

    # Show imports
    if parse_info["imports"]:
        print(f"{indent}└── Imports:")
        for idx, imp in enumerate(parse_info["imports"]):
            prefix = "    " if idx == len(parse_info["imports"]) - 1 else "│   "
            print(f"{indent}    {prefix}{imp}")

    # Show from-imports
    if parse_info["from_imports"]:
        print(f"{indent}└── From Imports:")
        for idx, (mod, name) in enumerate(parse_info["from_imports"]):
            prefix = "    " if idx == len(parse_info["from_imports"]) - 1 else "│   "
            print(f"{indent}    {prefix}from {mod} import {name}")

    # Show functions
    if parse_info["functions"]:
        print(f"{indent}└── Functions:")
        for idx, func in enumerate(parse_info["functions"]):
            prefix = "    " if idx == len(parse_info["functions"]) - 1 else "│   "
            print(f"{indent}    {prefix}{func}")

    # Show classes
    if parse_info["classes"]:
        print(f"{indent}└── Classes:")
        for idx, cls in enumerate(parse_info["classes"]):
            prefix = "    " if idx == len(parse_info["classes"]) - 1 else "│   "
            print(f"{indent}    {prefix}{cls}")


if __name__ == "__main__":
    root_directory = r"D:\Documents\E_Plus_2030_py"
    print(f"Mapping folder structure and Python file contents for:\n{root_directory}\n")
    generate_tree_and_analysis(root_directory)


Mapping folder structure and Python file contents for:
D:\Documents\E_Plus_2030_py

├── .git
│   ├── hooks
│   │   ├── applypatch-msg.sample
│   │   ├── commit-msg.sample
│   │   ├── fsmonitor-watchman.sample
│   │   ├── post-update.sample
│   │   ├── pre-applypatch.sample
│   │   ├── pre-commit.sample
│   │   ├── pre-merge-commit.sample
│   │   ├── pre-push.sample
│   │   ├── pre-rebase.sample
│   │   ├── pre-receive.sample
│   │   ├── prepare-commit-msg.sample
│   │   ├── push-to-checkout.sample
│   │   ├── sendemail-validate.sample
│   │   └── update.sample
│   ├── info
│   │   └── exclude
│   ├── logs
│   │   ├── refs
│   │   │   ├── heads
│   │   │   │   └── main
│   │   │   └── remotes
│   │   │       └── origin
│   │   │           └── main
│   │   └── HEAD
│   ├── objects
│   │   ├── 01
│   │   │   └── 7a694b51a029189d21272cc10432ba641f2fad
│   │   ├── 02
│   │   │   └── 3f0df67ac93b04c6e91a5613757ba46545faec
│   │   ├── 03
│   │   │   ├── a9e515917f82b88a5d4dfdb7857740bccc2745


In [9]:
import os
import ast

def parse_python_file(file_path):
    """
    Parse a Python file using ast and return a dictionary of:
      - imports
      - from_imports
      - function names
      - class names
      - csv_inputs  (detected .csv files read in)
      - csv_outputs (detected .csv files written to)
      - excel_inputs  (detected .xls/.xlsx files read in)
      - excel_outputs (detected .xls/.xlsx files written to)
    """
    info = {
        "imports": [],
        "from_imports": [],
        "functions": [],
        "classes": [],
        "csv_inputs": [],
        "csv_outputs": [],
        "excel_inputs": [],
        "excel_outputs": []
    }

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            code = f.read()
        tree = ast.parse(code, file_path)
    except (SyntaxError, UnicodeDecodeError) as e:
        # In case there's a problematic file or encoding issue, skip gracefully
        info["error"] = f"Parsing error: {e}"
        return info

    # 1. Collect imports, from-imports, function names, and class names
    for node in ast.walk(tree):
        # Imports
        if isinstance(node, ast.Import):
            for alias in node.names:
                info["imports"].append(alias.name)
        elif isinstance(node, ast.ImportFrom):
            module_name = node.module
            for alias in node.names:
                info["from_imports"].append((module_name, alias.name))
        elif isinstance(node, ast.FunctionDef):
            info["functions"].append(node.name)
        elif isinstance(node, ast.ClassDef):
            info["classes"].append(node.name)

    # 2. Attempt to detect CSV/Excel reading & writing
    #    We’ll look for function calls to open/read_csv/read_excel/to_csv/to_excel
    #    and capture string arguments that look like *.csv, *.xls, *.xlsx
    for node in ast.walk(tree):
        if isinstance(node, ast.Call):
            # The function being called could be open(), or an attribute (e.g., pd.read_csv)
            func = node.func

            # Check if it's "open(...)"
            if isinstance(func, ast.Name) and func.id == "open":
                # e.g., open("mydata.csv", "r")
                filename = _get_first_str_arg(node)
                if filename:
                    if filename.lower().endswith(".csv"):
                        # Check if 'w' or 'a' in the mode to guess it's output
                        mode = _get_mode_arg(node)
                        if mode and any(m in mode for m in ("w", "a", "+")):
                            info["csv_outputs"].append(filename)
                        else:
                            info["csv_inputs"].append(filename)

            # Check if it's an attribute call, e.g. pd.read_csv or df.to_csv
            elif isinstance(func, ast.Attribute):
                attr_name = func.attr.lower()  # e.g. "read_csv", "to_csv", "read_excel", etc.

                # read_csv / read_excel
                if attr_name in ("read_csv", "read_excel"):
                    filename = _get_first_str_arg(node)
                    if filename:
                        if filename.lower().endswith(".csv"):
                            info["csv_inputs"].append(filename)
                        elif filename.lower().endswith(".xls") or filename.lower().endswith(".xlsx"):
                            info["excel_inputs"].append(filename)

                # to_csv / to_excel
                elif attr_name in ("to_csv", "to_excel"):
                    # Usually the filename is the first argument
                    filename = _get_first_str_arg(node)
                    if filename:
                        if filename.lower().endswith(".csv"):
                            info["csv_outputs"].append(filename)
                        elif filename.lower().endswith(".xls") or filename.lower().endswith(".xlsx"):
                            info["excel_outputs"].append(filename)

    return info


def _get_first_str_arg(call_node):
    """
    Helper function to return the first argument of a function call if it’s a string literal.
    E.g. open("data.csv") => "data.csv"
    """
    if call_node.args:
        first_arg = call_node.args[0]
        if isinstance(first_arg, ast.Constant) and isinstance(first_arg.value, str):
            return first_arg.value
        elif isinstance(first_arg, ast.Str):  # For older Python versions (<3.8)
            return first_arg.s
    return None


def _get_mode_arg(call_node):
    """
    If the function call has a second positional argument that might be the mode (e.g., 'r', 'w'),
    return that. This is specific to open(...) usage.
    """
    if len(call_node.args) > 1:
        second_arg = call_node.args[1]
        if isinstance(second_arg, ast.Constant) and isinstance(second_arg.value, str):
            return second_arg.value
        elif isinstance(second_arg, ast.Str):  # Python <3.8
            return second_arg.s
    return None


def generate_tree_and_analysis(root_dir, indent=""):
    """
    Recursively walk through the folder structure starting at root_dir.
    Print a tree structure, and for each .py file, parse and show metadata
    about imports, functions, classes, and CSV/Excel usage.
    """
    items = sorted(
        os.listdir(root_dir),
        key=lambda x: (not os.path.isdir(os.path.join(root_dir, x)), x.lower())
    )

    for i, item in enumerate(items):
        path = os.path.join(root_dir, item)
        is_last_item = (i == len(items) - 1)

        tree_prefix = "└── " if is_last_item else "├── "
        print(indent + tree_prefix + item)

        if os.path.isdir(path):
            new_indent = indent + ("    " if is_last_item else "│   ")
            generate_tree_and_analysis(path, new_indent)
        else:
            if item.endswith(".py"):
                parse_info = parse_python_file(path)
                show_file_analysis(parse_info, indent + ("    " if is_last_item else "│   "))


def show_file_analysis(parse_info, indent):
    """
    Print the parse results in a tree-like display.
    """

    if "error" in parse_info:
        print(f"{indent}└── [Error parsing file: {parse_info['error']}]")
        return

    # Show standard info
    _print_list(parse_info["imports"], "Imports", indent)
    _print_list(
        [f"from {mod} import {name}" for (mod, name) in parse_info["from_imports"]],
        "From Imports",
        indent
    )
    _print_list(parse_info["functions"], "Functions", indent)
    _print_list(parse_info["classes"], "Classes", indent)

    # Show CSV/Excel usage
    _print_list(parse_info["csv_inputs"], "CSV Inputs", indent)
    _print_list(parse_info["csv_outputs"], "CSV Outputs", indent)
    _print_list(parse_info["excel_inputs"], "Excel Inputs", indent)
    _print_list(parse_info["excel_outputs"], "Excel Outputs", indent)


def _print_list(items, label, indent):
    """Helper to print a label and list of items in a tree-like manner."""
    if not items:
        return
    print(f"{indent}└── {label}:")
    for i, val in enumerate(items):
        is_last = (i == len(items) - 1)
        prefix = "    " if is_last else "│   "
        print(f"{indent}    {prefix}{val}")


if __name__ == "__main__":
    root_directory = r"D:\Documents\E_Plus_2030_py"
    print(f"Mapping folder structure and Python file contents for:\n{root_directory}\n")
    generate_tree_and_analysis(root_directory)


Mapping folder structure and Python file contents for:
D:\Documents\E_Plus_2030_py

├── .git
│   ├── hooks
│   │   ├── applypatch-msg.sample
│   │   ├── commit-msg.sample
│   │   ├── fsmonitor-watchman.sample
│   │   ├── post-update.sample
│   │   ├── pre-applypatch.sample
│   │   ├── pre-commit.sample
│   │   ├── pre-merge-commit.sample
│   │   ├── pre-push.sample
│   │   ├── pre-rebase.sample
│   │   ├── pre-receive.sample
│   │   ├── prepare-commit-msg.sample
│   │   ├── push-to-checkout.sample
│   │   ├── sendemail-validate.sample
│   │   └── update.sample
│   ├── info
│   │   └── exclude
│   ├── logs
│   │   ├── refs
│   │   │   ├── heads
│   │   │   │   └── main
│   │   │   └── remotes
│   │   │       └── origin
│   │   │           └── main
│   │   └── HEAD
│   ├── objects
│   │   ├── 01
│   │   │   └── 7a694b51a029189d21272cc10432ba641f2fad
│   │   ├── 02
│   │   │   └── 3f0df67ac93b04c6e91a5613757ba46545faec
│   │   ├── 03
│   │   │   ├── a9e515917f82b88a5d4dfdb7857740bccc2745


In [3]:
import os
import ast
import csv

def parse_python_file(file_path, root_dir):
    """
    Parse a Python file using ast and return a dictionary of:
      - imports
      - from_imports
      - functions
      - classes
      - csv_inputs  (detected .csv files read in)
      - csv_outputs (detected .csv files written to)
      - excel_inputs  (detected .xls/.xlsx files read in)
      - excel_outputs (detected .xls/.xlsx files written to)
      - csv_columns  (columns for each input CSV we can actually open)
    """
    info = {
        "imports": [],
        "from_imports": [],
        "functions": [],
        "classes": [],
        "csv_inputs": [],
        "csv_outputs": [],
        "excel_inputs": [],
        "excel_outputs": [],
        "csv_columns": {}  # dict: filename -> [list_of_columns]
    }

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            code = f.read()
        tree = ast.parse(code, file_path)
    except (SyntaxError, UnicodeDecodeError) as e:
        info["error"] = f"Parsing error: {e}"
        return info

    # 1. Collect imports, from-imports, function names, and class names
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                info["imports"].append(alias.name)
        elif isinstance(node, ast.ImportFrom):
            module_name = node.module
            for alias in node.names:
                info["from_imports"].append((module_name, alias.name))
        elif isinstance(node, ast.FunctionDef):
            info["functions"].append(node.name)
        elif isinstance(node, ast.ClassDef):
            info["classes"].append(node.name)

    # 2. Attempt to detect CSV/Excel reading & writing
    for node in ast.walk(tree):
        if isinstance(node, ast.Call):
            func = node.func

            # open(...)
            if isinstance(func, ast.Name) and func.id == "open":
                filename = _get_first_str_arg(node)
                if filename and filename.lower().endswith(".csv"):
                    mode = _get_mode_arg(node)
                    if mode and any(m in mode for m in ("w", "a", "+")):
                        info["csv_outputs"].append(filename)
                    else:
                        info["csv_inputs"].append(filename)

            # e.g., pd.read_csv, pd.read_excel, df.to_csv, df.to_excel
            elif isinstance(func, ast.Attribute):
                attr_name = func.attr.lower()
                # read_csv / read_excel
                if attr_name in ("read_csv", "read_excel"):
                    filename = _get_first_str_arg(node)
                    if filename:
                        if filename.lower().endswith(".csv"):
                            info["csv_inputs"].append(filename)
                        elif filename.lower().endswith(".xls") or filename.lower().endswith(".xlsx"):
                            info["excel_inputs"].append(filename)
                # to_csv / to_excel
                elif attr_name in ("to_csv", "to_excel"):
                    filename = _get_first_str_arg(node)
                    if filename:
                        if filename.lower().endswith(".csv"):
                            info["csv_outputs"].append(filename)
                        elif filename.lower().endswith(".xls") or filename.lower().endswith(".xlsx"):
                            info["excel_outputs"].append(filename)

    # 3. Try to read the columns from each CSV input
    for csv_file in info["csv_inputs"]:
        # Attempt to locate the CSV file on disk
        #  - If csv_file is absolute, we'll use that path.
        #  - If csv_file is relative, we'll join it with root_dir or with the folder of the .py file.
        possible_path = _resolve_path(csv_file, file_path, root_dir)
        try:
            if os.path.exists(possible_path):
                columns = _get_csv_columns(possible_path)
                if columns:
                    info["csv_columns"][csv_file] = columns
                else:
                    info["csv_columns"][csv_file] = ["[No columns found or file empty]"]
            else:
                info["csv_columns"][csv_file] = [f"[File not found: {possible_path}]"]
        except Exception as e:
            info["csv_columns"][csv_file] = [f"[Error reading file: {e}]"]

    return info

def _get_first_str_arg(call_node):
    """Return the first argument as a string if it’s a literal."""
    if call_node.args:
        first_arg = call_node.args[0]
        # For Python 3.8+:
        if isinstance(first_arg, ast.Constant) and isinstance(first_arg.value, str):
            return first_arg.value
        # For older Python versions (< 3.8):
        elif isinstance(first_arg, ast.Str):
            return first_arg.s
    return None

def _get_mode_arg(call_node):
    """Return the second argument (mode) if it’s a string literal (for open(...))."""
    if len(call_node.args) > 1:
        second_arg = call_node.args[1]
        if isinstance(second_arg, ast.Constant) and isinstance(second_arg.value, str):
            return second_arg.value
        elif isinstance(second_arg, ast.Str):
            return second_arg.s
    return None

def _resolve_path(csv_file, py_file_path, root_dir):
    """
    Try to figure out a path to the CSV file.
    1) If csv_file is absolute, return it directly.
    2) Otherwise, try relative to the folder containing the Python script.
    3) If that doesn't exist, try relative to the overall root_dir.
    """
    if os.path.isabs(csv_file):
        return csv_file

    # Path of the folder containing the .py file
    py_folder = os.path.dirname(py_file_path)

    # Try relative to the .py file's folder
    candidate1 = os.path.join(py_folder, csv_file)
    if os.path.exists(candidate1):
        return candidate1

    # If that doesn't exist, try relative to the overall root_dir
    candidate2 = os.path.join(root_dir, csv_file)
    return candidate2

def _get_csv_columns(path_to_csv):
    """
    Read the header row of a CSV and return a list of column names.
    We only read the first line or so, to avoid big memory usage.
    """
    with open(path_to_csv, "r", newline="", encoding="utf-8") as f:
        # We'll try using csv.DictReader to interpret headers:
        reader = csv.DictReader(f)
        return reader.fieldnames  # returns a list of column names

def generate_tree_and_analysis(root_dir, indent=""):
    """
    Recursively walk through the folder structure starting at root_dir.
    Print a tree structure, and for each .py file, parse and show metadata
    about imports, functions, classes, CSV/Excel usage, and CSV columns.
    """
    items = sorted(
        os.listdir(root_dir),
        key=lambda x: (not os.path.isdir(os.path.join(root_dir, x)), x.lower())
    )

    for i, item in enumerate(items):
        path = os.path.join(root_dir, item)
        is_last_item = (i == len(items) - 1)
        tree_prefix = "└── " if is_last_item else "├── "
        print(indent + tree_prefix + item)

        if os.path.isdir(path):
            new_indent = indent + ("    " if is_last_item else "│   ")
            generate_tree_and_analysis(path, new_indent)
        else:
            if item.endswith(".py"):
                parse_info = parse_python_file(path, root_dir)
                show_file_analysis(parse_info, indent + ("    " if is_last_item else "│   "))

def show_file_analysis(parse_info, indent):
    """Print the parse results in a tree-like format."""
    if "error" in parse_info:
        print(f"{indent}└── [Error parsing file: {parse_info['error']}]")
        return

    _print_list(parse_info["imports"], "Imports", indent)
    _print_list(
        [f"from {mod} import {name}" for (mod, name) in parse_info["from_imports"]],
        "From Imports",
        indent
    )
    _print_list(parse_info["functions"], "Functions", indent)
    _print_list(parse_info["classes"], "Classes", indent)
    _print_list(parse_info["csv_inputs"], "CSV Inputs", indent)
    _print_list(parse_info["csv_outputs"], "CSV Outputs", indent)
    _print_list(parse_info["excel_inputs"], "Excel Inputs", indent)
    _print_list(parse_info["excel_outputs"], "Excel Outputs", indent)

    # Now, print out any discovered CSV columns
    if parse_info["csv_columns"]:
        print(f"{indent}└── CSV Columns:")
        for i, (csv_file, columns) in enumerate(parse_info["csv_columns"].items()):
            is_last = (i == len(parse_info["csv_columns"]) - 1)
            prefix = "    " if is_last else "│   "
            print(f"{indent}    {prefix}{csv_file}:")
            if not columns:
                print(f"{indent}    {prefix}    [No columns detected]")
            else:
                for j, col in enumerate(columns):
                    col_prefix = "       " if is_last and j == len(columns) - 1 else "│      "
                    # If there's only one item left, break the lines properly
                    # But let's keep it simple:
                    print(f"{indent}    {prefix}    - {col}")

def _print_list(items, label, indent):
    """Helper to print a label and list of items in a tree-like manner."""
    if not items:
        return
    print(f"{indent}└── {label}:")
    for i, val in enumerate(items):
        is_last = (i == len(items) - 1)
        prefix = "    " if is_last else "│   "
        print(f"{indent}    {prefix}{val}")


if __name__ == "__main__":
    # Adjust this path to your folder
    root_directory = r"D:\Documents\E_Plus_2030_py"
    print(f"Mapping folder structure and Python file contents for:\n{root_directory}\n")
    generate_tree_and_analysis(root_directory)


Mapping folder structure and Python file contents for:
D:\Documents\E_Plus_2030_py

├── .git
│   ├── hooks
│   │   ├── applypatch-msg.sample
│   │   ├── commit-msg.sample
│   │   ├── fsmonitor-watchman.sample
│   │   ├── post-update.sample
│   │   ├── pre-applypatch.sample
│   │   ├── pre-commit.sample
│   │   ├── pre-merge-commit.sample
│   │   ├── pre-push.sample
│   │   ├── pre-rebase.sample
│   │   ├── pre-receive.sample
│   │   ├── prepare-commit-msg.sample
│   │   ├── push-to-checkout.sample
│   │   ├── sendemail-validate.sample
│   │   └── update.sample
│   ├── info
│   │   └── exclude
│   ├── logs
│   │   ├── refs
│   │   │   ├── heads
│   │   │   │   └── main
│   │   │   └── remotes
│   │   │       └── origin
│   │   │           └── main
│   │   └── HEAD
│   ├── objects
│   │   ├── 00
│   │   │   ├── 01ee0f6b96a9f4545f61f74f2d4eff091ba50b
│   │   │   └── 23487c20a3f512631072d1a67402d454f6b774
│   │   ├── 01
│   │   │   ├── 57c36ba5e9b95dc93ace829037970455452f82
│   │   │   └──

In [2]:
import os
import glob
import pandas as pd

def read_files_in_folder(folder_path,
                         row_range=None,
                         col_range=None,
                         col_names=None):
    """
    Reads all .xlsx and .csv files in the given folder and
    prints out data slices based on row_range, col_range, and/or col_names.

    Parameters:
    -----------
    folder_path : str
        The path to the folder containing .xlsx or .csv files.

    row_range : tuple or None, default=None
        A tuple (start_row, end_row) for slicing rows by index.
        e.g. (0, 5) -> the first 5 rows.

    col_range : tuple or None, default=None
        A tuple (start_col, end_col) for slicing columns by index.
        e.g. (0, 3) -> columns at index 0, 1, 2.

    col_names : list or None, default=None
        A list of column names to select, e.g. ["Name", "Age"].

    Usage Examples:
    --------------
    read_files_in_folder(r"C:\MyData",
                         row_range=(0, 5),
                         col_range=(1, 4),
                         col_names=["ColumnA", "ColumnB"])
    """

    # 1. Grab all Excel files ending with ".xlsx"
    excel_files = glob.glob(os.path.join(folder_path, "*.xlsx"))
    # 2. Grab all CSV files ending with ".csv"
    csv_files = glob.glob(os.path.join(folder_path, "*.csv"))

    # Combine the two lists
    all_files = excel_files + csv_files

    # 3. Loop through each file found
    for file_path in all_files:
        # Determine if this is an Excel file or CSV file
        if file_path.lower().endswith(".xlsx"):
            df = pd.read_excel(file_path)
        elif file_path.lower().endswith(".csv"):
            df = pd.read_csv(file_path)
        else:
            # Skip any file that doesn't match .xlsx or .csv
            continue

        # Print file name for clarity
        print(f"\n=== Reading file: {os.path.basename(file_path)} ===")
        
        # ------------------------------------
        # A) Apply row slicing by index if row_range is given
        if row_range is not None:
            start_row, end_row = row_range
            df = df.iloc[start_row:end_row, :]  # Slicing the rows
        
        # B) Apply column slicing by index if col_range is given
        if col_range is not None:
            start_col, end_col = col_range
            df = df.iloc[:, start_col:end_col]  # Slicing the columns

        # C) If col_names is provided, select only those columns by name
        #    (making sure they exist in the DataFrame)
        if col_names is not None:
            # Filter out any col_names that might not be in df.columns
            existing_cols = [c for c in col_names if c in df.columns]
            if existing_cols:
                df = df[existing_cols]
            else:
                print("Warning: None of the requested columns found in this file.")

        # Finally, print the resulting slice
        print(df)

# --------------------------------------------------------------------------
# USAGE EXAMPLE
if __name__ == "__main__":
    folder_path = r"D:\Documents\E_Plus_2030_py\lookup_xlx"
    
    # Example 1: View first 5 rows, columns 0 to 3
    # read_files_in_folder(folder_path, row_range=(0, 5), col_range=(0, 3))

    # Example 2: View first 5 rows, but only columns named "ColumnA" and "ColumnB"
    # read_files_in_folder(folder_path, row_range=(0, 5), col_names=["ColumnA", "ColumnB"])

    # Example 3: Combine row index range and column index range and column names
    # *Typically you'd choose either col_range or col_names, but here's how you'd do both:
    # read_files_in_folder(folder_path,
    #                      row_range=(0, 10),
    #                      col_range=(2, 5),
    #                      col_names=["SomeColumn", "AnotherColumn"])

    # For a real run, un-comment one of the lines above or pass your own parameters:
    read_files_in_folder(folder_path, row_range=(0, 5), col_range=(0, 3))



=== Reading file: dhw_lookup.xlsx ===
            section_type             key_name subkey_name
0  TABLE_13_1_KWH_PER_M2     Meeting Function         NaN
1  TABLE_13_1_KWH_PER_M2      Office Function         NaN
2  TABLE_13_1_KWH_PER_M2      Retail Function         NaN
3  TABLE_13_1_KWH_PER_M2  Healthcare Function         NaN
4  TABLE_13_1_KWH_PER_M2   Education Function         NaN

=== Reading file: elec_schedules.xlsx ===
  building_category building_subtype day_type
0       Residential     Corner House  weekday
1       Residential     Corner House  weekday
2       Residential     Corner House  weekday
3       Residential     Corner House  weekday
4       Residential     Corner House  weekday

=== Reading file: epw_lookup.xlsx ===
                                           file_path  year    lat
0  C:/Users/aminj/OneDrive/Desktop/EnergyPlus/Wea...  2018  52.12
1  C:/Users/aminj/OneDrive/Desktop/EnergyPlus/Wea...  2020  52.15
2  C:/Users/aminj/OneDrive/Desktop/EnergyPlus/Wea...  205