In [1]:
# Definitions
# This script is used to generate a file tree from Terraform modules in YAML format.
#
# Dependencies:
# - PyYAML
# - python-hcl2
#
# Files located under gen3-terraform/tf_files are used to generate YAML equivalents
# at the same relative location under yaml_folder. The generated YAML files are not
# cleanly formatted.
# In Terraform, files are grouped into different directories based on their purpose.
# This allows for better organization and easier navigation of the codebase.
#
# In future releases, intermediary files will be deleted upon successful generation
# of the YAML tree.
#
#
# Root folder holding the complete set of Terraform files
terraform_folder = "gen3-terraform/tf_files"
# Intermediary folder for the equivalent YAML files, mirroring the Terraform layout
yaml_folder = "yaml_files"
# YAML equivalent of the Terraform initialization folder.
# Derived from yaml_folder so the two paths cannot drift apart.
yaml_exec_source = f"{yaml_folder}/aws/commons"
# Destination folder for the rebuilt YAML tree
yaml_exec_tree = "yaml_tree"

### Copy all **non-tf** files in directory to the yaml file directory

In [2]:
#!/usr/bin/env python3
"""
copy_non_tf.py

Copies all files under SOURCE_DIR that do NOT end with .tf
to DEST_DIR, preserving the directory structure.
"""

import os
import shutil
import sys

def copy_non_tf(source_dir, dest_dir):
    """
    Copy every file under source_dir whose name does not end with '.tf'
    into dest_dir, preserving the relative directory structure.

    Args:
        source_dir: Root directory to walk; converted to an absolute path.
        dest_dir: Root directory that receives the copies. Sub-directories
            are created on demand, and only when a file is copied into them.

    Prints one "Copied: ..." line per copied file. Returns None.
    """
    source_dir = os.path.abspath(source_dir)
    for root, _, files in os.walk(source_dir):
        non_tf_files = [fname for fname in files if not fname.endswith('.tf')]
        if not non_tf_files:
            # Nothing to copy from this directory, so do not create its mirror
            continue

        # Compute relative path from the source root
        rel_dir = os.path.relpath(root, source_dir)
        # relpath yields "." for the source root itself; use dest_dir directly
        # so root-level files do not get a "dest/./name" destination path.
        target_dir = dest_dir if rel_dir == os.curdir else os.path.join(dest_dir, rel_dir)
        os.makedirs(target_dir, exist_ok=True)

        for fname in non_tf_files:
            src_path = os.path.join(root, fname)
            dst_path = os.path.join(target_dir, fname)
            # copy2 also preserves file metadata such as timestamps
            shutil.copy2(src_path, dst_path)
            print(f"Copied: {src_path} → {dst_path}")

# Mirror every non-Terraform file from the Terraform tree into the YAML folder
copy_non_tf(source_dir=terraform_folder, dest_dir=yaml_folder)

Copied: c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\gen3-terraform\tf_files\README.md → yaml_files\.\README.md
Copied: c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\gen3-terraform\tf_files\aws\access\manifest.json → yaml_files\aws\access\manifest.json
Copied: c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\gen3-terraform\tf_files\aws\access\sample.tfvars → yaml_files\aws\access\sample.tfvars
Copied: c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\gen3-terraform\tf_files\aws\account-policies\manifest.json → yaml_files\aws\account-policies\manifest.json
Copied: c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\gen3-terraform\tf_files\aws\account-policies\README.md → yaml_files\aws\account-polic

### Convert all .tf files to YAML and write them to the yaml file directory

In [3]:
#!/usr/bin/env python3
"""
Recursively parse all .tf files in a directory and emit corresponding .tf.yaml files,
with debug logging at every step.
"""
import os
import argparse
import hcl2
import yaml

def parse_directory(src_dir: str, out_dir: str):
    """
    Recursively convert every ``.tf`` file under src_dir into a ``.tf.yaml``
    file under out_dir, mirroring the relative directory layout.

    Each Terraform file is parsed with ``hcl2.load`` and the result is written
    with ``yaml.safe_dump`` (key order preserved). A file that cannot be parsed,
    or whose YAML cannot be written, is reported with an ERROR line and
    skipped; processing continues with the next file. Every step is logged
    with DEBUG prints.

    Args:
        src_dir: Root directory to scan for ``.tf`` files.
        out_dir: Root directory that receives the ``<name>.tf.yaml`` files.
            Sub-directories are created only when a file is written into them.
    """
    # Normalize input paths
    src_dir = os.path.abspath(src_dir)
    out_dir = os.path.abspath(out_dir)
    print("DEBUG: Starting parse_directory")
    print(f"DEBUG:  src_dir = {src_dir}")
    print(f"DEBUG:  out_dir = {out_dir}\n")

    for root, _, files in os.walk(src_dir):
        rel = os.path.relpath(root, src_dir)
        # Output directory mirroring `root`; it is the same for every file in
        # this directory, so compute it once per directory.
        out_subdir = out_dir if rel == "." else os.path.join(out_dir, rel)
        print(f"DEBUG: Walking into directory: {root}")
        print(f"DEBUG:  Relative path = {rel}")
        print(f"DEBUG:  Files in this dir = {files}")

        for fname in files:
            print(f"\nDEBUG: Inspecting file: {fname}")
            if not fname.endswith(".tf"):
                print(f"DEBUG:  Skipping (not a .tf): {fname}")
                continue

            tf_path = os.path.join(root, fname)
            print(f"DEBUG:  Found Terraform file: {tf_path}")
            try:
                # Terraform configuration is UTF-8; do not depend on the
                # platform default encoding (e.g. cp1252 on Windows).
                with open(tf_path, "r", encoding="utf-8") as f:
                    print(f"DEBUG:    Loading HCL from {tf_path}")
                    parsed = hcl2.load(f)
                print(f"DEBUG:    HCL parsed, type(parsed)={type(parsed).__name__}")
            except Exception as e:
                # Best effort: report the failure and move on to the next file
                print(f"ERROR:   Failed to parse {tf_path}: {e}")
                continue

            # Create the output directory lazily, only once a file parsed OK
            print(f"DEBUG:    out_subdir = {out_subdir}")
            os.makedirs(out_subdir, exist_ok=True)

            yaml_path = os.path.join(out_subdir, fname + ".yaml")
            print(f"DEBUG:    yaml_path = {yaml_path}")
            try:
                with open(yaml_path, "w", encoding="utf-8") as yf:
                    # sort_keys=False keeps the key order produced by the parser
                    yaml.safe_dump(parsed, yf, sort_keys=False)
                print(f"✓ Written: {tf_path} → {yaml_path}")
            except Exception as e:
                print(f"ERROR:   Failed to write YAML to {yaml_path}: {e}")

if __name__ == "__main__":
    # Paths come from the definitions cell; swap in argparse here if CLI use is needed
    parse_directory(src_dir=terraform_folder, out_dir=yaml_folder)

DEBUG: Starting parse_directory
DEBUG:  src_dir = c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\gen3-terraform\tf_files
DEBUG:  out_dir = c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\yaml_files

DEBUG: Walking into directory: c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\gen3-terraform\tf_files
DEBUG:  Relative path = .
DEBUG:  Files in this dir = ['README.md']

DEBUG: Inspecting file: README.md
DEBUG:  Skipping (not a .tf): README.md
DEBUG: Walking into directory: c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\gen3-terraform\tf_files\aws
DEBUG:  Relative path = aws
DEBUG:  Files in this dir = ['root.tf', 'variables.tf']

DEBUG: Inspecting file: root.tf
DEBUG:  Found Terraform file: c:\Users\boadeyem\OneDrive - Indiana University\Documents\Master

### Read the module sources from the YAML Terraform files and **rebuild the tree in a new location**

In [4]:
import os
import shutil
import yaml
from typing import Any, List, Tuple

def extract_sources(data: Any, ctx: str = "") -> List[Tuple[str, str]]:
    """
    Collect every string-valued 'source' entry from a loaded YAML structure.

    Dicts and lists are traversed depth-first in their own iteration order.
    Each hit is reported as (source_value, context_path), where context_path
    joins dict keys with '.' and marks list positions as '[index]', starting
    from ctx. A 'source' key whose value is not a string is descended into
    like any other key.
    """
    def _walk(node, prefix):
        if isinstance(node, dict):
            for key, value in node.items():
                here = key if not prefix else f"{prefix}.{key}"
                if key == "source" and isinstance(value, str):
                    yield value, here
                else:
                    yield from _walk(value, here)
        elif isinstance(node, list):
            for position, element in enumerate(node):
                yield from _walk(element, f"{prefix}[{position}]")
        # Scalars (and any other type) contribute nothing

    return list(_walk(data, ctx))

def group_yaml_by_source(input_dir: str, output_dir: str):
    """
    Rebuild, under output_dir, the tree of folders referenced by 'source' keys
    in the YAML files of input_dir.

    For every ``.yml``/``.yaml`` file directly inside input_dir, each string
    'source' value is resolved relative to input_dir. When it resolves to an
    existing directory, that directory's whole contents are copied into
    ``output_dir/<source with every '..' component removed>`` and the function
    then recurses into that directory, using the copy destination as the new
    output_dir. Sources that do not resolve to a local directory (for example
    registry or provider addresses) are logged and skipped without creating
    anything under output_dir.

    Args:
        input_dir: Directory whose YAML files are scanned (not recursively).
        output_dir: Directory under which referenced folders are recreated;
            created if missing.

    Note:
        There is no cycle detection: folders whose sources reference each
        other (or themselves) would recurse without end.
    """
    input_dir = os.path.abspath(input_dir)
    output_dir = os.path.abspath(output_dir)
    print(f"DEBUG: Normalized input_dir = {input_dir}")
    print(f"DEBUG: Normalized output_dir = {output_dir}")
    os.makedirs(output_dir, exist_ok=True)

    print(f"\nDEBUG: Scanning directory: {input_dir}")
    for entry in os.listdir(input_dir):
        full_entry = os.path.join(input_dir, entry)
        rel_entry = os.path.relpath(full_entry, input_dir)

        if not entry.lower().endswith(('.yml', '.yaml')):
            print(f"DEBUG: └─ Skipping non-YAML ({rel_entry})")
            continue

        print(f"DEBUG: └─ Found YAML file: {rel_entry}")
        try:
            # Binary mode lets the YAML reader do its own encoding detection
            # instead of depending on the platform default text encoding;
            # undecodable input then surfaces as a YAMLError and is skipped.
            with open(full_entry, 'rb') as f:
                data = yaml.safe_load(f) or {}
        except yaml.YAMLError as e:
            print(f"DEBUG:     YAML parse error in {rel_entry}, skipping: {e}")
            continue

        sources = extract_sources(data)
        if not sources:
            print(f"DEBUG:     No 'source:' keys found in {rel_entry}")
            continue

        print(f"DEBUG:     sources found in {rel_entry}:")
        for src_val, ctx_path in sources:
            print(f"DEBUG:       • {ctx_path} → {src_val!r}")

        for src_val, ctx_path in sources:
            print(f"\nDEBUG:     Processing source: {src_val!r} (at {ctx_path})")

            # compute absolute path to the child folder
            src_folder = os.path.normpath(os.path.join(input_dir, src_val))
            rel_src = os.path.relpath(src_folder, input_dir)
            print(f"DEBUG:       Absolute src_folder = {src_folder}")
            print(f"DEBUG:       Relative src_folder = {rel_src}")

            # Validate BEFORE touching the output tree: a source that is not a
            # local directory must not leave an empty folder behind.
            if not os.path.isdir(src_folder):
                print(f"DEBUG:       src_folder {rel_src} is not a directory; skipping copy.")
                continue

            # drop every '..' component so the destination stays under output_dir
            parts = [p for p in src_val.replace('\\','/').split('/') if p != '..']
            if not parts:
                # base case: the source was just ../.. etc.
                # so we copy straight into the root of output_dir
                dest_folder = output_dir
                print("DEBUG:       Base-case source (only '..'); using output_dir as dest_folder")
            else:
                # normal case: rebuild the subpath under output_dir
                dest_folder = os.path.join(output_dir, *parts)

            print(f"DEBUG:       Computed dest_folder = {dest_folder}")
            os.makedirs(dest_folder, exist_ok=True)

            # copy entire contents
            for root, dirs, files in os.walk(src_folder):
                rel_sub = os.path.relpath(root, src_folder)
                target_root = (
                    dest_folder
                    if rel_sub == "."
                    else os.path.join(dest_folder, rel_sub)
                )
                print(f"DEBUG:       Traversing {rel_sub or '/'} → will copy into {os.path.relpath(target_root, output_dir)}")
                os.makedirs(target_root, exist_ok=True)

                for fname in files:
                    src_file = os.path.join(root, fname)
                    dest_file = os.path.join(target_root, fname)
                    print(f"DEBUG:         Copying {os.path.relpath(src_file, input_dir)} → {os.path.relpath(dest_file, output_dir)} ", end="")
                    try:
                        shutil.copy2(src_file, dest_file)
                        print("✔")
                    except Exception as e:
                        # Best effort: report the failed file and keep copying
                        print(f"✘ ERROR: {e}")

            # Follow the sources declared inside the folder that was just copied
            print(f"DEBUG:     Recursing into {rel_src}\n")
            group_yaml_by_source(src_folder, dest_folder)


if __name__ == "__main__":
    # Start from the YAML entry-point folder and rebuild its source tree
    group_yaml_by_source(input_dir=yaml_exec_source, output_dir=yaml_exec_tree)


DEBUG: Normalized input_dir = c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\yaml_files\aws\commons
DEBUG: Normalized output_dir = c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\yaml_tree

DEBUG: Scanning directory: c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\yaml_files\aws\commons
DEBUG: └─ Found YAML file: aurora.tf.yaml
DEBUG:     sources found in aurora.tf.yaml:
DEBUG:       • module[0].aurora.source → '../modules/aurora'

DEBUG:     Processing source: '../modules/aurora' (at module[0].aurora.source)
DEBUG:       Absolute src_folder = c:\Users\boadeyem\OneDrive - Indiana University\Documents\Masters-Career Documents\Portfolio\Projects\Gen3\yaml_files\aws\modules\aurora
DEBUG:       Relative src_folder = ..\modules\aurora
DEBUG:       Computed dest_folder = c:\Users\boadeyem\OneDrive - Indiana Universit

In [5]:
import shutil

# Copy the entry-point folder's own files on top of the rebuilt tree.
# dirs_exist_ok=True (Python 3.8+) allows the destination to exist already.
shutil.copytree(src=yaml_exec_source, dst=yaml_exec_tree, dirs_exist_ok=True)

'yaml_tree'