In [None]:
import pandas as pd
import ast
from collections import defaultdict
from graphviz import Digraph
import os
import shutil
from PIL import Image
import hashlib

# -=-=-=-=-=-=-=-=-=-=-=-=-=-=
# 1. Load the input file from its path (change the path settings below as needed)
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=

# Input / output locations.
# Paths are assembled with os.path.join instead of a hard-coded "\\" so the
# script is not tied to the Windows path separator (on Windows the resulting
# strings are unchanged).
data_dir = "Mar25"
pre_name = os.path.join(data_dir, "cali" + "_")               # prefix of the input file name
pre_output_name = os.path.join(data_dir, "202503_" + "cali")  # output stem; ".png" is appended when saving

# Load the file (the code below reads its 'chain_combo' and 'count' columns)
file_path = pre_name + "unique chain.csv"
df = pd.read_csv(file_path)

# Start from an empty temp folder: remove any leftovers from a previous run,
# then (re)create it.
temp_output_dir = "temp_flowcharts"
if os.path.exists(temp_output_dir):
    shutil.rmtree(temp_output_dir)
os.makedirs(temp_output_dir, exist_ok=True)

# -=-=-=-=-=-=-=-=-=-=-=-=-=-=
# 2. Make the chain
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=

# Bucket every chain under its first element (the "head" question).
# Each bucket collects (chain, count) pairs in the order the rows appear.
head_chains = defaultdict(list)

for _row_index, record in df.iterrows():
    # 'chain_combo' holds the chain as a Python literal in text form
    parsed_chain = ast.literal_eval(record['chain_combo'])
    occurrences = record['count']
    if not parsed_chain:
        continue  # an empty chain has no head to group under
    head_chains[parsed_chain[0]].append((parsed_chain, occurrences))

# Paths of the rendered per-head files, in the iteration order of head_chains
png_paths = []

# Build and render one flowchart per head question.
# NOTE: head is sliced and .encode()'d below, so it is expected to be a str.
for head, chain_list in head_chains.items():
    dot = Digraph(comment=f"Flowchart: {head[:50]}...", format="png")
    dot.attr(rankdir="TB", size="10,10")  # Top-to-bottom internal layout

    # Count-weighted totals over all chains sharing this head:
    #   node_counts[node]       -> summed count of chains containing the node
    #                              (a node repeated inside one chain is added once per occurrence)
    #   edge_counts[(a, b)]     -> summed count of chains where b directly follows a
    node_counts = defaultdict(int)
    edge_counts = defaultdict(int)

    for chain, count in chain_list:
        for node in chain:
            node_counts[node] += count
        # Consecutive pairs (chain[0], chain[1]), (chain[1], chain[2]), ...
        for edge in zip(chain, chain[1:]):
            edge_counts[edge] += count

    # Short synthetic ids ("n0", "n1", ...) so the arbitrary node text is only
    # ever used as a label, never as a Graphviz node name.
    node_to_id = {node: f"n{i}" for i, node in enumerate(node_counts)}

    for node, node_id in node_to_id.items():
        label = f"{node}\nCount: {node_counts[node]}"
        dot.node(node_id, label, shape="box", style="filled", color="lightblue")

    for (src, dst), weight in edge_counts.items():
        dot.edge(node_to_id[src], node_to_id[dst], label=str(weight))

    # File stem = readable prefix + short hash of the full head text.
    # The prefix keeps only letters, digits, "_" and "-" (spaces become "_"),
    # so characters that are invalid in file names (e.g. : * " < > | \ or
    # newlines) can never reach the file system. The hash suffix keeps stems
    # unique even when two heads share the same sanitised prefix.
    safe_hash = hashlib.md5(head.encode('utf-8')).hexdigest()[:10]
    readable_prefix = "".join(
        ch for ch in head[:30].replace(' ', '_') if ch.isalnum() or ch in "_-"
    )
    safe_filename = f"{readable_prefix}_{safe_hash}"

    output_path = os.path.join(temp_output_dir, safe_filename)
    # render() returns the path of the rendered file; collect it for stitching
    png_paths.append(dot.render(output_path, view=False))

# Stitch all per-head PNGs side by side into a single image.
if not png_paths:
    # Without this guard, max() below fails with an opaque
    # "max() arg is an empty sequence" error.
    raise ValueError("No flowcharts were rendered; nothing to combine.")

# Copy each image into memory inside a `with` block so every file handle is
# closed explicitly instead of being left open for the rest of the session.
images = []
for path in png_paths:
    with Image.open(path) as source_image:
        images.append(source_image.copy())

# Canvas is as wide as all images laid end to end and as tall as the tallest.
total_width = sum(img.width for img in images)
max_height = max(img.height for img in images)

# White background fills the area below images shorter than max_height.
combined_image = Image.new("RGB", (total_width, max_height), color=(255, 255, 255))

# Paste left to right, top-aligned.
x_offset = 0
for img in images:
    combined_image.paste(img, (x_offset, 0))
    x_offset += img.width

# Save final combined image
final_output_path = pre_output_name + ".png"
combined_image.save(final_output_path)
print(f"✅ Combined flowchart (horizontal) saved as: {final_output_path}")

✅ Combined flowchart (horizontal) saved as: Mar25\202503_cali.png
