# PRISMA 2020 Flow Diagram Generator (Dynamic)

This notebook generates the PRISMA 2020 flow diagram for the O-ISAC Systematic Review.
It reads the metrics at run time from `data/status/prisma_metrics.json`, a path relative to the project folder in your Google Drive (`PROJECT_PATH` in the first cell).

**Instructions:**
1. Run the first cell to mount Google Drive.
2. Run the second cell to install necessary tools.
3. Run the third cell to load the metrics and generate the flow diagram, which is saved as PNG and PDF under `data/figures/` in the project folder.

In [None]:
# 1. Mount Google Drive and Set Path
#
# Mounts Google Drive in the Colab runtime and makes the project folder the
# working directory, so the relative paths used by the later cells resolve
# inside the project.
import os

from google.colab import drive

# Mount Drive
print("📂 Mounting Google Drive...")
drive.mount('/content/drive', force_remount=True)

# Define Project Path (Adjust if your folder name is different)
PROJECT_PATH = "/content/drive/MyDrive/AKU_WorkSpace/survey_fdgit/OISAC_PRISMA_COMST"

# Verify Path
# isdir (not exists): a regular file at this path would pass an existence
# check and then make os.chdir raise instead of reaching the guidance below.
if os.path.isdir(PROJECT_PATH):
    os.chdir(PROJECT_PATH)
    print(f"✅ Successfully set working directory: {os.getcwd()}")
else:
    # Deliberately non-fatal: tell the user what to fix rather than raising.
    print(f"❌ Path not found: {PROJECT_PATH}")
    print("Please verify your folder structure in the Files pane on the left.")

In [None]:
# 2. Install Graphviz (System + Python)
print("üîÑ Installing Graphviz dependencies...")
!apt-get update -qq
!apt-get install -y graphviz
!pip install graphviz
print("‚úÖ Installation complete.")

In [None]:
# 3. Load Metrics and Generate Flow Diagram
import graphviz
import json
import os

# Load Metrics from JSON
# Relative path: resolved against the current working directory.
METRICS_FILE = "data/status/prisma_metrics.json"


def load_metrics(path=None):
    """Load the PRISMA metrics from a JSON file.

    Args:
        path: Optional path to the metrics JSON file. When omitted,
            ``METRICS_FILE`` is used.

    Returns:
        dict: The parsed JSON object.

    Raises:
        FileNotFoundError: If the metrics file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        ValueError: If the JSON root is not an object.
    """
    metrics_path = METRICS_FILE if path is None else path

    try:
        # Explicit UTF-8: the locale default must not decide how JSON is decoded.
        with open(metrics_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        raise FileNotFoundError(f"Metrics file not found at: {metrics_path}") from None

    if not isinstance(data, dict):
        raise ValueError(
            f"Metrics file must contain a JSON object, got {type(data).__name__}: {metrics_path}"
        )

    print(f"📊 Loaded metrics (Last Updated: {data.get('last_updated', 'Unknown')})")
    return data

def generate_prisma_dot(data):
    """Build the PRISMA 2020 flow diagram as a Graphviz digraph.

    Args:
        data: Mapping with the metrics shown in the diagram. Required keys:
            ``identified_databases``, ``identified_registers``,
            ``duplicates_removed``, ``records_screened``,
            ``excluded_title_abstract``, ``retrieved_full_text``,
            ``not_retrieved``, ``assessed_full_text``, ``included_studies``
            and ``reasons_excluded`` (a mapping of reason -> count).

    Returns:
        graphviz.Digraph: The diagram, not yet rendered.

    Raises:
        KeyError: If any required metric is missing; the message lists all
            of the missing keys at once.
    """
    required_keys = (
        'identified_databases', 'identified_registers', 'duplicates_removed',
        'records_screened', 'excluded_title_abstract', 'retrieved_full_text',
        'not_retrieved', 'assessed_full_text', 'reasons_excluded',
        'included_studies',
    )
    missing = [key for key in required_keys if key not in data]
    if missing:
        # Report every missing metric together rather than one bare key at a time.
        raise KeyError(f"Missing PRISMA metrics: {', '.join(missing)}")

    dot = graphviz.Digraph('PRISMA_2020', comment='PRISMA 2020 Flow Diagram')
    dot.attr(rankdir='TB', splines='ortho', nodesep='0.8', ranksep='0.6', compound='true')

    # Styles
    dot.attr('node', shape='box', style='filled', fillcolor='#f9f9f9',
             fontsize='11', fontname='Arial', penwidth='1.5', margin='0.2')
    dot.attr('edge', arrowhead='vee', arrowsize='0.8', color='#333333')

    # Shared look of the three phase clusters.
    cluster_style = dict(style='dashed', color='#aaaaaa', fontcolor='#aaaaaa', labeljust='l')

    # NOTE: "\\n" and "\\l" below are literal backslash sequences handed to
    # Graphviz, which reads them as centred / left-justified line breaks.

    # --- Phase 1: Identification ---
    with dot.subgraph(name='cluster_identification') as c:
        c.attr(label='Identification', **cluster_style)
        c.node('id_databases', label=f"Records identified from:\\nDatabases (n = {data['identified_databases']})\\nRegisters (n = {data['identified_registers']})", width='3.5', fillcolor='#e1f5fe')
        c.node('calc_duplicates', label=f"Records removed before screening:\\nDuplicate records removed (n = {data['duplicates_removed']})", width='3.5')
        c.edge('id_databases', 'calc_duplicates')

    # --- Phase 2: Screening ---
    with dot.subgraph(name='cluster_screening') as c:
        c.attr(label='Screening', **cluster_style)
        c.node('screened', label=f"Records screened\\n(n = {data['records_screened']})", width='3.5', fillcolor='#fff3e0')
        c.node('excluded_screening', label=f"Records excluded\\n(n = {data['excluded_title_abstract']})", width='3.5')
        c.node('retrieved', label=f"Reports sought for retrieval\\n(n = {data['retrieved_full_text']})", width='3.5', fillcolor='#fff3e0')
        c.node('not_retrieved', label=f"Reports not retrieved\\n(n = {data['not_retrieved']})", width='3.5')
        c.node('assessed', label=f"Reports assessed for eligibility\\n(n = {data['assessed_full_text']})", width='3.5', fillcolor='#fff3e0')

        # Graphviz has no `justify` attribute; left alignment of label lines
        # is requested by ending each line with "\l".
        reason_lines = "".join(
            f"{reason} (n = {count})\\l"
            for reason, count in data['reasons_excluded'].items()
        )
        c.node('excluded_full', label=f"Reports excluded:\\l{reason_lines}", width='3.5')

        # Side boxes hang off the main column; constraint='false' keeps these
        # edges from affecting the vertical ranking.
        c.edge('screened', 'excluded_screening', constraint='false')
        c.edge('retrieved', 'not_retrieved', constraint='false')
        c.edge('assessed', 'excluded_full', constraint='false')

    # --- Phase 3: Included ---
    with dot.subgraph(name='cluster_included') as c:
        c.attr(label='Included', **cluster_style)
        c.node('included', label=f"Studies included in review\\n(n = {data['included_studies']})", width='3.5', fillcolor='#e8f5e9')

    # Edges
    dot.edge('calc_duplicates', 'screened')
    dot.edge('screened', 'retrieved')
    dot.edge('retrieved', 'assessed')
    dot.edge('assessed', 'included')

    # Invisible constraints: pin each side box to the rank of its main-flow
    # node, with an invisible edge between the pair.
    side_by_side = (
        ('screened', 'excluded_screening'),
        ('retrieved', 'not_retrieved'),
        ('assessed', 'excluded_full'),
    )
    for main_node, side_node in side_by_side:
        with dot.subgraph() as s:
            s.attr(rank='same')
            s.node(main_node)
            s.node(side_node)
            s.edge(main_node, side_node, style='invis')

    return dot

# Main Execution
#
# Loads the metrics, builds the diagram and renders it to
# data/figures/PRISMA_2020_flow_diagram.{png,pdf} under the current working
# directory. Failures are reported as a message rather than a traceback.
dot = None  # defined up front so the display line below never hits an undefined or stale name
try:
    metrics = load_metrics()
    dot = generate_prisma_dot(metrics)

    # Ensure output directory
    output_dir = os.path.join(os.getcwd(), 'data', 'figures')
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, 'PRISMA_2020_flow_diagram')

    # Render (cleanup=True removes the intermediate DOT source file)
    for fmt in ('png', 'pdf'):
        dot.render(output_path, format=fmt, cleanup=True)

    print(f"✅ Diagram Generated! Saved to: {output_path}.png and {output_path}.pdf")
except Exception as e:
    # Top-level notebook boundary: include the exception type, because some
    # messages (e.g. KeyError) are not self-explanatory on their own.
    print(f"❌ Error ({type(e).__name__}): {e}")

dot # Display
