In [2]:
import graphviz
import os

# 1. Initialize Graph
# Directed graph that holds the whole pipeline diagram.
dot = graphviz.Digraph(
    name='Clinical_Trial_Pipeline',
    comment='Technical Architecture',
)

# Graph-wide layout: left-to-right ranks, orthogonal edge routing, white canvas.
dot.attr(
    rankdir='LR',
    splines='ortho',
    bgcolor='white',
)
# Padding around the drawing and spacing between nodes / ranks.
dot.attr(
    kw='graph',
    pad='0.5',
    nodesep='0.5',
    ranksep='0.6',
)
# Node defaults: shape='plain' so each node is drawn purely from its HTML-like table label.
dot.attr(
    kw='node',
    fontname='Arial',
    fontsize='11',
    shape='plain',
)
# Edge defaults: thick cobalt-blue arrows.
dot.attr(
    kw='edge',
    color='#0047AB',
    penwidth='2.0',
    arrowsize='1.0',
)

# --- STEP 1: DATA INGESTION ---
# Card with a blue header row and a single left-aligned body cell listing the
# data source, raw volume, filter, target, and the highlighted sample count.
# The label is wrapped in <...> so Graphviz treats it as an HTML-like label.
dot.node(
    name='Step1',
    label='''<
<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="8" BGCOLOR="white" COLOR="#0047AB" FIXEDSIZE="FALSE">
  <TR><TD BGCOLOR="#0047AB" HEIGHT="30"><FONT COLOR="white" POINT-SIZE="12"><B>1. DATA INGESTION</B></FONT></TD></TR>
  <TR><TD ALIGN="LEFT">
    <B>Source:</B> AACT Database<BR/>
    <B>Raw:</B> 500,000+ Trials<BR/>
    <B>Filter:</B> Phase 1, 2, 3 (Drugs)<BR/>
    <B>Target:</B> Terminated vs. Completed<BR/>
    <FONT COLOR="#008080"><B>N = 88,374 Samples</B></FONT>
  </TD></TR>
</TABLE>>''',
)

# --- STEP 2: FEATURE ENGINEERING ---
# Card with a blue header row and two stacked body cells: one for the
# structured-logic features and one for the NLP pipeline.
# The label is wrapped in <...> so Graphviz treats it as an HTML-like label.
dot.node(
    name='Step2',
    label='''<
<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="8" BGCOLOR="white" COLOR="#0047AB" FIXEDSIZE="FALSE">
  <TR><TD BGCOLOR="#0047AB" HEIGHT="30"><FONT COLOR="white" POINT-SIZE="12"><B>2. FEATURE ENGINEERING</B></FONT></TD></TR>
  <TR><TD ALIGN="LEFT">
    <B>Structured Logic:</B><BR/>
    • Regex Drug Classifier<BR/>
    • Design Rigor Score
  </TD></TR>
  <TR><TD ALIGN="LEFT">
    <B>NLP Pipeline (BioBERT):</B><BR/>
    • Tokenization &amp; Embeddings<BR/>
    • TF-IDF + PCA Reduction
  </TD></TR>
</TABLE>>''',
)

# --- STEP 3: THE 4 PILLARS (The Matrix) ---
# Wide card: a teal header spanning four columns, followed by one row of four
# top-aligned cells (Design, Patient, Landscape, Sponsor), each 110 wide with
# its bullet list set in a smaller font.
# The label is wrapped in <...> so Graphviz treats it as an HTML-like label.
dot.node(
    name='Step3',
    label='''<
<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="6" BGCOLOR="#F4F9FF" COLOR="#0047AB">
  <TR><TD COLSPAN="4" BGCOLOR="#008080" HEIGHT="30"><FONT COLOR="white" POINT-SIZE="12"><B>3. THE FEATURE MATRIX (60 DENSE FEATURES)</B></FONT></TD></TR>
  <TR>
    <TD ALIGN="LEFT" VALIGN="TOP" WIDTH="110">
      <B>A. DESIGN</B><BR/>
      <FONT POINT-SIZE="10">
      • Rigor Score<BR/>
      • Masking<BR/>
      • Allocation<BR/>
      • Endpoints
      </FONT>
    </TD>
    <TD ALIGN="LEFT" VALIGN="TOP" WIDTH="110">
      <B>B. PATIENT</B><BR/>
      <FONT POINT-SIZE="10">
      • Eligibility<BR/>
      • Age / Gender<BR/>
      • Disease Status<BR/>
      • Strictness
      </FONT>
    </TD>
    <TD ALIGN="LEFT" VALIGN="TOP" WIDTH="110">
      <B>C. LANDSCAPE</B><BR/>
      <FONT POINT-SIZE="10">
      • Agent Category<BR/>
      • Competition<BR/>
      • Pathology<BR/>
      • Sub-group
      </FONT>
    </TD>
    <TD ALIGN="LEFT" VALIGN="TOP" WIDTH="110">
      <B>D. SPONSOR</B><BR/>
      <FONT POINT-SIZE="10">
      • Sponsor Tier<BR/>
      • Agency Class<BR/>
      • US Sites<BR/>
      • Track Record
      </FONT>
    </TD>
  </TR>
</TABLE>>''',
)

# --- STEP 4: MODELING ---
# Card with a blue header row and a single left-aligned body cell naming the
# algorithm, the baseline it is compared against, and the optimization settings.
# The label is wrapped in <...> so Graphviz treats it as an HTML-like label.
dot.node(
    name='Step4',
    label='''<
<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="8" BGCOLOR="white" COLOR="#0047AB" FIXEDSIZE="FALSE">
  <TR><TD BGCOLOR="#0047AB" HEIGHT="30"><FONT COLOR="white" POINT-SIZE="12"><B>4. MODEL ENGINE</B></FONT></TD></TR>
  <TR><TD ALIGN="LEFT">
    <B>Algorithm:</B> XGBoost Classifier<BR/>
    <B>Baseline:</B> vs. Logistic Reg.<BR/>
    <B>Optimization:</B><BR/>
    • Scale_Pos_Weight (1:5.4)<BR/>
    • L2 Regularization<BR/>
    • AUC-PR Metric
  </TD></TR>
</TABLE>>''',
)

# --- STEP 5: INTERPRETATION ---
# Final card: blue header row and a single left-aligned body cell listing the
# explanation method, its global and local outputs, and the UI.
# The label is wrapped in <...> so Graphviz treats it as an HTML-like label.
dot.node(
    name='Step5',
    label='''<
<TABLE BORDER="0" CELLBORDER="1" CELLSPACING="0" CELLPADDING="8" BGCOLOR="white" COLOR="#0047AB" FIXEDSIZE="FALSE">
  <TR><TD BGCOLOR="#0047AB" HEIGHT="30"><FONT COLOR="white" POINT-SIZE="12"><B>5. INTERPRETATION</B></FONT></TD></TR>
  <TR><TD ALIGN="LEFT">
    <B>Method:</B> SHAP (TreeExplainer)<BR/>
    <B>Global:</B> Feature Ranking<BR/>
    <B>Local:</B> Risk Drivers<BR/>
    <B>UI:</B> Streamlit App
  </TD></TR>
</TABLE>>''',
)

# --- CONNECTIONS ---
# Chain the five stages in order; each (tail, head) pair becomes one
# attribute-less edge, so the edge defaults set on the graph apply to all.
dot.edges([
    ('Step1', 'Step2'),
    ('Step2', 'Step3'),
    ('Step3', 'Step4'),
    ('Step4', 'Step5'),
])

# --- RENDER (FIXED) ---
# Render the graph to 'pipeline_slide.png' and report where it was written.
# Only the render call sits inside the try so that a failure in the success
# path is never misreported as a rendering problem. Failures are reported
# with print() rather than re-raised, keeping the cell best-effort.
try:
    # We set view=False to prevent the 'xdg-open' error
    output_path = dot.render('pipeline_slide', format='png', view=False)
except graphviz.ExecutableNotFound as e:
    # The Python package is installed but the Graphviz binaries are missing:
    # this is the only case where the OS install hints are relevant.
    print(f"❌ Error: {e}")
    print("Graphviz executables were not found: you need to install Graphviz on your OS.")
    print("Mac: brew install graphviz")
    print("Windows: Download installer from graphviz.org")
    print("Linux: sudo apt-get install graphviz")
except Exception as e:
    # Any other failure (e.g. the layout engine rejecting the input, or an
    # unwritable output directory): report it without misleading install hints.
    print(f"❌ Error: {e}")
else:
    print(f"✅ SUCCESS! Image saved at: {os.path.abspath(output_path)}")
    print("Go to your file explorer and open 'pipeline_slide.png'")

✅ SUCCESS! Image saved at: /home/delaunan/code/delaunan/clintrialpredict/pipeline_slide.png
Go to your file explorer and open 'pipeline_slide.png'
