In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# ----------------------------
# STEP 1: Load and Prepare Data
# ----------------------------
def generate_GAV(file_path) -> str:
    """Build and display the graphical-abstract figures for one workbook.

    Reads the first sheet of the Excel workbook at ``file_path``, derives one
    row per subject (dose, beam, treatment, sex and the DOB / irradiation /
    termination dates) and shows two Plotly figures: a timeline (Gantt) chart
    and a static table describing the visual encoding.

    Args:
        file_path: Location of the workbook; passed straight to
            ``pandas.ExcelFile``.

    Returns:
        The literal string ``"Figures Created and Shown"``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        KeyError: If a required column is missing from the sheet.
        ValueError: If a termination date cannot be parsed as ``%d/%m/%Y``.
    """
    # Value of the "Treatment" column that marks a subject as treated.
    treatment_positive_term = "NR+"

    # STEP 1-3: load the sheet and derive the per-subject plotting table.
    sheet = _read_gav_sheet(file_path)
    subjects = _prepare_gav_subjects(sheet, treatment_positive_term)

    # STEP 4-5: timeline chart with its treatment/beam annotations.
    _build_gav_timeline(subjects, treatment_positive_term).show()

    # STEP 6: static legend table explaining the encoding.
    _build_gav_encoding_table().show()

    return "Figures Created and Shown"


def _read_gav_sheet(file_path) -> pd.DataFrame:
    """Read the workbook's first sheet and promote the embedded header row."""
    # The context manager closes the workbook handle once the sheet is read;
    # previously the ExcelFile object was left open.
    with pd.ExcelFile(file_path) as workbook:
        raw = pd.read_excel(workbook)

    # Row 0 of the parsed frame is discarded, row 1 supplies the column names
    # and everything after it is subject data.
    # NOTE(review): read_excel has already consumed the sheet's first row as
    # its own header, so confirm this offset against the real export layout.
    sheet = raw.iloc[1:].copy()
    sheet.columns = sheet.iloc[0]
    return sheet.iloc[1:]


def _prepare_gav_subjects(sheet, treatment_positive_term) -> pd.DataFrame:
    """Derive the per-subject table (labels, dates, colors) used for plotting."""
    required = (
        "Experimental Group (rdrc_name)",
        "Total absorbed dose per particle type",
        "Total absorbed dose (Unit)",
        "Radiation beam type",
        "Ionizing radiation  (in order of administration)",
        "Date of euthanasia, death or sample collection",
        "Age at irradiation (weeks)",
        "Time point of sacrifice post irradiation (weeks)",
        "Treatment 1",
        "Sex",
    )
    # Report every missing column at once instead of failing on the first
    # lookup; KeyError is what the plain column access would raise anyway.
    missing = [column for column in required if column not in sheet.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    subjects = pd.DataFrame()
    subjects["Group"] = sheet["Experimental Group (rdrc_name)"]
    # Dose and beam are display labels, so both parts are stringified.
    subjects["Dose"] = (
        sheet["Total absorbed dose per particle type"].astype(str)
        + " "
        + sheet["Total absorbed dose (Unit)"].astype(str)
    )
    subjects["Beam"] = (
        sheet["Radiation beam type"].astype(str)
        + " "
        + sheet["Ionizing radiation  (in order of administration)"].astype(str)
    )
    # Number of rows that share the same dose label.
    subjects["Subjects"] = subjects.groupby("Dose")["Dose"].transform("count")

    subjects["Termination Date"] = pd.to_datetime(
        sheet["Date of euthanasia, death or sample collection"],
        format="%d/%m/%Y",
    )
    # Non-numeric week counts become NaN (and NaT once turned into durations).
    subjects["Age at Irradiation"] = pd.to_numeric(
        sheet["Age at irradiation (weeks)"], errors="coerce"
    )
    recovery_weeks = pd.to_numeric(
        sheet["Time point of sacrifice post irradiation (weeks)"],
        errors="coerce",
    )
    subjects["Recovery Timeline"] = pd.to_timedelta(recovery_weeks, unit="W")

    # Combine Treatment 1-3 when the extra columns exist. Empty extra cells
    # are skipped: stringifying them to "nan" turned "NR+" into "NR+ nan nan",
    # which then never matched the positive term.
    treatment = sheet["Treatment 1"].astype(str)
    if "Treatment 2" in sheet.columns and "Treatment 3" in sheet.columns:
        for extra in ("Treatment 2", "Treatment 3"):
            values = sheet[extra]
            suffix = (" " + values.astype(str)).where(values.notna(), "")
            treatment = treatment + suffix
    subjects["Treatment"] = treatment
    subjects["Gender"] = sheet["Sex"].astype(str)

    # Work backwards from the termination date: irradiation happened
    # "recovery" weeks earlier, birth "age at irradiation" weeks before that.
    subjects["Irradiation Date"] = (
        subjects["Termination Date"] - subjects["Recovery Timeline"]
    )
    subjects["DOB"] = subjects["Irradiation Date"] - pd.to_timedelta(
        subjects["Age at Irradiation"], unit="W"
    )

    # Composite legend key: gender | beam | NR+/NR- | dose.
    treated = subjects["Treatment"].str.strip() == treatment_positive_term
    subjects["Legend_Category"] = (
        subjects["Gender"]
        + " | "
        + subjects["Beam"]
        + " | "
        + treated.map({True: "NR+", False: "NR-"})
        + " | "
        + subjects["Dose"]
    )

    # Bar color follows gender; anything other than Male/Female is gray.
    base_colors = {"Male": "#4C72B0", "Female": "#E9967A"}
    subjects["Color"] = subjects["Gender"].map(base_colors).fillna("#999999")
    subjects["Treatment_Color"] = treated.map({True: "#0D98BA", False: "#F7BAA8"})
    # Rows whose beam label mentions "Control" get faded annotation markers.
    is_control = subjects["Beam"].str.contains("Control", regex=False)
    subjects["Opacity"] = is_control.map({True: 0.2, False: 1.0})

    return subjects


def _build_gav_timeline(subjects, treatment_positive_term):
    """Create the Gantt-style timeline figure for the prepared subject table."""
    # First color seen for each legend category, in order of appearance.
    category_colors = (
        subjects.drop_duplicates("Legend_Category")
        .set_index("Legend_Category")["Color"]
        .to_dict()
    )
    bar_labels = subjects["Subjects"].astype(str) + " | " + subjects["Dose"]

    # Colored bars: irradiation date -> termination date.
    fig = px.timeline(
        subjects,
        x_start="Irradiation Date",
        x_end="Termination Date",
        y="Dose",
        color="Legend_Category",
        color_discrete_map=category_colors,
        text=bar_labels,
        hover_data=["Beam", "Dose", "Treatment", "DOB"],
    )
    # Applied before the gray bars are added so only the colored bars change.
    fig.update_traces(textposition="outside", textfont=dict(size=10))

    # Gray bars: date of birth -> irradiation date, tied to the legend entry
    # of the matching colored bar but without a legend entry of their own.
    gray = "rgba(128, 128, 128, 0.2)"
    dob_figure = px.timeline(
        subjects,
        x_start="DOB",
        x_end="Irradiation Date",
        y="Dose",
        color="Legend_Category",
        color_discrete_map=dict.fromkeys(category_colors, gray),
    )
    for trace in dob_figure.data:
        trace.legendgroup = trace.name
        trace.showlegend = False
        fig.add_trace(trace)

    # One marker per subject row, placed a fixed number of days before the
    # irradiation date; treated subjects additionally get a "+" sign.
    treated = subjects["Treatment"].str.strip() == treatment_positive_term
    annotations = []
    rows = zip(
        subjects["Irradiation Date"],
        subjects["Dose"],
        subjects["Opacity"],
        subjects["Treatment_Color"],
        treated,
    )
    for irradiation_date, dose, opacity, plus_color, is_treated in rows:
        annotations.append(dict(
            x=irradiation_date - pd.Timedelta(days=230),
            y=dose,
            text="■■■",
            showarrow=False,
            font=dict(color=f"rgba(128, 128, 128, {opacity})", size=18),
        ))
        if is_treated:
            annotations.append(dict(
                x=irradiation_date - pd.Timedelta(days=200),
                y=dose,
                text=" + ",
                showarrow=False,
                font=dict(color=plus_color, size=18),
            ))

    fig.update_layout(
        annotations=annotations,
        width=2140,
        height=800,
        # Pad the axis so the markers on the left and the outside bar labels
        # on the right stay inside the plot area.
        xaxis=dict(
            range=[
                subjects["Irradiation Date"].min() - pd.Timedelta(days=320),
                subjects["Termination Date"].max() + pd.Timedelta(days=100),
            ]
        ),
        legend_title_text="Gender | Beam Type | Treatment | Dose",
        xaxis_title="Timeline",
        yaxis_title="Dose (Ordered)",
        title="Graphical Abstract for Dynan Experiments",
        showlegend=True,
    )
    return fig


def _build_gav_encoding_table():
    """Create the static table figure that explains the chart's encoding."""
    # NOTE(review): these texts are fixed and not derived from the data. The
    # "Group" row describes a Y-axis by group although the chart plots Dose,
    # and the opacity is only applied to the annotation markers - confirm
    # the wording (including the color names) against the intended design.
    encoding_matrix = pd.DataFrame({
        "Factor": ["Gender", "Beam Type", "Treatment", "Group", "Subjects", "Dose"],
        "Visual Interpretation": [
            "Base colors: Blue (Male) and Coral Pink (Female)",
            "Opacity: Controls (20% opacity) vs. Beam (100% opacity)",
            "Treatment Indicator: Sage Green (Treated, NR+) vs. Burnt Orange (Untreated, NR-)",
            "Displayed along Y-axis based on group categorization",
            "Each bar labeled with the number of subjects",
            "Each bar labeled with the respective dose (e.g., '1.5 cGy')",
        ],
        "Example": [
            "Male = #4C72B0, Female = #E9967A",
            "Control Opacity = 0.2, Beam Opacity = 1.0",
            "NR+ = #0D98BA, NR- = #F7BAA8",
            "Groups sorted by a specific order",
            "E.g., '10 subjects'",
            "E.g., '2.0 cGy'",
        ],
    })

    table_figure = go.Figure(data=[go.Table(
        header=dict(
            values=["Factor", "Visual Interpretation", "Example"],
            fill_color='lightblue',
            align='left',
        ),
        cells=dict(
            values=[encoding_matrix[col] for col in encoding_matrix.columns],
            fill_color='lavender',
            align='left',
        ),
    )])
    table_figure.update_layout(title_text="Encoding Matrix for Graphical Abstract")
    return table_figure


In [None]:
# Run the pipeline on an export stored under one user's Downloads folder.
# NOTE(review): this absolute path is machine-specific; the output recorded
# for this cell is a FileNotFoundError for exactly this path.
file_path = "/Users/sdubiel/Downloads/Content_20250403_204322.xlsx"
generate_GAV(file_path)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/sdubiel/Downloads/Content_20250403_204322.xlsx'

In [None]:
# Run the pipeline on "Blakely G404-12.xlsx"; the relative path is resolved
# against the notebook's current working directory.
file_path = "Blakely G404-12.xlsx"
generate_GAV(file_path)