Notes on how Spaceflight Experiments are structured

- Group by Experimental Group [Flight (F), Live Animal Return (LAR), Ground Control (GC), ISS-Terminal (ISS-T), Basal (B), Vivarium (V)]
- Group by Gravity Level [microgravity, partial gravity, 1G, 2G]
- Group by different treatment groups and additional factors


In [None]:
import pandas as pd

# Load the subject-metadata workbook.
df = pd.read_excel('RR-28.xlsx')

# Row 0 of the parsed frame is discarded, row 1 supplies the column labels,
# and everything below it is kept as the data rows.
df1 = df.iloc[1:]
df1.columns = df1.iloc[0]
df1 = df1.iloc[1:]

# Split each experimental group by treatment only when the 'Treatment 1'
# column holds at least one truthy value; otherwise group by experimental
# group alone.
split_by_treatment = df1['Treatment 1'].any()
group_keys = (
    ['Experimental Group (rdrc_name)', 'Treatment 1']
    if split_by_treatment
    else 'Experimental Group (rdrc_name)'
)
grouped = df1.groupby(group_keys)

# One summary row per group (the first entry of every column) ...
df_grouped = grouped.first().reset_index()

# ... plus the number of source rows that fell into that group.
df_grouped['Subjects'] = grouped.size().to_numpy()


Workbook contains no default style, apply openpyxl's default



In [None]:
# Inspect the column labels that were promoted from a data row when df1 was built.
df1.columns

Index(['Ear ID', '<SLIMSGUID>', 'Days in uG', 'Days in uG unit',
       'Gravity level', 'Experimental condition', 'Subject description',
       'Barcode', 'Derivation count', 'Protocol_approach', 'Additional notes',
       'Dissection time zone', 'Dissection start time', 'Tail ID',
       'ALSDA subject ID', 'Subject supplier', 'Strain', 'Genotype', 'Diet',
       'Diet after landing', 'Foodbar lot number', 'Foodbar nutrition values',
       'Feeding schedule', 'Husbandry', 'Treatment 1', 'Enrichment material',
       'Light cycle', 'Carcass preservation method', 'Partial weight date',
       'Category (cntp_name)', 'Type (cntp_name)', 'Sponsor',
       'Payload ID (rdrc_name)', 'Id', 'NBISC Status (rdrc_name)',
       'Radio Frequency Identification (RFID)', 'Sex', 'Date of birth',
       'Date of delivery to ACF', 'Acclimation start date',
       'Acclimation configuration', 'Mission ID', 'Age at launch/ start',
       'Body weight at launch', 'Pre-launch weight (Unit)',
       'Dat

In [None]:
# Display the per-group summary: one row per group plus the 'Subjects' count column.
df_grouped

1,Experimental Group (rdrc_name),Treatment 1,Ear ID,<SLIMSGUID>,Days in uG,Days in uG unit,Gravity level,Experimental condition,Subject description,Barcode,...,Euthanasia location,Whole body weight,Last weight (Unit),Date weighed closest to euthanasia,Partial body weight,Partial weight (Unit),Spaceflight,BSP dissection date,Dissection order,Subjects
0,F,Balanced Salt Solution (BSS),,fccf8a4e-9d5b-4190-a3c1-b290204e2b0e,40,d,uG,,,Mmus00008289,...,"KSC, FL",21.3,g,05/05/2024,0,0,Spaceflight,05/05/2024,Randomized,20
1,F,rAAV - adeno-associated viral (AAV) gene thera...,,196ebf1a-b944-4f19-b3b1-9a65b9aaef66,40,d,uG,,,Mmus00008301,...,"KSC, FL",24.0,g,05/05/2024,0,0,Spaceflight,05/05/2024,Randomized,20
2,GC,Balanced Salt Solution (BSS),,617818f8-10ec-48a0-9b49-e6d9ec62cc7f,0,d,1G,,,Mmus00008329,...,"KSC, FL",29.4,g,05/10/2024,0,0,Ground control,05/10/2024,Randomized,20
3,GC,rAAV - adeno-associated viral (AAV) gene thera...,,708db527-dd7b-41d8-87a2-8b608700702c,0,d,1G,,,Mmus00008338,...,"KSC, FL",22.2,g,05/10/2024,0,0,Ground control,05/10/2024,Randomized,20


In [None]:
import pandas as pd
import re

# ----------------------------
# Helper Function to Parse Week Ranges
# ----------------------------
def parse_week_range(value):
    """
    Turn a week description such as "29-30 weeks" into a single number.

    Every integer or decimal number found in the text is collected and their
    arithmetic mean is returned, so "29-30 weeks" gives 29.5 and "12 weeks"
    gives 12.0. Missing input, or text without any number, gives None.
    """
    if pd.isna(value):
        return None

    # Pull out each numeric token (with optional decimal part) as a float.
    found = [float(match.group()) for match in re.finditer(r'\d+(?:\.\d+)?', str(value))]

    # A single number averages to itself; a range averages to its midpoint.
    return sum(found) / len(found) if found else None

# ----------------------------
# Preprocessing: Convert String Week Ranges to Numeric
# ----------------------------
# Apply the conversion function to the relevant columns in df_grouped
# Convert the week-range strings in df_grouped to numbers.
# 'Age at euthanasia (weeks)' is treated as optional everywhere in this cell
# (previously it was read unconditionally and only checked for afterwards).
# pd.to_numeric keeps the result numeric even when every entry is missing.
has_euthanasia_age = 'Age at euthanasia (weeks)' in df_grouped.columns
if has_euthanasia_age:
    df_grouped['Age at euthanasia (weeks)'] = pd.to_numeric(
        df_grouped['Age at euthanasia (weeks)'].apply(parse_week_range),
        errors='coerce',
    )
    age_at_euthanasia = df_grouped['Age at euthanasia (weeks)']
else:
    # No such column: carry NaN so the dependent columns are NaN instead of raising.
    age_at_euthanasia = pd.Series(float('nan'), index=df_grouped.index)

df_grouped['Age at launch/ start'] = pd.to_numeric(
    df_grouped['Age at launch/ start'].apply(parse_week_range),
    errors='coerce',
)

# ----------------------------
# Building the DataFrame df2 using the Processed Data
# ----------------------------
df2 = pd.DataFrame()

df2['Group'] = df_grouped["Experimental Group (rdrc_name)"]
df2['Gravity'] = df_grouped['Gravity level']
df2["Subjects"] = df_grouped['Subjects']
df2["Age at Launch"] = df_grouped['Age at launch/ start']   # numeric via parse_week_range
df2["Landing to Euthanasia"] = df_grouped["Landing to euthanasia"]

# Compute the Recovery Timeline in weeks:
# (Age at euthanasia) - (Age at launch) - (Landing to euthanasia in days / 7)
df2["Recovery Timeline"] = (
    age_at_euthanasia -
    df_grouped['Age at launch/ start'] -
    (df_grouped['Landing to euthanasia'] / 7)
)

df2["Termination Date"] = df_grouped["Date of euthanasia, death or sample collection"]
df2["Gender"] = df_grouped["Sex"]
df2["Treatment"] = df_grouped["Treatment 1"]
df2["Age at euthanasia"] = age_at_euthanasia

# When additional treatment columns exist, join every treatment that is
# actually present into one space-separated label. Missing values are
# skipped so they no longer show up as the literal text "None"/"nan", and a
# column that is absent is simply left out instead of raising.
# NOTE(review): other placeholder values (e.g. a 0 in 'Treatment 2') are kept
# as-is because nothing here establishes that they mean "no treatment".
treatment_columns = [
    column for column in ("Treatment 1", "Treatment 2", "Treatment 3")
    if column in df_grouped.columns
]
if len(treatment_columns) > 1:
    df2["Treatment"] = df_grouped[treatment_columns].apply(
        lambda row: " ".join(
            str(value) for value in row
            if pd.notna(value) and str(value).strip()
        ),
        axis=1,
    )


In [None]:
# Display the condensed per-group table assembled from df_grouped.
df2

Unnamed: 0,Group,Gravity,Subjects,Age at Launch,Landing to Euthanasia,Recovery Timeline,Termination Date,Gender,Treatment,Age at euthanasia
0,F,uG,20,22.5,5,5.285714,05/05/2024,Female,Balanced Salt Solution (BSS) 0 None,28.5
1,F,uG,20,22.5,5,5.285714,05/05/2024,Female,rAAV - adeno-associated viral (AAV) gene thera...,28.5
2,GC,1G,20,22.5,10,5.571429,05/10/2024,Female,Balanced Salt Solution (BSS) 0 None,29.5
3,GC,1G,20,22.5,10,5.571429,05/10/2024,Female,rAAV - adeno-associated viral (AAV) gene thera...,29.5


In [None]:
# Parse the termination dates into datetimes (pandas infers the format).
df2["Termination Date"] = pd.to_datetime(df2["Termination Date"])

# Force the week-based columns to numeric; anything unparseable becomes NaN.
for numeric_column in ("Age at Launch", "Recovery Timeline"):
    df2[numeric_column] = pd.to_numeric(df2[numeric_column], errors='coerce')

# Row label for the chart: "<group> | <treatment>" when any treatment value is
# truthy, otherwise just the group.
has_treatment = df2["Treatment"].any()
df2["Experimental Group + Treatment"] = (
    df2["Group"] + " | " + df2["Treatment"] if has_treatment else df2["Group"]
)

# From here on `df` is another name for the same object as df2 (it no longer
# refers to the raw workbook frame).
df = df2

In [None]:
# Display the plotting table; `df` was rebound to the same object as df2 in the previous cell.
df

Unnamed: 0,Group,Gravity,Subjects,Age at Launch,Landing to Euthanasia,Recovery Timeline,Termination Date,Gender,Treatment,Age at euthanasia,Experimental Group + Treatment
0,F,uG,20,22.5,5,5.285714,2024-05-05,Female,Balanced Salt Solution (BSS) 0 None,28.5,F | Balanced Salt Solution (BSS) 0 None
1,F,uG,20,22.5,5,5.285714,2024-05-05,Female,rAAV - adeno-associated viral (AAV) gene thera...,28.5,F | rAAV - adeno-associated viral (AAV) gene t...
2,GC,1G,20,22.5,10,5.571429,2024-05-10,Female,Balanced Salt Solution (BSS) 0 None,29.5,GC | Balanced Salt Solution (BSS) 0 None
3,GC,1G,20,22.5,10,5.571429,2024-05-10,Female,rAAV - adeno-associated viral (AAV) gene thera...,29.5,GC | rAAV - adeno-associated viral (AAV) gene ...


In [None]:
# Graphical Abstract Generator for Dynan Experiments (NBISC)
# Author: Hari Parthasarathy
# Description: Parses radiation biology experiment metadata from Excel and outputs a publication-ready Gantt chart

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

In [None]:
# ----------------------------
# STEP 2: Compute Timeline Dates
# ----------------------------
# Both week counts become timedeltas (unit='W': one week = 7 days).
recovery_span = pd.to_timedelta(df["Recovery Timeline"], unit='W')
age_at_launch_span = pd.to_timedelta(df["Age at Launch"], unit='W')

# The 'Recovery Timeline' column is overwritten in place with its timedelta form.
df["Recovery Timeline"] = recovery_span

# Launch Date: Termination Date minus Recovery Timeline.
# NOTE(review): 'Recovery Timeline' was built as (age at euthanasia - age at
# launch - landing-to-euthanasia days / 7), so this date sits that many days
# later than Termination Date minus the full age difference -- confirm that
# this is the intended launch date.
df["Launch Date"] = df["Termination Date"] - recovery_span

# DOB: Launch Date minus the age at launch.
df["DOB"] = df["Launch Date"] - age_at_launch_span

In [None]:
# === STEP 3: Create Composite Category for Legend ===
# Substring that marks a row as having received the treatment of interest.
treatment_positive_term = "rAAV"


def _legend_category(row):
    """Build the legend label "Gender | Gravity | <treatment flag> | Group" for one row.

    The treatment flag is `treatment_positive_term` when that term occurs in
    the row's 'Treatment' text and 'N/A' otherwise. A 'Treatment' value that is
    not a string (None/NaN when no treatment was recorded) counts as 'N/A'
    instead of raising TypeError on the substring test.
    """
    treatment = row['Treatment']
    is_positive = isinstance(treatment, str) and treatment_positive_term in treatment
    treatment_flag = treatment_positive_term if is_positive else 'N/A'
    return f"{row['Gender']} | {row['Gravity']} | {treatment_flag} | {row['Group']}"


df["Legend_Category"] = df.apply(_legend_category, axis=1)

In [None]:
# === STEP 4: Define Color & Opacity Mapping ===
# Experimental-group codes, split into flown and ground cohorts. 'ISS-T' is
# the abbreviation for ISS-Terminal given in the notes at the top of this
# notebook and matches the abbreviated style of the other codes; the spelled-out
# 'ISS-Terminal' is kept so existing data using it still matches.
spaceflight = {'F', 'LAR', 'ISS-T', 'ISS-Terminal'}
ground_control = {'GC', 'B', 'V'}

def assign_color(row):
    """Return the bar colour (hex string) for one row.

    The colour depends on whether row['Group'] is a spaceflight group, a
    ground-control group, or neither, and on whether row['Gender'] is 'Male'
    (any other value gets the second colour of the pair).
    """
    is_male = row['Gender'] == 'Male'
    group = row['Group']
    if group in spaceflight:
        return '#90d5ff' if is_male else '#ff69b4'
    if group in ground_control:
        return '#000080' if is_male else '#E9967A'
    # Group code not listed in either set.
    return '#4C72B0' if is_male else '#E9967A'

def assign_treatment_color(row):
    """Return "#0D98BA" when the row's treatment contains `treatment_positive_term`, else "#F7BAA8".

    A row["Treatment"] value that is not a string (None/NaN when no treatment
    was recorded) is treated as not matching instead of raising TypeError on
    the substring test.
    """
    treatment = row["Treatment"]
    is_positive = isinstance(treatment, str) and treatment_positive_term in treatment
    return "#0D98BA" if is_positive else "#F7BAA8"

def assign_opacity(row):
    """Return 1.0 for rows whose 'Group' is in the spaceflight set and 0.3 for all others."""
    if row['Group'] in spaceflight:
        return 1.0
    return 0.3

# Derive each styling column by running its row-wise helper over the table.
for styling_column, row_mapper in (
    ('Color', assign_color),
    ("Treatment_Color", assign_treatment_color),
    ('Opacity', assign_opacity),
):
    df[styling_column] = df.apply(row_mapper, axis=1)

# Remember the group codes in order of first appearance.
default_y_order = df["Group"].unique().tolist()


F
F
GC
GC


In [None]:
# ----------------------------
# STEP 5: Create the Timeline (Gantt) Chart
# ----------------------------
legend_categories = df["Legend_Category"].unique()

# Each legend category takes the 'Color' of the first row that belongs to it.
category_colors = {
    category: df.loc[df["Legend_Category"] == category, "Color"].iloc[0]
    for category in legend_categories
}

# Text drawn next to each bar: "<subject count> | <group>".
bar_labels = df.apply(lambda row: f"{row['Subjects']} | {row['Group']}", axis=1)

# Main bars: Launch Date -> Termination Date, one row per group/treatment label.
fig = px.timeline(
    df,
    x_start="Launch Date",
    x_end="Termination Date",
    y="Experimental Group + Treatment",
    color="Legend_Category",
    color_discrete_map=category_colors,
    text=bar_labels,
    hover_data=["Group", "Gravity", "Treatment", "DOB"]
)
fig.update_traces(textposition="outside", textfont=dict(size=10))

# Translucent gray bars covering the period from DOB to Launch Date.
pre_launch_gray = "rgba(128, 128, 128, 0.2)"
dob_traces = px.timeline(
    df.assign(Color=pre_launch_gray),
    x_start="DOB",
    x_end="Launch Date",
    y="Experimental Group + Treatment",
    color="Legend_Category",
    color_discrete_map=dict.fromkeys(legend_categories, pre_launch_gray)
).data

# Attach every gray trace to the main figure, tied to the legend entry of the
# same name but without adding a second legend item.
for trace in dob_traces:
    trace.legendgroup = trace.name
    trace.showlegend = False
    fig.add_trace(trace)

In [None]:
# Update layout with axis settings, titles and legend, then render the chart.
# This re-applies the text styling to every trace with a larger font than the
# size set when the figure was created.
fig.update_traces(textposition='outside', textfont=dict(size=20))
fig.update_layout(
    width=2000,
    height=1000,
    xaxis=dict(
        # Pad the visible range: 100 days before the earliest launch and
        # 50 days after the latest termination.
        range=[
            df["Launch Date"].min() - pd.Timedelta(days=100),
            df["Termination Date"].max() + pd.Timedelta(days=50)
        ],
        tickfont = dict(size=30)
    ),
    yaxis=dict(
        tickfont=dict(size=30)
    ),
    # NOTE(review): the tick font size is given twice for each axis (30 in the
    # dicts above, 14 in the flattened keywords below); presumably the later
    # keyword wins -- confirm which size is intended and drop the other.
    xaxis_title_text='Timeline', xaxis_title_font_size=18, xaxis_tickfont_size=14,
    yaxis_title_text='Experimental Group & Treatment', yaxis_title_font_size=18, yaxis_tickfont_size=14,
    # The title lists the fields in the same order used to build
    # 'Legend_Category': gender, gravity, treatment flag, group.
    legend_title_text='Gender | Gravity | Treatment | Group', legend_title_font_size=16, legend_font_size=14,
    showlegend=True
)

# Display the timeline chart
fig.show()

In [None]:
# Print the first rows of the three derived date columns as a sanity check.
# NOTE(review): the output saved with this cell lists five rows dated 2017-2018,
# whereas the tables displayed earlier in this notebook have four rows with 2024
# termination dates -- the saved output looks stale; re-run from a fresh kernel
# to confirm.
print(df[["DOB", "Launch Date", "Termination Date"]].head())

                  DOB         Launch Date Termination Date
0 2017-06-02 12:00:00 2017-08-11 12:00:00       2017-08-15
1 2017-06-05 12:00:00 2017-08-14 12:00:00       2017-09-22
2 2018-03-07 12:00:00 2018-05-16 12:00:00       2018-06-24
3 2017-06-08 12:00:00 2017-08-17 12:00:00       2017-09-18
4 2018-03-08 12:00:00 2018-05-17 12:00:00       2018-06-18
