In [11]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import neutcurve
import scipy.stats
print(f"Using `neutcurve` version {neutcurve.__version__}")
import sys

# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

#import altair themes from /data/custom_analyses_data/theme.py and enable
import theme
alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')
os.chdir('/fh/fast/bloom_j/computational_notebooks/tmcmahon/2024/02_RSV/RSV_evolution_neut') #sets working directory to repo/project root folder


Using `neutcurve` version 2.1.0


In [12]:
df = pd.read_csv('01_data/other/25.02.10_freeze_ct_titer.csv')
df['virus'] = df['virus'].str.strip()
print(df['virus'].unique())
df

['bald' 'F only' 'full G' 'G -3' 'G -7' 'G -11' 'G -15' 'G -19' 'G -23'
 'G -27' 'G -31' 'G -35' 'G only' 'full F' 'F -4' 'F -8' 'F -12' 'F -16'
 'F -20' 'F -22' 'VSV-G' 'Fresh 1' 'Fresh 2' '-80 Freeze RT thaw 1'
 '-80 Freeze RT thaw 2' '-80 Freeze 37 thaw 1' '-80 Freeze 37 thaw 2'
 'Flash Freeze RT thaw 1' 'Flash Freeze RT thaw 2'
 'Flash Freeze 37 thaw 1' 'Flash Freeze 37 thaw 2']


Unnamed: 0,cell line,rep,virus number,virus,RLU,uL,RLU/uL,experiment
0,293T,1,1,bald,19830,50.000000,396.6,ct del
1,293T,1,1,bald,1622,12.500000,129.76,ct del
2,293T,1,1,bald,661.5,3.125000,211.68,ct del
3,293T,1,1,bald,72.16,0.781250,92.3648,ct del
4,293T,1,1,bald,52.59,0.195312,269.2608,ct del
...,...,...,...,...,...,...,...,...
731,293T-TIM1,2,11.5 B,Flash Freeze 37 thaw 2,over,12.500000,#VALUE!,freeze
732,293T-TIM1,2,11.5 B,Flash Freeze 37 thaw 2,225400000,3.125000,72128000,freeze
733,293T-TIM1,2,11.5 B,Flash Freeze 37 thaw 2,66340000,0.781250,84915200,freeze
734,293T-TIM1,2,11.5 B,Flash Freeze 37 thaw 2,18200000,0.195312,93184000,freeze


In [13]:
#remove values that were 'over'
# Remove rows where 'RLU/uL' column has the string '#VALUE!'
df = df[df['RLU/uL'] != '#VALUE!']

# Reset index (optional)
df = df.reset_index(drop=True)

# Display the cleaned DataFrame
df

Unnamed: 0,cell line,rep,virus number,virus,RLU,uL,RLU/uL,experiment
0,293T,1,1,bald,19830,50.000000,396.6,ct del
1,293T,1,1,bald,1622,12.500000,129.76,ct del
2,293T,1,1,bald,661.5,3.125000,211.68,ct del
3,293T,1,1,bald,72.16,0.781250,92.3648,ct del
4,293T,1,1,bald,52.59,0.195312,269.2608,ct del
...,...,...,...,...,...,...,...,...
657,293T-TIM1,1,11.5 B,Flash Freeze 37 thaw 2,4259000,0.048828,87224320,freeze
658,293T-TIM1,2,11.5 B,Flash Freeze 37 thaw 2,225400000,3.125000,72128000,freeze
659,293T-TIM1,2,11.5 B,Flash Freeze 37 thaw 2,66340000,0.781250,84915200,freeze
660,293T-TIM1,2,11.5 B,Flash Freeze 37 thaw 2,18200000,0.195312,93184000,freeze


In [14]:
df.head

<bound method NDFrame.head of      cell line  rep virus number                   virus        RLU  \
0         293T    1            1                    bald      19830   
1         293T    1            1                    bald       1622   
2         293T    1            1                    bald      661.5   
3         293T    1            1                    bald      72.16   
4         293T    1            1                    bald      52.59   
..         ...  ...          ...                     ...        ...   
657  293T-TIM1    1       11.5 B  Flash Freeze 37 thaw 2    4259000   
658  293T-TIM1    2       11.5 B  Flash Freeze 37 thaw 2  225400000   
659  293T-TIM1    2       11.5 B  Flash Freeze 37 thaw 2   66340000   
660  293T-TIM1    2       11.5 B  Flash Freeze 37 thaw 2   18200000   
661  293T-TIM1    2       11.5 B  Flash Freeze 37 thaw 2    4627000   

            uL    RLU/uL experiment  
0    50.000000     396.6     ct del  
1    12.500000    129.76     ct del  
2  

In [15]:
# Define a function to configure the chart style
def configure_chart(chart):
    return chart.configure_axis(
        labelFontSize=14,
        titleFontSize=14,
        grid=False
    ).configure_view(
        strokeWidth=2
    )

# Define a selection for hover
hover = alt.selection_point(fields=["virus", "replicate"], nearest=True, on="mouseover", empty="none")

# Create the base chart
base = alt.Chart(df).encode(
    x=alt.X(
        'virus:N',
        title='Virus',
        axis=alt.Axis(labelAngle=45)
    ),
    y=alt.Y(
        'RLU/uL:Q',
        title='RLU/uL',
        scale=alt.Scale(type='log')  # Optional: Use log scale if needed
    ),
    color=alt.Color(
        'rep:N',
        title='Replicate',
        legend=alt.Legend(title="Replicate")
    ),
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('cell line:N', title='Target Cell'),
        alt.Tooltip('rep:N', title='Replicate'),
        alt.Tooltip('experiment:Q', title='Experiment'),
        alt.Tooltip('RLU:Q', title='RLU'),
        alt.Tooltip('uL:Q', title='uL'),
        alt.Tooltip('RLU/uL:Q', title='RLU/uL')
    ]
)

# Points and hover logic
points = base.mark_point(size=80, filled=True).add_params(hover)

# Add a rule (highlight nearest point)
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Combine points and highlight, and facet by 'target cell'
faceted_chart = (
    (points + highlight)
    .properties(width=900, height=500)
    .facet(
        facet=alt.Facet('cell line:N', title='Target Cell')
    )
)

# Configure and display the chart
final_chart = configure_chart(faceted_chart)
final_chart


## look at samples with high error for linearity 

In [16]:
# Define the mapping from 'uL' to 'dilution'
dilution_mapping = {
    50: 1,
    12.5: 2,
    3.125: 3,
    0.78125: 4,
    0.1953125: 5,
    0.048828125: 6
}

# Apply the mapping to create a new column
df['dilution'] = df['uL'].map(dilution_mapping)

# Display the first few rows to check
df.head()

Unnamed: 0,cell line,rep,virus number,virus,RLU,uL,RLU/uL,experiment,dilution
0,293T,1,1,bald,19830.0,50.0,396.6,ct del,1.0
1,293T,1,1,bald,1622.0,12.5,129.76,ct del,2.0
2,293T,1,1,bald,661.5,3.125,211.68,ct del,3.0
3,293T,1,1,bald,72.16,0.78125,92.3648,ct del,4.0
4,293T,1,1,bald,52.59,0.195312,269.2608,ct del,5.0


In [17]:
# -80 Freeze 37 thaw 1, -80 Freeze 37 thaw 2, bald, G only, F only, F -16, F -20

# Define the list of viruses to include in the plot
selected_viruses = [
    "-80 Freeze 37 thaw 1", "-80 Freeze 37 thaw 2", "bald", 
    "G only", "F only", "F -16", "F -20"
]

# Filter the dataframe to include only the selected viruses
df_filtered = df[df['virus'].isin(selected_viruses)]

# Create the Altair chart with separate lines for each 'rep'
chart = alt.Chart(df_filtered).mark_line(point=True).encode(
    x=alt.X('dilution:Q', title='dilution'),  # X-axis
    y=alt.Y('RLU:Q', title='RLU', scale=alt.Scale(type='log')),  # Y-axis (separate per virus)
    color=alt.Color('cell line:N', title='Cell Line'),  # Different colors for each cell line
    strokeDash=alt.StrokeDash('rep:N', title='Replicate'),  # Different line styles for each 'rep'
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('cell line:N', title='Cell Line'),
        alt.Tooltip('rep:N', title='Replicate'),
        alt.Tooltip('uL:Q', title='uL'),
        alt.Tooltip('RLU:Q', title='RLU'),
        alt.Tooltip('RLU/uL:Q', title='RLU/uL'),
        alt.Tooltip('dilution:Q', title='dilution')
    ]
).facet(
    row=alt.Row('virus:N', title='Virus')  # Facet by virus (each virus gets a row)
).resolve_scale(
    y='independent'  # Allow each facet to have a separate Y-axis range
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_view(
    strokeWidth=2
)

# Display the chart
chart.save("03_output/plots/Titer_Ctdel_linear.html")
chart

## based on above we want to toss
- "-80 Freeze 37 thaw 1", dilution: 1 cell line: 293T, 293T-TIM1
- "-80 Freeze 37 thaw 2", dilution: 1 cell line: 293T, 293T-TIM1
- "bald", dilution: 5,6 cell line: 293T, 293T-TIM1  
- "G only", dilution: 4 cell line: 293T 
- "F only", dilution: 4,5,6 cell line: 293T
- "F -16", dilution: 1, 6 cell line: 293T-TIM1
- "F -20", dilution: 5, 6 cell line: 293T-TIM1

In [38]:
# Define the conditions for removal
removal_conditions = [
    ("-80 Freeze 37 thaw 1", [1], ["293T", "293T-TIM1"]),
    ("-80 Freeze 37 thaw 2", [1], ["293T", "293T-TIM1"]),
    ("bald", [5, 6], ["293T", "293T-TIM1"]),
    ("G only", [4], ["293T"]),
    ("F only", [4, 5, 6], ["293T"]),
    ("F -16", [1, 6], ["293T-TIM1"]),
    ("F -20", [5, 6], ["293T-TIM1"])
]

# Create a mask to filter out unwanted data
mask = pd.Series(False, index=df.index)  # Start with all False

for virus, dilutions, cell_lines in removal_conditions:
    mask |= (df['virus'] == virus) & (df['dilution'].isin(dilutions)) & (df['cell line'].isin(cell_lines))

# Create a new dataframe with removed data
df_removed = df[mask].copy()

# Keep only the data that doesn't match the removal conditions
df = df[~mask].copy()

# Display the first few rows of the cleaned dataframe
df.head()


Unnamed: 0,cell line,rep,virus number,virus,RLU,uL,RLU/uL,experiment,dilution
0,293T,1,1,bald,19830.0,50.0,396.6,ct del,1.0
1,293T,1,1,bald,1622.0,12.5,129.76,ct del,2.0
2,293T,1,1,bald,661.5,3.125,211.68,ct del,3.0
3,293T,1,1,bald,72.16,0.78125,92.3648,ct del,4.0
6,293T,2,1,bald,12970.0,50.0,259.4,ct del,1.0


## add error to new df 

In [39]:
# Convert 'RLU/uL' to numeric, forcing errors to NaN
df['RLU/uL'] = pd.to_numeric(df['RLU/uL'], errors='coerce')

# Drop rows where 'RLU/uL' is NaN (i.e., non-numeric values were present)
df = df.dropna(subset=['RLU/uL'])

# Get unique experiment values
experiment_values = df['experiment'].unique()

# Ensure we have at least two unique experiments
if len(experiment_values) < 2:
    raise ValueError("Less than two unique experiment values found in the 'experiment' column.")

# Separate the DataFrame by experiment values
experiment_1, experiment_2 = experiment_values[:2]  # Taking the first two unique values

# Create first df_agg for experiment_1
df_agg_exp1 = df[df['experiment'] == experiment_1].groupby(['virus', 'cell line', 'experiment']).agg(
    mean_RLU_uL=('RLU/uL', 'mean'),
    std_RLU_uL=('RLU/uL', 'std')
).reset_index()

# Calculate upper and lower limits for experiment_1
df_agg_exp1['lower_RLU_uL'] = df_agg_exp1['mean_RLU_uL'] - df_agg_exp1['std_RLU_uL']
df_agg_exp1['upper_RLU_uL'] = df_agg_exp1['mean_RLU_uL'] + df_agg_exp1['std_RLU_uL']
df_agg_exp1['lower_RLU_uL'] = df_agg_exp1['lower_RLU_uL'].clip(lower=100)

# Create second df_agg for experiment_2
df_agg_exp2 = df[df['experiment'] == experiment_2].groupby(['virus', 'cell line', 'experiment']).agg(
    mean_RLU_uL=('RLU/uL', 'mean'),
    std_RLU_uL=('RLU/uL', 'std')
).reset_index()

# Calculate upper and lower limits for experiment_2
df_agg_exp2['lower_RLU_uL'] = df_agg_exp2['mean_RLU_uL'] - df_agg_exp2['std_RLU_uL']
df_agg_exp2['upper_RLU_uL'] = df_agg_exp2['mean_RLU_uL'] + df_agg_exp2['std_RLU_uL']
df_agg_exp2['lower_RLU_uL'] = df_agg_exp2['lower_RLU_uL'].clip(lower=100)

# Display the two DataFrames
df_agg_exp1
df_agg_exp2

Unnamed: 0,virus,cell line,experiment,mean_RLU_uL,std_RLU_uL,lower_RLU_uL,upper_RLU_uL
0,-80 Freeze 37 thaw 1,293T,freeze,133787.4,117474.4,16313.05,251261.8
1,-80 Freeze 37 thaw 1,293T-TIM1,freeze,251605.0,237688.9,13916.1,489293.8
2,-80 Freeze 37 thaw 2,293T,freeze,150228.0,140089.9,10138.13,290317.9
3,-80 Freeze 37 thaw 2,293T-TIM1,freeze,313842.2,299987.9,13854.24,613830.1
4,-80 Freeze RT thaw 1,293T,freeze,1020597.0,430294.8,590302.5,1450892.0
5,-80 Freeze RT thaw 1,293T-TIM1,freeze,1457166.0,565331.9,891833.8,2022498.0
6,-80 Freeze RT thaw 2,293T,freeze,25107870.0,5866690.0,19241180.0,30974560.0
7,-80 Freeze RT thaw 2,293T-TIM1,freeze,61295550.0,15184810.0,46110740.0,76480360.0
8,Flash Freeze 37 thaw 1,293T,freeze,30278400.0,6481805.0,23796600.0,36760200.0
9,Flash Freeze 37 thaw 1,293T-TIM1,freeze,83278880.0,12650080.0,70628800.0,95928960.0


In [40]:
unique_viruses_exp1 = df_agg_exp1['virus'].unique()
print("Unique viruses in df_agg_exp1:")
for virus in unique_viruses_exp1:
    print(virus)

Unique viruses in df_agg_exp1:
F -12
F -16
F -20
F -22
F -4
F -8
F only
G -11
G -15
G -19
G -23
G -27
G -3
G -31
G -35
G -7
G only
VSV-G
bald
full F
full G


## plot ct del

In [41]:
import pandas as pd
import altair as alt

# Define custom virus order
virus_order = [
    "bald",
    "G only",
    "F only",
    "full F",
    "F -4",
    "F -8",
    "F -12",
    "F -16",
    "F -20",
    "F -22",
    "full G",
    "G -3",
    "G -7",
    "G -11",
    "G -15",
    "G -19",
    "G -23",
    "G -27",
    "G -31",
    "G -35",
    "VSV-G"
]

# Ensure category consistency and remove spaces
df_agg_exp1['virus'] = df_agg_exp1['virus'].str.strip()

# Create an explicit numerical order for sorting
virus_order_dict = {virus: i for i, virus in enumerate(virus_order)}
df_agg_exp1['virus_order'] = df_agg_exp1['virus'].map(virus_order_dict)

# Sort DataFrame before passing it to Altair
df_agg_exp1 = df_agg_exp1.sort_values('virus_order')

# Define color and shape mappings
color_mapping = {
    "293T": "#E69F00",       # Orange
    "293T-TIM1": "#999999"   # Gray
}

shape_mapping = {
    "293T": "square",        # Square for 293T
    "293T-TIM1": "circle"    # Circle for 293T-TIM1
}

# Format label expression correctly for JavaScript
label_expr = "{ " + ", ".join(f"{i}: '{v}'" for i, v in enumerate(virus_order)) + " }[datum.value]"

# Base chart with explicit sorting
base = alt.Chart(df_agg_exp1).encode(
    x=alt.X(
        'virus_order:O',  # Use numerical order for sorting
        title='Virus',
        axis=alt.Axis(
            labelAngle=270,
            labelFontSize=12,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=list(range(len(virus_order))),  # Ensures Altair keeps labels
            labelExpr=label_expr  # Maps back to virus names
        )
    ),
    y=alt.Y(
        'mean_RLU_uL:Q',
        title='Mean RLU/uL',
        scale=alt.Scale(type='log', domain=[100, df_agg_exp1['upper_RLU_uL'].max()]),
        axis=alt.Axis(
            labelFontSize=12,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold'
        ),
    ),
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    shape=alt.Shape(
        'cell line:N',
        scale=alt.Scale(domain=list(shape_mapping.keys()), range=list(shape_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('cell line:N', title='Target Cell'),
        alt.Tooltip('mean_RLU_uL:Q', title='Mean RLU/uL'),
        alt.Tooltip('std_RLU_uL:Q', title='Standard Deviation')
    ]
)

# Points for the mean with fixed shape encoding
points = base.mark_point(size=50, filled=True, stroke="black", strokeWidth=1.5, opacity=1).add_params(hover)

# Highlight nearest point
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Error bars with custom color
error_bars = alt.Chart(df_agg_exp1).mark_rule(size=2).encode(
    x='virus_order:O',
    y='lower_RLU_uL:Q',
    y2='upper_RLU_uL:Q',
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
    )
)

# Error bar caps with correct color
error_caps = (
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal').encode(
        x='virus_order:O',
        y='lower_RLU_uL:Q',
        color=alt.Color(
            'cell line:N',
            scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
        )
    ) +
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal').encode(
        x='virus_order:O',
        y='upper_RLU_uL:Q',
        color=alt.Color(
            'cell line:N',
            scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
        )
    )
)

# Combine layers
combined_chart = (points + error_bars + error_caps + highlight).properties(
    width=600,
    height=400
).configure_axis(
    labelFontSize=8,
    labelFontWeight='bold',
    titleFontSize=12,
    titleFontWeight='bold',
    grid=False
).configure_view(
    strokeWidth=2
)

combined_chart.save("03_output/plots/Titer_Ctdel.html")

# Display the chart
combined_chart


In [42]:
print(df_agg_exp1[['virus', 'cell line', 'mean_RLU_uL', 'std_RLU_uL', 'lower_RLU_uL', 'upper_RLU_uL']].sort_values('std_RLU_uL', ascending=False).head(10))
print(df_agg_exp1[df_agg_exp1['lower_RLU_uL'] <= 0])
print(df_agg_exp1.describe())

     virus  cell line   mean_RLU_uL    std_RLU_uL  lower_RLU_uL  upper_RLU_uL
35   VSV-G  293T-TIM1  1.539361e+09  2.607295e+08  1.278632e+09  1.800091e+09
34   VSV-G       293T  2.098488e+08  5.994966e+07  1.498991e+08  2.697985e+08
9     F -4  293T-TIM1  9.818432e+07  1.036261e+07  8.782171e+07  1.085469e+08
23   G -27  293T-TIM1  4.822659e+07  9.546682e+06  3.867991e+07  5.777327e+07
11    F -8  293T-TIM1  4.894560e+07  9.515561e+06  3.943004e+07  5.846116e+07
39  full F  293T-TIM1  6.415408e+07  8.405371e+06  5.574871e+07  7.255945e+07
8     F -4       293T  3.453590e+07  7.794999e+06  2.674090e+07  4.233090e+07
19   G -19  293T-TIM1  4.430240e+07  7.744197e+06  3.655820e+07  5.204660e+07
27   G -31  293T-TIM1  9.045056e+07  6.867753e+06  8.358281e+07  9.731831e+07
26   G -31       293T  2.992109e+07  6.246896e+06  2.367419e+07  3.616798e+07
Empty DataFrame
Columns: [virus, cell line, experiment, mean_RLU_uL, std_RLU_uL, lower_RLU_uL, upper_RLU_uL, virus_order]
Index: []
        me

In [43]:
print(df_agg_exp1[['virus', 'mean_RLU_uL', 'lower_RLU_uL', 'upper_RLU_uL']].dropna())
print(set(df_agg_exp1['virus']) - set(virus_order))  # Show any mismatches
print(df_agg_exp1[['virus', 'virus_order']].drop_duplicates().sort_values('virus_order'))


     virus   mean_RLU_uL  lower_RLU_uL  upper_RLU_uL
36    bald  1.675560e+02  1.000000e+02  2.820479e+02
37    bald  1.615121e+03  3.615038e+02  2.868738e+03
33  G only  1.790296e+03  2.958747e+02  3.284718e+03
32  G only  3.274411e+03  7.322646e+02  5.816558e+03
12  F only  5.992093e+04  4.131773e+04  7.852414e+04
13  F only  1.157871e+07  9.389802e+06  1.376761e+07
39  full F  6.415408e+07  5.574871e+07  7.255945e+07
38  full F  2.368003e+07  1.964877e+07  2.771129e+07
9     F -4  9.818432e+07  8.782171e+07  1.085469e+08
8     F -4  3.453590e+07  2.674090e+07  4.233090e+07
10    F -8  2.761350e+07  2.345181e+07  3.177520e+07
11    F -8  4.894560e+07  3.943004e+07  5.846116e+07
0    F -12  1.225576e+07  1.000584e+07  1.450567e+07
1    F -12  2.745738e+07  2.487073e+07  3.004402e+07
3    F -16  2.825816e+05  4.804276e+04  5.171204e+05
2    F -16  9.110161e+04  4.196075e+04  1.402425e+05
5    F -20  4.802830e+04  1.819246e+04  7.786414e+04
4    F -20  1.085102e+05  3.312580e+04  1.8389

In [44]:
# Ensure category consistency and remove spaces
df_agg_exp1['virus'] = df_agg_exp1['virus'].str.strip()

# Create an explicit numerical order for sorting
virus_order_dict = {virus: i for i, virus in enumerate(virus_order)}
df_agg_exp1['virus_order'] = df_agg_exp1['virus'].map(virus_order_dict)

# Sort DataFrame before passing it to Altair
df_agg_exp1 = df_agg_exp1.sort_values('virus_order')

# Define color and shape mappings
color_mapping = {
    "293T": "#E69F00",       # Orange
    "293T-TIM1": "#999999"   # Gray
}

shape_mapping = {
    "293T": "square",        # Square for 293T
    "293T-TIM1": "circle"    # Circle for 293T-TIM1
}

# Define tick positions
log_ticks = [10**i for i in range(2, 10)]  # 10^2 to 10^9

# Manually define labels using Unicode superscripts
log_labels = ["10²", "10³", "10⁴", "10⁵", "10⁶", "10⁷", "10⁸", "10⁹"]

# Format label expression correctly for JavaScript
label_expr = "{ " + ", ".join(f"{i}: '{v}'" for i, v in enumerate(virus_order)) + " }[datum.value]"

# Base chart with explicit sorting
base = alt.Chart(df_agg_exp1).encode(
    x=alt.X(
        'virus_order:O',  # Use numerical order for sorting
        title='',
        axis=alt.Axis(
            labelAngle=270,
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=list(range(len(virus_order))),  # Ensures Altair keeps labels
            labelExpr=label_expr  # Maps back to virus names
        )
    ),
     y = alt.Y(
        'mean_RLU_uL:Q',
        title='Pseudovirus Titer (RLU/uL)',
        scale=alt.Scale(type='log', domain=[100, df_agg_exp1['upper_RLU_uL'].max()]),
        axis=alt.Axis(
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=log_ticks,  # Force log ticks
            labelExpr="{ " + ", ".join(f"{v}: '{label}'" for v, label in zip(log_ticks, log_labels)) + " }[datum.value]"
        ),
    ),
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    shape=alt.Shape(
        'cell line:N',
        scale=alt.Scale(domain=list(shape_mapping.keys()), range=list(shape_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('cell line:N', title='Target Cell'),
        alt.Tooltip('mean_RLU_uL:Q', title='Pseudovirus Titer (RLU/uL)'),
        alt.Tooltip('std_RLU_uL:Q', title='Standard Deviation')
    ]
)

# Points for the mean with fixed shape encoding
points = base.mark_point(size=80, filled=True, opacity=1).add_params(hover)

# Highlight nearest point
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Error bars with custom color
error_bars = alt.Chart(df_agg_exp1).mark_rule(size=2).encode(
    x='virus_order:O',
    y='lower_RLU_uL:Q',
    y2='upper_RLU_uL:Q',
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
    )
)

# Error bar caps with correct color
error_caps = (
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal').encode(
        x='virus_order:O',
        y='lower_RLU_uL:Q',
        color=alt.Color(
            'cell line:N',
            scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
        )
    ) +
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal').encode(
        x='virus_order:O',
        y='upper_RLU_uL:Q',
        color=alt.Color(
            'cell line:N',
            scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
        )
    )
)

# Combine layers
combined_chart = (points + error_bars + error_caps + highlight).properties(
    width=300,
    height=200
).configure_axis(
    labelFontSize=8,  # Apply globally to be safe
    labelFontWeight='bold',
    titleFontSize=12,
    titleFontWeight='bold',
    grid=False
).configure_view(
    strokeWidth=2
)

combined_chart.save("03_output/plots/Titer_CTdel.html")

# Display the chart
combined_chart

In [45]:

# Define a mapping of actual virus names to custom display names
virus_display_names = {
    "bald": "No F or G",
    "G only": "G only",
    "F only": "F only",
    "full F": "F full CT",
    "F -4": "F 4AA CTdel",
    "F -8": "F 8AA CTdel",
    "F -12": "F 12AA CTdel",
    "F -16": "F 16AA CTdel",
    "F -20": "F 20AA CTdel",
    "F -22": "F 22AA CTdel",
    "VSV-G": "VSV-G"
}

virus_order = [
    "bald",
    "G only",
    "F only",
    "full F",
    "F -4",
    "F -8",
    "F -12",
    "F -16",
    "F -20",
    "F -22",
    "VSV-G"
]

# Filter df_agg_exp1 to include only selected viruses
df_agg_exp1 = df_agg_exp1[df_agg_exp1['virus'].isin(virus_order)].copy()

# Ensure category consistency and remove spaces
df_agg_exp1['virus'] = df_agg_exp1['virus'].str.strip()

# Create an explicit numerical order for sorting
virus_order_dict = {virus: i for i, virus in enumerate(virus_order)}
df_agg_exp1['virus_order'] = df_agg_exp1['virus'].map(virus_order_dict)

# Sort DataFrame before passing it to Altair
df_agg_exp1 = df_agg_exp1.sort_values('virus_order')

# Define tick positions for log-scale y-axis
log_ticks = [10**i for i in range(2, 10)]  # 10^2 to 10^9

# Manually define labels using Unicode superscripts
log_labels = ["10²", "10³", "10⁴", "10⁵", "10⁶", "10⁷", "10⁸", "10⁹"]

# Format label expression correctly for JavaScript
label_expr = "{ " + ", ".join(f"{i}: '{virus_display_names[v]}'" for i, v in enumerate(virus_order)) + " }[datum.value]"


# Base chart with explicit sorting and custom x-axis labels
base = alt.Chart(df_agg_exp1).encode(
    x=alt.X(
        'virus_order:O',  # Use numerical order for sorting
        title='Virus',
        axis=alt.Axis(
            labelAngle=270,
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=list(range(len(virus_order))),  # Ensures Altair keeps labels
            labelExpr=label_expr  # Maps back to custom virus names
        )
    ),
    y=alt.Y(
        'mean_RLU_uL:Q',
        title='Pseudovirus Titer (RLU/uL)',
        scale=alt.Scale(type='log', domain=[100, df_agg_exp1['upper_RLU_uL'].max()]),
        axis=alt.Axis(
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=log_ticks,  # Force log ticks
            labelExpr="{ " + ", ".join(f"{v}: '{label}'" for v, label in zip(log_ticks, log_labels)) + " }[datum.value]"
        ),
    ),
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    shape=alt.Shape(
        'cell line:N',
        scale=alt.Scale(domain=list(shape_mapping.keys()), range=list(shape_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('cell line:N', title='Target Cell'),
        alt.Tooltip('mean_RLU_uL:Q', title='Pseudovirus Titer (RLU/uL)'),
        alt.Tooltip('std_RLU_uL:Q', title='Standard Deviation')
    ]
)

# Points for the mean with fixed shape encoding and black outline
points = base.mark_point(
    size=80, filled=True, opacity=1, stroke='black', strokeWidth=1.5
).add_params(hover)

# Highlight nearest point (still on top)
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Error bars with black color (added first to appear behind)
error_bars = alt.Chart(df_agg_exp1).mark_rule(size=2, color='black').encode(
    x='virus_order:O',
    y='lower_RLU_uL:Q',
    y2='upper_RLU_uL:Q'
)

# Error bar caps with black color (added before points to stay behind)
error_caps = (
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal', color='black').encode(
        x='virus_order:O',
        y='lower_RLU_uL:Q'
    ) +
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal', color='black').encode(
        x='virus_order:O',
        y='upper_RLU_uL:Q'
    )
)

# Combine layers ensuring error bars & caps are behind points
combined_chart = (error_bars + error_caps + points + highlight).properties(
    width=300,
    height=200
).configure_axis(
    labelFontSize=8,  # Apply globally to be safe
    labelFontWeight='bold',
    titleFontSize=12,
    titleFontWeight='bold',
    grid=False
).configure_view(
    strokeWidth=2
)

combined_chart.save("03_output/plots/Titer_CTdel_filtered_F.html")

# Display the chart
combined_chart


In [46]:
# Convert 'RLU/uL' to numeric, forcing errors to NaN
df['RLU/uL'] = pd.to_numeric(df['RLU/uL'], errors='coerce')

# Drop rows where 'RLU/uL' is NaN (i.e., non-numeric values were present)
df = df.dropna(subset=['RLU/uL'])

# Get unique experiment values
experiment_values = df['experiment'].unique()

# Ensure we have at least two unique experiments
if len(experiment_values) < 2:
    raise ValueError("Less than two unique experiment values found in the 'experiment' column.")

# Separate the DataFrame by experiment values
experiment_1, experiment_2 = experiment_values[:2]  # Taking the first two unique values

# Create first df_agg for experiment_1
df_agg_exp1 = df[df['experiment'] == experiment_1].groupby(['virus', 'cell line', 'experiment']).agg(
    mean_RLU_uL=('RLU/uL', 'mean'),
    std_RLU_uL=('RLU/uL', 'std')
).reset_index()

# Calculate upper and lower limits for experiment_1
df_agg_exp1['lower_RLU_uL'] = df_agg_exp1['mean_RLU_uL'] - df_agg_exp1['std_RLU_uL']
df_agg_exp1['upper_RLU_uL'] = df_agg_exp1['mean_RLU_uL'] + df_agg_exp1['std_RLU_uL']
df_agg_exp1['lower_RLU_uL'] = df_agg_exp1['lower_RLU_uL'].clip(lower=100)

# Create second df_agg for experiment_2
df_agg_exp2 = df[df['experiment'] == experiment_2].groupby(['virus', 'cell line', 'experiment']).agg(
    mean_RLU_uL=('RLU/uL', 'mean'),
    std_RLU_uL=('RLU/uL', 'std')
).reset_index()

# Calculate upper and lower limits for experiment_2
df_agg_exp2['lower_RLU_uL'] = df_agg_exp2['mean_RLU_uL'] - df_agg_exp2['std_RLU_uL']
df_agg_exp2['upper_RLU_uL'] = df_agg_exp2['mean_RLU_uL'] + df_agg_exp2['std_RLU_uL']
df_agg_exp2['lower_RLU_uL'] = df_agg_exp2['lower_RLU_uL'].clip(lower=100)

# Display the two DataFrames
df_agg_exp1
df_agg_exp2

Unnamed: 0,virus,cell line,experiment,mean_RLU_uL,std_RLU_uL,lower_RLU_uL,upper_RLU_uL
0,-80 Freeze 37 thaw 1,293T,freeze,133787.4,117474.4,16313.05,251261.8
1,-80 Freeze 37 thaw 1,293T-TIM1,freeze,251605.0,237688.9,13916.1,489293.8
2,-80 Freeze 37 thaw 2,293T,freeze,150228.0,140089.9,10138.13,290317.9
3,-80 Freeze 37 thaw 2,293T-TIM1,freeze,313842.2,299987.9,13854.24,613830.1
4,-80 Freeze RT thaw 1,293T,freeze,1020597.0,430294.8,590302.5,1450892.0
5,-80 Freeze RT thaw 1,293T-TIM1,freeze,1457166.0,565331.9,891833.8,2022498.0
6,-80 Freeze RT thaw 2,293T,freeze,25107870.0,5866690.0,19241180.0,30974560.0
7,-80 Freeze RT thaw 2,293T-TIM1,freeze,61295550.0,15184810.0,46110740.0,76480360.0
8,Flash Freeze 37 thaw 1,293T,freeze,30278400.0,6481805.0,23796600.0,36760200.0
9,Flash Freeze 37 thaw 1,293T-TIM1,freeze,83278880.0,12650080.0,70628800.0,95928960.0


In [47]:
# Define a mapping of actual virus names to custom display names
virus_display_names = {
    "bald": "No F or G",
    "G only": "G only",
    "F only": "F only",
    "full G": "G full CT",
    "G -3": "G 3AA CTdel",
    "G -7": "G 7AA CTdel",
    "G -11": "G 11AA CTdel",
    "G -15": "G 15AA CTdel",
    "G -19": "G 19AA CTdel",
    "G -23": "G 23AA CTdel",
    "G -27": "G 27AA CTdel",
    "G -31": "G 31AA CTdel",
    "G -35": "G 35AA CTdel",
    "VSV-G": "VSV-G"
}
virus_order = [
    "bald",
    "G only",
    "F only",
    "full G",
    "G -3",
    "G -7",
    "G -11",
    "G -15",
    "G -19",
    "G -23",
    "G -27",
    "G -31",
    "G -35",
    "VSV-G"
]

# Filter df_agg_exp1 to include only selected viruses
df_agg_exp1 = df_agg_exp1[df_agg_exp1['virus'].isin(virus_order)].copy()

# Ensure category consistency and remove spaces
df_agg_exp1['virus'] = df_agg_exp1['virus'].str.strip()

# Create an explicit numerical order for sorting
virus_order_dict = {virus: i for i, virus in enumerate(virus_order)}
df_agg_exp1['virus_order'] = df_agg_exp1['virus'].map(virus_order_dict)

# Sort DataFrame before passing it to Altair
df_agg_exp1 = df_agg_exp1.sort_values('virus_order')

# Define tick positions for log-scale y-axis
log_ticks = [10**i for i in range(2, 10)]  # 10^2 to 10^9

# Manually define labels using Unicode superscripts
log_labels = ["10²", "10³", "10⁴", "10⁵", "10⁶", "10⁷", "10⁸", "10⁹"]

# Format label expression correctly for JavaScript using virus_display_names
label_expr = "{ " + ", ".join(f"{i}: '{virus_display_names[v]}'" for i, v in enumerate(virus_order)) + " }[datum.value]"

# Base chart with explicit sorting and custom x-axis labels
base = alt.Chart(df_agg_exp1).encode(
    x=alt.X(
        'virus_order:O',  # Use numerical order for sorting
        title='Virus',
        axis=alt.Axis(
            labelAngle=270,
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=list(range(len(virus_order))),  # Ensures Altair keeps labels
            labelExpr=label_expr  # Maps back to custom virus names
        )
    ),
    y=alt.Y(
        'mean_RLU_uL:Q',
        title='Pseudovirus Titer (RLU/uL)',
        scale=alt.Scale(type='log', domain=[100, df_agg_exp1['upper_RLU_uL'].max()]),
        axis=alt.Axis(
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=log_ticks,  # Force log ticks
            labelExpr="{ " + ", ".join(f"{v}: '{label}'" for v, label in zip(log_ticks, log_labels)) + " }[datum.value]"
        ),
    ),
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    shape=alt.Shape(
        'cell line:N',
        scale=alt.Scale(domain=list(shape_mapping.keys()), range=list(shape_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('cell line:N', title='Target Cell'),
        alt.Tooltip('mean_RLU_uL:Q', title='Pseudovirus Titer (RLU/uL)'),
        alt.Tooltip('std_RLU_uL:Q', title='Standard Deviation')
    ]
)

# Points for the mean with fixed shape encoding and black outline
points = base.mark_point(
    size=80, filled=True, opacity=1, stroke='black', strokeWidth=1.5  # Adds black outline
).add_params(hover)

# Highlight nearest point (still on top)
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Error bars with black color (added first to appear behind)
error_bars = alt.Chart(df_agg_exp1).mark_rule(size=2, color='black').encode(
    x='virus_order:O',
    y='lower_RLU_uL:Q',
    y2='upper_RLU_uL:Q'
)

# Error bar caps with black color (added before points to stay behind)
error_caps = (
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal', color='black').encode(
        x='virus_order:O',
        y='lower_RLU_uL:Q'
    ) +
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal', color='black').encode(
        x='virus_order:O',
        y='upper_RLU_uL:Q'
    )
)

# Combine layers ensuring error bars & caps are behind points
combined_chart = (error_bars + error_caps + points + highlight).properties(
    width=300,
    height=200
).configure_axis(
    labelFontSize=8,  # Apply globally to be safe
    labelFontWeight='bold',
    titleFontSize=12,
    titleFontWeight='bold',
    grid=False
).configure_view(
    strokeWidth=2
)

combined_chart.save("03_output/plots/Titer_CTdel_G_Only.html")

# Display the chart
combined_chart


In [48]:
# Pivot the data to have 'virus' as index and 'cell line' as columns
df_fold_diff = df_agg_exp1.pivot(index='virus', columns='cell line', values='mean_RLU_uL')

# Ensure the expected cell lines exist (adjust based on your dataset)
expected_cells = ['293T', '293T-TIM1']  # Update if needed
df_fold_diff = df_fold_diff[expected_cells]

# Compute the fold difference (293T-TIM1 / 293T)
df_fold_diff['Fold_Difference'] = df_fold_diff['293T-TIM1'] / df_fold_diff['293T']

# Sort by Fold Difference (ascending order)
df_fold_diff = df_fold_diff.sort_values(by='Fold_Difference', ascending=False)

# Reset index to get a clean table
df_fold_diff = df_fold_diff.reset_index()
df_fold_diff

cell line,virus,293T,293T-TIM1,Fold_Difference
0,F only,59920.93,11578710.0,193.233105
1,G -11,255387.6,17758320.0,69.534776
2,G -7,195260.1,10341460.0,52.962497
3,full G,262466.1,13620590.0,51.89467
4,G -3,376442.8,11496020.0,30.538564
5,bald,167.556,1615.121,9.639291
6,VSV-G,209848800.0,1539361000.0,7.335574
7,G -15,3528442.0,25210370.0,7.144901
8,G -19,9332767.0,44302400.0,4.746974
9,G -35,9709503.0,43309120.0,4.460488


## freeze

In [49]:
unique_viruses_exp2 = df_agg_exp2['virus'].unique()
print(df_agg_exp2['virus'].unique())
print(df_agg_exp2)

['-80 Freeze 37 thaw 1' '-80 Freeze 37 thaw 2' '-80 Freeze RT thaw 1'
 '-80 Freeze RT thaw 2' 'Flash Freeze 37 thaw 1' 'Flash Freeze 37 thaw 2'
 'Flash Freeze RT thaw 1' 'Flash Freeze RT thaw 2' 'Fresh 1' 'Fresh 2']
                     virus  cell line experiment   mean_RLU_uL    std_RLU_uL  \
0     -80 Freeze 37 thaw 1       293T     freeze  1.337874e+05  1.174744e+05   
1     -80 Freeze 37 thaw 1  293T-TIM1     freeze  2.516050e+05  2.376889e+05   
2     -80 Freeze 37 thaw 2       293T     freeze  1.502280e+05  1.400899e+05   
3     -80 Freeze 37 thaw 2  293T-TIM1     freeze  3.138422e+05  2.999879e+05   
4     -80 Freeze RT thaw 1       293T     freeze  1.020597e+06  4.302948e+05   
5     -80 Freeze RT thaw 1  293T-TIM1     freeze  1.457166e+06  5.653319e+05   
6     -80 Freeze RT thaw 2       293T     freeze  2.510787e+07  5.866690e+06   
7     -80 Freeze RT thaw 2  293T-TIM1     freeze  6.129555e+07  1.518481e+07   
8   Flash Freeze 37 thaw 1       293T     freeze  3.027840e+07  

In [50]:
# Define mapping of virus names to categories
virus_categories = {
    '-80 Freeze 37 thaw': '-80 Freeze 37 thaw',
    '-80 Freeze RT thaw': '-80 Freeze RT thaw',
    'Flash Freeze 37 thaw': 'Flash Freeze 37 thaw',
    'Flash Freeze RT thaw': 'Flash Freeze RT thaw',
    'Fresh': 'Fresh'
}

# Create a new column 'virus_category' based on the mapping
df_agg_exp2['virus_category'] = df_agg_exp2['virus'].apply(
    lambda x: next((v for k, v in virus_categories.items() if k in x), 'Other')
)

# Group by 'virus_category' and compute the mean RLU/uL
df_freeze_summary = df_agg_exp2.groupby('virus_category', as_index=False)['mean_RLU_uL'].mean()
df_freeze_summary

Unnamed: 0,virus_category,mean_RLU_uL
0,-80 Freeze 37 thaw,212365.6
1,-80 Freeze RT thaw,22220300.0
2,Flash Freeze 37 thaw,57062740.0
3,Flash Freeze RT thaw,52289230.0
4,Fresh,72016240.0


In [51]:
import pandas as pd
import altair as alt

# Define custom virus order
virus_order = [
    "Fresh 1",
    "Fresh 2",
    "Flash Freeze 37 thaw 1",
    "Flash Freeze 37 thaw 2",
    "Flash Freeze RT thaw 1",
    "Flash Freeze RT thaw 2",
    "-80 Freeze 37 thaw 1",
    "-80 Freeze 37 thaw 2",
    "-80 Freeze RT thaw 1",
    "-80 Freeze RT thaw 2",
]

# Ensure category consistency and remove spaces
df_agg_exp2['virus'] = df_agg_exp2['virus'].str.strip()

# Create an explicit numerical order for sorting
virus_order_dict = {virus: i for i, virus in enumerate(virus_order)}
df_agg_exp2['virus_order'] = df_agg_exp2['virus'].map(virus_order_dict)

# Sort DataFrame before passing it to Altair
df_agg_exp2 = df_agg_exp2.sort_values('virus_order')

# Define color and shape mappings
color_mapping = {
    "293T": "#E69F00",       # Orange
    "293T-TIM1": "#999999"   # Gray
}

shape_mapping = {
    "293T": "square",        # Square for 293T
    "293T-TIM1": "circle"    # Circle for 293T-TIM1
}

# Format label expression correctly for JavaScript
label_expr = "{ " + ", ".join(f"{i}: '{v}'" for i, v in enumerate(virus_order)) + " }[datum.value]"

# Base chart with explicit sorting
base = alt.Chart(df_agg_exp2).encode(
    x=alt.X(
        'virus_order:O',  # Use numerical order for sorting
        title='Virus',
        axis=alt.Axis(
            labelAngle=270,
            labelFontSize=12,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=list(range(len(virus_order))),  # Ensures Altair keeps labels
            labelExpr=label_expr  # Maps back to virus names
        )
    ),
    y=alt.Y(
        'mean_RLU_uL:Q',
        title='Mean RLU/uL',
        scale=alt.Scale(type='log', domain=[100, df_agg_exp2['upper_RLU_uL'].max()]),
        axis=alt.Axis(
            labelFontSize=12,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold'
        ),
    ),
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    shape=alt.Shape(
        'cell line:N',
        scale=alt.Scale(domain=list(shape_mapping.keys()), range=list(shape_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('cell line:N', title='Target Cell'),
        alt.Tooltip('mean_RLU_uL:Q', title='Mean RLU/uL'),
        alt.Tooltip('std_RLU_uL:Q', title='Standard Deviation')
    ]
)

# Points for the mean with fixed shape encoding
points = base.mark_point(size=50, filled=True, stroke="black", strokeWidth=1.5, opacity=1).add_params(hover)

# Highlight nearest point
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Error bars with custom color
error_bars = alt.Chart(df_agg_exp2).mark_rule(size=2).encode(
    x='virus_order:O',
    y='lower_RLU_uL:Q',
    y2='upper_RLU_uL:Q',
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
    )
)

# Error bar caps with correct color
error_caps = (
    alt.Chart(df_agg_exp2).mark_tick(size=12, thickness=2, orient='horizontal').encode(
        x='virus_order:O',
        y='lower_RLU_uL:Q',
        color=alt.Color(
            'cell line:N',
            scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
        )
    ) +
    alt.Chart(df_agg_exp2).mark_tick(size=12, thickness=2, orient='horizontal').encode(
        x='virus_order:O',
        y='upper_RLU_uL:Q',
        color=alt.Color(
            'cell line:N',
            scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
        )
    )
)

# Combine layers
combined_chart = (points + error_bars + error_caps + highlight).properties(
    width=600,
    height=400
).configure_axis(
    labelFontSize=8,
    labelFontWeight='bold',
    titleFontSize=12,
    titleFontWeight='bold',
    grid=False
).configure_view(
    strokeWidth=2
)

combined_chart.save("03_output/plots/Titer_freeze.html")

# Display the chart
combined_chart


In [52]:
# Ensure category consistency and remove spaces
df_agg_exp2['virus'] = df_agg_exp2['virus'].str.strip()

# Create an explicit numerical order for sorting
virus_order_dict = {virus: i for i, virus in enumerate(virus_order)}
df_agg_exp2['virus_order'] = df_agg_exp2['virus'].map(virus_order_dict)

# Sort DataFrame before passing it to Altair
df_agg_exp2 = df_agg_exp2.sort_values('virus_order')

# Define color and shape mappings
color_mapping = {
    "293T": "#E69F00",       # Orange
    "293T-TIM1": "#999999"   # Gray
}

shape_mapping = {
    "293T": "square",        # Square for 293T
    "293T-TIM1": "circle"    # Circle for 293T-TIM1
}

# Define tick positions
log_ticks = [10**i for i in range(2, 10)]  # 10^2 to 10^9

# Manually define labels using Unicode superscripts
log_labels = ["10²", "10³", "10⁴", "10⁵", "10⁶", "10⁷", "10⁸", "10⁹"]

# Format label expression correctly for JavaScript
label_expr = "{ " + ", ".join(f"{i}: '{v}'" for i, v in enumerate(virus_order)) + " }[datum.value]"

# Base chart with explicit sorting
base = alt.Chart(df_agg_exp2).encode(
    x=alt.X(
        'virus_order:O',  # Use numerical order for sorting
        title='',
        axis=alt.Axis(
            labelAngle=270,
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=list(range(len(virus_order))),  # Ensures Altair keeps labels
            labelExpr=label_expr  # Maps back to virus names
        )
    ),
     y = alt.Y(
        'mean_RLU_uL:Q',
        title='Pseudovirus Titer (RLU/uL)',
        scale=alt.Scale(type='log', domain=[100, df_agg_exp2['upper_RLU_uL'].max()]),
        axis=alt.Axis(
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=log_ticks,  # Force log ticks
            labelExpr="{ " + ", ".join(f"{v}: '{label}'" for v, label in zip(log_ticks, log_labels)) + " }[datum.value]"
        ),
    ),
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    shape=alt.Shape(
        'cell line:N',
        scale=alt.Scale(domain=list(shape_mapping.keys()), range=list(shape_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('cell line:N', title='Target Cell'),
        alt.Tooltip('mean_RLU_uL:Q', title='Pseudovirus Titer (RLU/uL)'),
        alt.Tooltip('std_RLU_uL:Q', title='Standard Deviation')
    ]
)

# Points for the mean with fixed shape encoding and black outline
points = base.mark_point(
    size=80, filled=True, opacity=1, stroke='black', strokeWidth=1.5  # Adds black outline
).add_params(hover)

# Highlight nearest point (still on top)
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Error bars with black color (added first to appear behind)
error_bars = alt.Chart(df_agg_exp2).mark_rule(size=2, color='black').encode(
    x='virus_order:O',
    y='lower_RLU_uL:Q',
    y2='upper_RLU_uL:Q'
)

# Error bar caps with black color (added before points to stay behind)
error_caps = (
    alt.Chart(df_agg_exp2).mark_tick(size=12, thickness=2, orient='horizontal', color='black').encode(
        x='virus_order:O',
        y='lower_RLU_uL:Q'
    ) +
    alt.Chart(df_agg_exp2).mark_tick(size=12, thickness=2, orient='horizontal', color='black').encode(
        x='virus_order:O',
        y='upper_RLU_uL:Q'
    )
)

# Combine layers ensuring error bars & caps are behind points
combined_chart = (error_bars + error_caps + points + highlight).properties(
    width=300,
    height=200
).configure_axis(
    labelFontSize=8,  # Apply globally to be safe
    labelFontWeight='bold',
    titleFontSize=12,
    titleFontWeight='bold',
    grid=False
).configure_view(
    strokeWidth=2
)

combined_chart.save("03_output/plots/Titer_Freeze1.html")

# Display the chart
combined_chart


## small subset for supplement CT del 
293T ve TIM1 with bald, G only F only and G-32

In [53]:
# Convert 'RLU/uL' to numeric, forcing errors to NaN
df['RLU/uL'] = pd.to_numeric(df['RLU/uL'], errors='coerce')

# Drop rows where 'RLU/uL' is NaN (i.e., non-numeric values were present)
df = df.dropna(subset=['RLU/uL'])

# Get unique experiment values
experiment_values = df['experiment'].unique()

# Ensure we have at least two unique experiments
if len(experiment_values) < 2:
    raise ValueError("Less than two unique experiment values found in the 'experiment' column.")

# Separate the DataFrame by experiment values
experiment_1, experiment_2 = experiment_values[:2]  # Taking the first two unique values

# Create first df_agg for experiment_1
df_agg_exp1 = df[df['experiment'] == experiment_1].groupby(['virus', 'cell line', 'experiment']).agg(
    mean_RLU_uL=('RLU/uL', 'mean'),
    std_RLU_uL=('RLU/uL', 'std')
).reset_index()

# Calculate upper and lower limits for experiment_1
df_agg_exp1['lower_RLU_uL'] = df_agg_exp1['mean_RLU_uL'] - df_agg_exp1['std_RLU_uL']
df_agg_exp1['upper_RLU_uL'] = df_agg_exp1['mean_RLU_uL'] + df_agg_exp1['std_RLU_uL']
df_agg_exp1['lower_RLU_uL'] = df_agg_exp1['lower_RLU_uL'].clip(lower=100)

# Create second df_agg for experiment_2
df_agg_exp2 = df[df['experiment'] == experiment_2].groupby(['virus', 'cell line', 'experiment']).agg(
    mean_RLU_uL=('RLU/uL', 'mean'),
    std_RLU_uL=('RLU/uL', 'std')
).reset_index()

# Calculate upper and lower limits for experiment_2
df_agg_exp2['lower_RLU_uL'] = df_agg_exp2['mean_RLU_uL'] - df_agg_exp2['std_RLU_uL']
df_agg_exp2['upper_RLU_uL'] = df_agg_exp2['mean_RLU_uL'] + df_agg_exp2['std_RLU_uL']
df_agg_exp2['lower_RLU_uL'] = df_agg_exp2['lower_RLU_uL'].clip(lower=100)

# Display the two DataFrames
df_agg_exp1
df_agg_exp2

Unnamed: 0,virus,cell line,experiment,mean_RLU_uL,std_RLU_uL,lower_RLU_uL,upper_RLU_uL
0,-80 Freeze 37 thaw 1,293T,freeze,133787.4,117474.4,16313.05,251261.8
1,-80 Freeze 37 thaw 1,293T-TIM1,freeze,251605.0,237688.9,13916.1,489293.8
2,-80 Freeze 37 thaw 2,293T,freeze,150228.0,140089.9,10138.13,290317.9
3,-80 Freeze 37 thaw 2,293T-TIM1,freeze,313842.2,299987.9,13854.24,613830.1
4,-80 Freeze RT thaw 1,293T,freeze,1020597.0,430294.8,590302.5,1450892.0
5,-80 Freeze RT thaw 1,293T-TIM1,freeze,1457166.0,565331.9,891833.8,2022498.0
6,-80 Freeze RT thaw 2,293T,freeze,25107870.0,5866690.0,19241180.0,30974560.0
7,-80 Freeze RT thaw 2,293T-TIM1,freeze,61295550.0,15184810.0,46110740.0,76480360.0
8,Flash Freeze 37 thaw 1,293T,freeze,30278400.0,6481805.0,23796600.0,36760200.0
9,Flash Freeze 37 thaw 1,293T-TIM1,freeze,83278880.0,12650080.0,70628800.0,95928960.0


In [54]:
# Define the new list of viruses to include and their order
virus_order = [
    "bald",
    "G only",
    "F only",
    "G -31"
]

# Filter df_agg_exp1 to include only selected viruses
df_agg_exp1 = df_agg_exp1[df_agg_exp1['virus'].isin(virus_order)].copy()

# Ensure category consistency and remove spaces
df_agg_exp1['virus'] = df_agg_exp1['virus'].str.strip()

# Create an explicit numerical order for sorting
virus_order_dict = {virus: i for i, virus in enumerate(virus_order)}
df_agg_exp1['virus_order'] = df_agg_exp1['virus'].map(virus_order_dict)

# Sort DataFrame before passing it to Altair
df_agg_exp1 = df_agg_exp1.sort_values('virus_order')

# Define tick positions for log-scale y-axis
log_ticks = [10**i for i in range(2, 10)]  # 10^2 to 10^9

# Manually define labels using Unicode superscripts
log_labels = ["10²", "10³", "10⁴", "10⁵", "10⁶", "10⁷", "10⁸", "10⁹"]

# Format label expression correctly for JavaScript
label_expr = "{ " + ", ".join(f"{i}: '{v}'" for i, v in enumerate(virus_order)) + " }[datum.value]"

# Base chart with explicit sorting
base = alt.Chart(df_agg_exp1).encode(
    x=alt.X(
        'virus_order:O',  # Use numerical order for sorting
        title='Virus',
        axis=alt.Axis(
            labelAngle=270,
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=list(range(len(virus_order))),  # Ensures Altair keeps labels
            labelExpr=label_expr  # Maps back to virus names
        )
    ),
    y=alt.Y(
        'mean_RLU_uL:Q',
        title='Pseudovirus Titer (RLU/uL)',
        scale=alt.Scale(type='log', domain=[100, df_agg_exp1['upper_RLU_uL'].max()]),
        axis=alt.Axis(
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=log_ticks,  # Force log ticks
            labelExpr="{ " + ", ".join(f"{v}: '{label}'" for v, label in zip(log_ticks, log_labels)) + " }[datum.value]"
        ),
    ),
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    shape=alt.Shape(
        'cell line:N',
        scale=alt.Scale(domain=list(shape_mapping.keys()), range=list(shape_mapping.values())),
        legend=alt.Legend(title='Target Cell')
    ),
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('cell line:N', title='Target Cell'),
        alt.Tooltip('mean_RLU_uL:Q', title='Pseudovirus Titer (RLU/uL)'),
        alt.Tooltip('std_RLU_uL:Q', title='Standard Deviation')
    ]
)

# Points for the mean with fixed shape encoding
points = base.mark_point(size=80, filled=True, opacity=1).add_params(hover)

# Highlight nearest point
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Error bars with custom color
error_bars = alt.Chart(df_agg_exp1).mark_rule(size=2).encode(
    x='virus_order:O',
    y='lower_RLU_uL:Q',
    y2='upper_RLU_uL:Q',
    color=alt.Color(
        'cell line:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
    )
)

# Error bar caps with correct color
error_caps = (
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal').encode(
        x='virus_order:O',
        y='lower_RLU_uL:Q',
        color=alt.Color(
            'cell line:N',
            scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
        )
    ) +
    alt.Chart(df_agg_exp1).mark_tick(size=12, thickness=2, orient='horizontal').encode(
        x='virus_order:O',
        y='upper_RLU_uL:Q',
        color=alt.Color(
            'cell line:N',
            scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values()))
        )
    )
)

# Combine layers
combined_chart = (points + error_bars + error_caps + highlight).properties(
    width=200,
    height=200
).configure_axis(
    labelFontSize=8,  # Apply globally to be safe
    labelFontWeight='bold',
    titleFontSize=12,
    titleFontWeight='bold',
    grid=False
).configure_view(
    strokeWidth=2
)

combined_chart.save("03_output/plots/Titer_CTdel_supplement.html")

# Display the chart
combined_chart
