In [1]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import neutcurve
import scipy.stats
print(f"Using `neutcurve` version {neutcurve.__version__}")
import sys

# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

#import altair themes from /data/custom_analyses_data/theme.py and enable
import theme
alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')
os.chdir('/fh/fast/bloom_j/computational_notebooks/tmcmahon/2024/02_RSV/RSV_evolution_neut') #sets working directory to repo/project root folder


Using `neutcurve` version 2.1.0


In [2]:
df = pd.read_csv('01_data/other/RSV_TU_Titer.csv')
df['virus'] = df['virus'].str.strip()
print(df['virus'].unique())
df

['No RSV' 'RSV Long' 'RSV A2' 'RSV B1' 'VSV-G']


Unnamed: 0,virus,replicate,TU/mL,target cell,virus_order
0,No RSV,1,5857,293T-TIM1,1
1,No RSV,2,5199,293T-TIM1,1
2,RSV Long,1,654711,293T-TIM1,2
3,RSV Long,2,635130,293T-TIM1,2
4,RSV A2,1,186092,293T-TIM1,3
5,RSV A2,2,169543,293T-TIM1,3
6,RSV B1,1,343078,293T-TIM1,4
7,RSV B1,2,363719,293T-TIM1,4
8,No RSV,1,841,293T,1
9,No RSV,2,972,293T,1


In [3]:
df.head

<bound method NDFrame.head of        virus  replicate     TU/mL target cell  virus_order
0     No RSV          1      5857   293T-TIM1            1
1     No RSV          2      5199   293T-TIM1            1
2   RSV Long          1    654711   293T-TIM1            2
3   RSV Long          2    635130   293T-TIM1            2
4     RSV A2          1    186092   293T-TIM1            3
5     RSV A2          2    169543   293T-TIM1            3
6     RSV B1          1    343078   293T-TIM1            4
7     RSV B1          2    363719   293T-TIM1            4
8     No RSV          1       841        293T            1
9     No RSV          2       972        293T            1
10  RSV Long          1    173162        293T            2
11  RSV Long          2    176191        293T            2
12    RSV A2          1     34519        293T            3
13    RSV A2          2     33651        293T            3
14    RSV B1          1     81270        293T            4
15    RSV B1          2   

In [4]:
# Step 3: Define a manual order for the viruses
virus_order = ['No RSV', 'RSV Long', 'RSV A2','RSV B1', 'VSV-G']  # Update as needed


In [5]:
# Define a function to configure the chart style
def configure_chart(chart):
    return chart.configure_axis(
        labelFontSize=14,
        titleFontSize=14,
        grid=False
    ).configure_view(
        strokeWidth=2
    )

# Define a selection for hover
hover = alt.selection_point(fields=["virus", "replicate"], nearest=True, on="mouseover", empty="none")

# Create the base chart
base = alt.Chart(df).encode(
    x=alt.X(
        'virus:N',
        title='Virus',
        axis=alt.Axis(labelAngle=45)
    ),
    y=alt.Y(
        'TU/mL:Q',
        title='Pseudovirus Titer (TU/mL)',
        scale=alt.Scale(type='log')  # Optional: Use log scale if needed
    ),
    color=alt.Color(
        'replicate:N',
        title='Replicate',
        legend=alt.Legend(title="Replicate")
    ),
    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('target cell:N', title='Target Cell'),
        alt.Tooltip('replicate:N', title='Replicate'),
        alt.Tooltip('dilution:Q', title='Dilution'),
        alt.Tooltip('RLU:Q', title='RLU'),
        alt.Tooltip('uL:Q', title='uL'),
        alt.Tooltip('RLU/uL:Q', title='RLU/uL')
    ]
)

# Points and hover logic
points = base.mark_point(size=80, filled=True).add_params(hover)

# Add a rule (highlight nearest point)
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Combine points and highlight, and facet by 'target cell'
faceted_chart = (
    (points + highlight)
    .properties(width=300, height=300)
    .facet(
        facet=alt.Facet('target cell:N', title='Target Cell')
    )
)

# Configure and display the chart
final_chart = configure_chart(faceted_chart)
final_chart


## add error to new df 

In [6]:
# Compute mean and error bars
df_agg = df.groupby(['virus', 'target cell']).agg(
    mean_TU_mL=('TU/mL', 'mean'),
    std_TU_mL=('TU/mL', 'std')  # Standard deviation for error bars
).reset_index()

# Calculate upper and lower limits
df_agg['lower_TU_mL'] = df_agg['mean_TU_mL'] - df_agg['std_TU_mL']
df_agg['upper_TU_mL'] = df_agg['mean_TU_mL'] + df_agg['std_TU_mL']

# Display the final aggregated DataFrame
df_agg.head()

Unnamed: 0,virus,target cell,mean_TU_mL,std_TU_mL,lower_TU_mL,upper_TU_mL
0,No RSV,293T,906.5,92.630988,813.869012,999.130988
1,No RSV,293T-TIM1,5528.0,465.276262,5062.723738,5993.276262
2,RSV A2,293T,34085.0,613.768686,33471.231314,34698.768686
3,RSV A2,293T-TIM1,177817.5,11701.910122,166115.589878,189519.410122
4,RSV B1,293T,83312.0,2887.824094,80424.175906,86199.824094


In [7]:
print("Virus Order List:", virus_order)
print("Unique Viruses in df_agg:", df_agg['virus'].unique())


Virus Order List: ['No RSV', 'RSV Long', 'RSV A2', 'RSV B1', 'VSV-G']
Unique Viruses in df_agg: ['No RSV' 'RSV A2' 'RSV B1' 'RSV Long' 'VSV-G']


In [8]:
df_agg['virus'] = pd.Categorical(df_agg['virus'], categories=virus_order, ordered=True)
df_agg = df_agg.sort_values('virus', ascending=True)


In [9]:
print(df_agg.columns)  # Check all available columns
print(df_agg.head())   # Inspect data

Index(['virus', 'target cell', 'mean_TU_mL', 'std_TU_mL', 'lower_TU_mL',
       'upper_TU_mL'],
      dtype='object')
      virus target cell  mean_TU_mL     std_TU_mL    lower_TU_mL  \
0    No RSV        293T       906.5     92.630988     813.869012   
1    No RSV   293T-TIM1      5528.0    465.276262    5062.723738   
6  RSV Long        293T    174676.5   2141.826440  172534.673560   
7  RSV Long   293T-TIM1    644920.5  13845.857882  631074.642118   
2    RSV A2        293T     34085.0    613.768686   33471.231314   

     upper_TU_mL  
0     999.130988  
1    5993.276262  
6  176818.326440  
7  658766.357882  
2   34698.768686  


## final plot fig 1

In [11]:
import pandas as pd
import altair as alt

# Define custom virus order
virus_order = [
    "No RSV", "RSV Long", "RSV A2", "RSV B1", "VSV-G"
]

# Ensure category consistency and remove spaces
df_agg['virus'] = df_agg['virus'].str.strip()

# Create an explicit numerical order for sorting
virus_order_dict = {virus: i for i, virus in enumerate(virus_order)}
df_agg['virus_order'] = df_agg['virus'].map(virus_order_dict)

# Sort DataFrame before passing it to Altair
df_agg = df_agg.sort_values('virus_order')

# Define color mapping to ensure 293T-TIM1 remains gray
color_mapping = {
    "293T": "#E69F00",       # Orange
    "293T-TIM1": "#999999"   # Gray
}

# Define shape mapping
shape_mapping = {
    "293T": "square",
    "293T-TIM1": "circle"
}

# Define tick positions
log_ticks = [10**i for i in range(2, 8)]  # 10^2 to 10^7

# Manually define labels using Unicode superscripts
log_labels = ["10²", "10³", "10⁴", "10⁵", "10⁶", "10⁷"]

# Format label expression correctly for JavaScript
label_expr = "{ " + ", ".join(f"{i}: '{v}'" for i, v in enumerate(virus_order)) + " }[datum.value]"

# Base chart with explicit sorting
base = alt.Chart(df_agg).encode(
    x=alt.X(
        'virus_order:O',  # Use numerical order for sorting
        title='',
        axis=alt.Axis(
            labelAngle=270,
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=list(range(len(virus_order))),  # Ensures Altair keeps labels
            labelExpr=label_expr  # Maps back to virus names
        )
    ),
    y=alt.Y(
        'mean_TU_mL:Q',
        title='Pseudovirus Titer (TU/mL)',
        scale=alt.Scale(type='log', domain=[100, df_agg['upper_TU_mL'].max()]),
        axis=alt.Axis(
            labelFontSize=14,
            labelFontWeight='bold',
            titleFontSize=16,
            titleFontWeight='bold',
            values=log_ticks,  # Force log ticks
            labelExpr="{ " + ", ".join(f"{v}: '{label}'" for v, label in zip(log_ticks, log_labels)) + " }[datum.value]"
        ),
    ),
    color=alt.Color(
        'target cell:N',
        scale=alt.Scale(domain=list(color_mapping.keys()), range=list(color_mapping.values())),
        legend=alt.Legend(title='Target Cell')  # KEEP LEGEND
    ),

    shape=alt.Shape(
        'target cell:N',
        scale=alt.Scale(domain=list(shape_mapping.keys()), range=list(shape_mapping.values())),
        legend=alt.Legend(title='Target Cell')  # KEEP LEGEND
    ),

    tooltip=[
        alt.Tooltip('virus:N', title='Virus'),
        alt.Tooltip('target cell:N', title='Target Cell'),
        alt.Tooltip('mean_TU_mL:Q', title='Pseudovirus Titer (TU/mL)'),
        alt.Tooltip('std_TU_mL:Q', title='Standard Deviation')
    ]
)

# Error bars with black color (added first to appear behind)
error_bars = alt.Chart(df_agg).mark_rule(size=2, color='black').encode(
    x='virus_order:O',
    y='lower_TU_mL:Q',
    y2='upper_TU_mL:Q'
)

# Error bar caps with black color (added before points to stay behind)
error_caps = (
    alt.Chart(df_agg).mark_tick(size=12, thickness=2, orient='horizontal', color='black').encode(
        x='virus_order:O',
        y='lower_TU_mL:Q'
    ) +
    alt.Chart(df_agg).mark_tick(size=12, thickness=2, orient='horizontal', color='black').encode(
        x='virus_order:O',
        y='upper_TU_mL:Q'
    )
)

# Points for the mean with fixed shape encoding and black outline (added last to stay on top)
points = base.mark_point(size=100, filled=True, opacity=1, stroke='black', strokeWidth=1.5).add_params(hover)

# Highlight nearest point (still on top)
highlight = base.transform_filter(hover).mark_circle(size=200, color="red")

# Combine layers ensuring error bars & caps are behind points
combined_chart = (error_bars + error_caps + points + highlight).properties(
    width=150,
    height=200
).configure_axis(
    labelFontSize=8,  # Apply globally to be safe
    labelFontWeight='bold',
    titleFontSize=12,
    titleFontWeight='bold',
    grid=False
).configure_view(
    strokeWidth=2
)

combined_chart.save("03_output/plots/Titer_TU-mL.html")

# Display the chart
combined_chart
