In [1]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import neutcurve
import scipy.stats
print(f"Using `neutcurve` version {neutcurve.__version__}")
import sys
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from scipy.stats import linregress

# Lift Altair's default row limit so larger data frames can be embedded in charts.
_ = alt.data_transformers.disable_max_rows()

# Import the project's Altair theme module (theme.py; the original note locates it
# under /data/custom_analyses_data) and make it the active theme.
import theme
alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')
from pathlib import Path

# Work from the repository root so the relative data/output paths used by later
# cells resolve. The notebook is assumed to start one level below the root; the
# `01_data` check makes this cell idempotent, so re-running it no longer climbs
# one directory higher on every execution.
repo_root = Path.cwd()
if not (repo_root / "01_data").is_dir():
    repo_root = repo_root.parent
os.chdir(repo_root)
print(os.getcwd())
#print(os.listdir("raw_plate_reader"))
# Setting from Bernadeta for correct font output in SVG exports.
plt.rcParams['svg.fonttype'] = 'none'

Using `neutcurve` version 2.1.0
/fh/fast/bloom_j/computational_notebooks/tmcmahon/2024/02_RSV/RSV_evolution_neut


In [6]:
# Load the band quantification table and give the normalized column a plot-friendly name.
df = pd.read_csv("01_data/other/2025.05.21_Band_Quantification.csv")
df = df.rename(columns={"AUC normalized to p24": "Normalized AUC"})

# Shortened condition label: "<protein> - <pseudovirus without the 'RSV Long F + ' prefix>"
df['Condition'] = df['Protein Measured'] + ' - ' + df['Pseudovirus'].str.replace(r"RSV Long F \+ ", "", regex=True)

# x-axis order; the blank category is a visual spacer between the G and F conditions.
condition_order = [
    'G - G 31AACTdel',
    'G - G fullCT',
    ' ',  # spacer
    'F - G 31AACTdel',
    'F - G fullCT',
]

# One dummy spacer row per replicate. Only the columns that actually hold a value
# are listed: pd.concat fills every other column with NaN on its own. Spelling the
# remaining columns out as None creates all-NA columns, which pandas warns about
# during concat and which would change the resulting dtypes (e.g. of
# 'Normalized AUC') once the deprecated behaviour is removed.
spacer_rows = [
    {'Condition': ' ', 'Replicate': rep}
    for rep in df['Replicate'].unique()
]
df = pd.concat([df, pd.DataFrame(spacer_rows)], ignore_index=True)

# Chart generator
def make_chart(replicate, tick_count=None, show_y_title=True):
    """Build the bar chart of normalized band intensity for one replicate.

    Reads the notebook-level ``df`` and ``condition_order``.

    Parameters
    ----------
    replicate
        Value compared against ``df['Replicate']`` to select the rows to plot.
    tick_count
        Optional ``tickCount`` for the y-axis; left unset when falsy.
    show_y_title
        When False, the y-axis title is set to None and its font size to 0.

    Returns
    -------
    alt.Chart
        A 200x200 bar chart titled ``Replicate <replicate>``.
    """
    font = 'Arial'

    # Collect the y-axis options first so tickCount can be added conditionally.
    y_axis_options = dict(
        grid=False,
        labelFontSize=16,
        labelFont=font,
        titleFont=font,
        titleFontSize=18 if show_y_title else 0,
        title='Normalized AUC' if show_y_title else None,
    )
    if tick_count:
        y_axis_options['tickCount'] = tick_count

    x_encoding = alt.X(
        'Condition:N',
        sort=condition_order,
        axis=alt.Axis(
            labelAngle=270,
            labelAlign='right',
            labelBaseline='middle',
            title='',
            labelFontSize=16,
            labelFont=font,
        ),
    )
    y_encoding = alt.Y('Normalized AUC:Q', axis=alt.Axis(**y_axis_options))
    color_encoding = alt.Color(
        'Protein Measured:N',
        title='Protein',
        sort=['G', 'F'],  # G first in legend
        legend=alt.Legend(
            labelFont=font,
            labelFontSize=14,
            titleFont=font,
            titleFontSize=16,
        ),
    )
    panel_title = alt.TitleParams(
        text=f'Replicate {replicate}',
        font=font,
        fontSize=18,
    )

    replicate_rows = df[df['Replicate'] == replicate]
    bars = alt.Chart(replicate_rows).mark_bar(size=35)
    return bars.encode(
        x=x_encoding,
        y=y_encoding,
        color=color_encoding,
        tooltip=['Protein Measured', 'Pseudovirus', 'Normalized AUC'],
    ).properties(width=200, height=200, title=panel_title)

# Build one panel per replicate and place them side by side. Only the left panel
# shows the y-axis title; the right panel requests 4 y-axis ticks.
chart1 = make_chart(1, show_y_title=True)
chart2 = make_chart(2, tick_count=4, show_y_title=False)
combined = chart1 | chart2

# Make sure the destination folder exists before saving, so the cell also runs
# on a checkout where the output directory has not been created yet.
output_html = Path("03_output/plots/Western_NormalizedAUC.html")
output_html.parent.mkdir(parents=True, exist_ok=True)
combined.save(str(output_html))
combined

  df = pd.concat([df, pd.DataFrame(spacer_rows)], ignore_index=True)


In [4]:
import pandas as pd
import altair as alt

# The four conditions shown in the combined plot.
valid_conditions = ['G - G 31AACTdel', 'G - G fullCT', 'F - G 31AACTdel', 'F - G fullCT']

# Load, relabel and filter in one chain. Every step returns a new frame, so no
# column is ever assigned onto a filtered slice of another frame (which is what
# can raise pandas' SettingWithCopyWarning).
df = (
    pd.read_csv("01_data/other/2025.05.21_Band_Quantification.csv")
    .rename(columns={"AUC normalized to p24": "Normalized AUC"})
    # Shortened condition label: "<protein> - <pseudovirus without the 'RSV Long F + ' prefix>"
    .assign(Condition=lambda d: d['Protein Measured'] + ' - '
            + d['Pseudovirus'].str.replace(r"RSV Long F \+ ", "", regex=True))
    # Keep only G and F measurements in the valid conditions
    .loc[lambda d: d['Protein Measured'].isin(['G', 'F'])
         & d['Condition'].isin(valid_conditions)]
    # Clean x-axis labels (no replicate info)
    .assign(x_label=lambda d: d['Condition'])
)

# x-axis order. Each entry is a condition label plus a replicate suffix; the
# whitespace-only entries (1, 2 and 3 spaces, so they stay distinct categories)
# act as visual spacers.
x_order = [
    'G - G 31AACTdel (R1)',
    'G - G fullCT (R1)',
    ' ',  # spacer between replicates
    'G - G 31AACTdel (R2)',
    'G - G fullCT (R2)',
    '  ',  # wider space before F
    'F - G 31AACTdel (R1)',
    'F - G fullCT (R1)',
    '   ',  # spacer
    'F - G 31AACTdel (R2)',
    'F - G fullCT (R2)'
]

# Create Group column with replicate info for sorting
def make_group(row):
    """Return the row's condition label tagged with a replicate suffix.

    Rows whose ``'Replicate'`` equals 1 get ``' (R1)'`` appended to
    ``row['Condition']``; every other row gets ``' (R2)'``.
    """
    if row['Replicate'] == 1:
        return row['Condition'] + ' (R1)'
    return row['Condition'] + ' (R2)'

# Tag every row with its replicate-specific x-axis category.
df['Group'] = df.apply(make_group, axis=1)

# Append one dummy row per whitespace-only category to create gaps on the x-axis.
# The spacer frame is built once (no concat inside a loop) and lists only the
# columns that hold a value; pd.concat fills all remaining columns with NaN.
# Passing explicit all-None columns instead triggers a pandas warning on every
# concat and would change result dtypes once that deprecated behaviour is removed.
spacer_labels = [' ', '  ', '   ']
spacer_frame = pd.DataFrame({'x_label': spacer_labels, 'Group': spacer_labels})
df = pd.concat([df, spacer_frame], ignore_index=True)

# Plot
# Single-panel bar chart with both replicates. `Group` carries a replicate
# suffix so each replicate gets its own x position (ordered by `x_order`); the
# suffix is stripped again for display through the axis `labelExpr`.
combined_chart = alt.Chart(df).mark_bar(size=35).encode(
    x=alt.X('Group:N',
            sort=x_order,
            axis=alt.Axis(
                # Vega's replace() treats a *string* pattern literally (as JavaScript's
                # String.replace does), so a regex-style alternation written as a string
                # never matches and the suffixes stay visible. Strip each suffix with
                # its own literal replace instead.
                labelExpr="replace(replace(datum.value, ' (R1)', ''), ' (R2)', '')",
                labelAngle=270,
                labelFontSize=14,
                labelFont='Arial',
                title=''
            )),
    y=alt.Y('Normalized AUC:Q',
            axis=alt.Axis(
                grid=False,
                labelFontSize=16,
                labelFont='Arial',
                titleFont='Arial',
                titleFontSize=18,
                title='Band intensity / loading control'
            )),
    color=alt.Color('Protein Measured:N',
                    title='Protein',
                    sort=['G', 'F'],  # G first in legend
                    legend=alt.Legend(
                        labelFont='Arial',
                        labelFontSize=14,
                        titleFont='Arial',
                        titleFontSize=16)),
    tooltip=['Protein Measured', 'Condition', 'Replicate', 'Normalized AUC']
).properties(
    width=450,
    height=300,
    title=alt.TitleParams(
        text='G and F Conditions by Replicate',
        font='Arial',
        fontSize=20
    )
)

combined_chart


  df = pd.concat([df, pd.DataFrame([{
  df = pd.concat([df, pd.DataFrame([{
  df = pd.concat([df, pd.DataFrame([{
