# Moderna Phase I vaccine samples
In this notebook, I read in the sample information, link it with the key sent by NIH, and try to figure out which samples are ours.

In [1]:
import collections
import itertools
import math
import os
import re
import string
import warnings
import xml.etree.ElementTree as ElementTree

from IPython.display import display, HTML
from IPython.display import display, SVG
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib as mpl
import matplotlib.pyplot as plt

import natsort
import numpy as np
import pandas as pd
from plotnine import *

import neutcurve
from neutcurve.colorschemes import CBMARKERS, CBPALETTE
import seaborn

import svgutils

import yaml

Make the results directory.

In [2]:
# Directory that receives the CSV files written by the cells below.
resultsdir = 'results/'
# exist_ok=True keeps this cell re-runnable once the directory already exists.
os.makedirs(name=resultsdir, exist_ok=True)

In [3]:
def _load_box(csv_path, box_label):
    """Read one box manifest, rename 'Specimen #' to 'Specimen ID', and tag rows with `box_label`."""
    manifest = pd.read_csv(csv_path)
    manifest = manifest.rename(columns={'Specimen #': 'Specimen ID'})
    return manifest.assign(box=box_label)


box1 = _load_box('data/Moderna_Box1.csv', 'box1')
box2 = _load_box('data/Moderna_Box2.csv', 'box2')
sample_key = pd.read_csv('data/moderna_sample_key.csv')

# Lookup tables used to translate the key's cohort / visit codes into labels.
cohort_to_age = {2: '18-55y', 3: '18-55y', 5: '56-70y', 8: '>=71y'}
cohort_to_dose = {2: '100ug', 3: '250ug', 5: '100ug', 8: '100ug'}
visit_to_day = {9: 36, 12: 119}

# Stack both boxes and set the Amount column to 0.5 on every row.
boxed_samples = pd.concat([box1, box2]).assign(Amount=0.5)

# Outer merge keeps specimens present in only one of the two tables;
# validate='one_to_one' makes pandas raise if a Specimen ID repeats on either side.
keyed_samples = boxed_samples.merge(
    sample_key,
    on='Specimen ID',
    how='outer',
    validate='one_to_one',
)

# Derived annotation columns: cohort 3 is flagged as BloomSample, and the
# visit / cohort codes are mapped through the lookup tables above.
all_samples = keyed_samples.assign(
    BloomSample=keyed_samples['Cohort'].isin([3]),
    day=keyed_samples['Visit No.'].map(visit_to_day),
    age=keyed_samples['Cohort'].map(cohort_to_age),
    dose=keyed_samples['Cohort'].map(cohort_to_dose),
)

all_samples.to_csv(f'{resultsdir}/all_samples.csv', index=False)
HTML(all_samples.head().to_html(index=False))

Subject,Visit Number,Specimen ID,Specimen Type,Amount,Unit,box,Cohort,Visit No.,BloomSample,day,age,dose
4853484848*,3232325049*,205041377,Serum,0.5,mL,box1,2,12,False,119,18-55y,100ug
4853484848*,3232325748*,204627096,Serum,0.5,mL,box1,2,9,False,36,18-55y,100ug
4857484848*,3232325049*,205044325,Serum,0.5,mL,box1,3,12,True,119,18-55y,250ug
4857484848*,3232325049*,205044326,Serum,0.5,mL,box1,3,12,True,119,18-55y,250ug
4857484848*,3232325748*,204626446,Serum,0.5,mL,box1,3,9,True,36,18-55y,250ug


Build a dictionary that maps each "Subject" to our new naming scheme (M01, M02, etc.).

In [4]:
# Distinct subjects with at least one BloomSample row, sorted so the
# subject -> label assignment does not depend on set iteration order.
bloom_subjects = sorted(set(all_samples.query('BloomSample')['Subject']))

# Exactly one label per subject, zero-padded to two digits (M01 ... M09, M10, ...).
# A single format spec replaces the separate "1-9" and "10+" ranges, so the
# list length always matches the number of subjects.
subject_numbers = [f'M{i:02d}' for i in range(1, len(bloom_subjects) + 1)]

# Mapping from the original Subject identifier to its new label.
name_subjects = dict(zip(bloom_subjects, subject_numbers))
print(name_subjects)

{'4856484848*': 'M01', '4857484848*': 'M02', '4956484848*': 'M03', '4957484848*': 'M04', '5057484848*': 'M05', '5156484848*': 'M06', '5157484848*': 'M07', '5256484848*': 'M08', '5257484848*': 'M09', '5356484848*': 'M10', '5357484848*': 'M11', '5456484848*': 'M12', '5556484848*': 'M13', '5655484848*': 'M14', '5755484848*': 'M15'}


In [5]:
# Keep only the rows flagged as BloomSample.
bloom_only = all_samples.query('BloomSample')

# Per-row labels: the subject's new name, then "<name>-day-<day>".
subject_labels = bloom_only['Subject'].map(name_subjects)
sample_labels = subject_labels + '-day-' + bloom_only['day'].astype(str)

samples = bloom_only.assign(
    subject_name=subject_labels,
    sample=sample_labels,
    # True for every repeat of a sample label after its first occurrence.
    JulieSample=sample_labels.duplicated(),
)

samples.to_csv(f'{resultsdir}/bloom_samples.csv', index=False)
HTML(samples.head().to_html(index=False))

Subject,Visit Number,Specimen ID,Specimen Type,Amount,Unit,box,Cohort,Visit No.,BloomSample,day,age,dose,subject_name,sample,JulieSample
4857484848*,3232325049*,205044325,Serum,0.5,mL,box1,3,12,True,119,18-55y,250ug,M02,M02-day-119,False
4857484848*,3232325049*,205044326,Serum,0.5,mL,box1,3,12,True,119,18-55y,250ug,M02,M02-day-119,True
4857484848*,3232325748*,204626446,Serum,0.5,mL,box1,3,9,True,36,18-55y,250ug,M02,M02-day-36,False
4857484848*,3232325748*,204626448,Serum,0.5,mL,box1,3,9,True,36,18-55y,250ug,M02,M02-day-36,True
4957484848*,3232325049*,205044436,Serum,0.5,mL,box1,3,12,True,119,18-55y,250ug,M04,M04-day-119,False
