In [1]:
library('readr')
library('tidyr')
library('dplyr')
library('magrittr')
library('ggplot2')
library('stringr')
library('purrr')


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Attaching package: ‘magrittr’

The following object is masked from ‘package:tidyr’:

    extract


Attaching package: ‘purrr’

The following object is masked from ‘package:magrittr’:

    set_names



In [2]:
# Preview the CSV files that the next cell loads.
# `pattern` is a regular expression, not a shell glob, so match a literal
# ".csv" suffix instead of the glob-style "*.csv".
list.files("../data", full.names = TRUE, pattern = "\\.csv$")

In [3]:
# Read every CSV file in ../data and stack them into one table, with `donor`
# converted to a factor.
# `pattern` is a regular expression, not a shell glob, so it is anchored on a
# literal ".csv" suffix; the glob-style "*.csv" would also accept file names
# that merely contain "csv" (e.g. "x.csv.bak").
all_donors <-
    list.files("../data", full.names = TRUE, pattern = "\\.csv$") %>%
    map(read_csv) %>%
    # bind_rows() combines the whole list in a single call (columns matched by
    # name) instead of re-copying the accumulated table once per file.
    bind_rows() %>%
    mutate(donor = factor(donor))

all_donors %>% head

Parsed with column specification:
cols(
  .default = col_double(),
  barcode = col_character(),
  donor = col_character(),
  cell_clono_cdr3_aa = col_character(),
  cell_clono_cdr3_nt = col_character(),
  `A0101_VTEHDTLLY_IE-1_CMV_binder` = col_logical(),
  A0201_KTWGQYWQV_gp100_Cancer_binder = col_logical(),
  `A0201_ELAGIGILTV_MART-1_Cancer_binder` = col_logical(),
  A0201_CLLWSFQTSA_Tyrosinase_Cancer_binder = col_logical(),
  A0201_IMDQVPFSV_gp100_Cancer_binder = col_logical(),
  `A0201_SLLMWITQV_NY-ESO-1_Cancer_binder` = col_logical(),
  `A0201_KVAELVHFL_MAGE-A3_Cancer_binder` = col_logical(),
  `A0201_KVLEYVIKV_MAGE-A1_Cancer_binder` = col_logical(),
  `A0201_CLLGTYTQDV_Kanamycin-B-dioxygenase_binder` = col_logical(),
  `A0201_LLDFVRFMGV_EBNA-3B_EBV_binder` = col_logical(),
  `A0201_LLMGTLGIVC_HPV-16E7_82-91_binder` = col_logical(),
  `A0201_CLGGLLTMV_LMP-2A_EBV_binder` = col_logical(),
  A0201_YLLEMLWRL_LMP1_EBV_binder = col_logical(),
  A0201_FLYALALLL_LMP2A_EBV_binder = col_log

barcode,donor,cell_clono_cdr3_aa,cell_clono_cdr3_nt,CD3,CD19,CD45RA,CD4,CD8a,CD14,⋯,B0702_RPHERNGFTVL_pp65_CMV_binder,B0801_RAKFKQLL_BZLF1_EBV_binder,B0801_ELRRKMMYM_IE-1_CMV_binder,B0801_FLRGRAYGL_EBNA-3A_EBV_binder,A0101_SLEGGGLGY_NC_binder,A0101_STEGGGLAY_NC_binder,A0201_ALIAPVHAV_NC_binder,A2402_AYSSAGASI_NC_binder,B0702_GPAESAAGL_NC_binder,NR(B0801)_AAKGRGAAL_NC_binder
AAACCTGAGACAAAGG-4,donor1,TRA:CAASVSIWTGTASKLTF;TRA:CAAWDMEYGNKLVF;TRB:CAISDPGLAGGGGEQFF,TRA:TGTGCAGCAAGCGTTAGTATTTGGACCGGCACTGCCAGTAAACTCACCTTT;TRA:TGTGCCGCCTGGGACATGGAATATGGAAACAAGCTGGTCTTT;TRB:TGTGCCATCAGTGACCCCGGACTAGCGGGAGGCGGGGGGGAGCAGTTCTTC,2125,0,912,1,2223,4,⋯,False,False,False,False,False,False,False,False,False,False
AAACCTGAGACTGTAA-34,donor1,TRB:CASDTPVGQFF,TRB:TGTGCCAGCGATACCCCGGTTGGGCAGTTCTTC,1023,0,2028,2,3485,1,⋯,False,False,False,False,False,False,False,False,False,False
AAACCTGAGAGCCCAA-5,donor1,TRA:CASYTDKLIF;TRB:CASSGGSISTDTQYF,TRA:TGTGCTTCCTACACCGACAAGCTCATCTTT;TRB:TGCGCCAGCAGTGGCGGGAGTATTAGCACAGATACGCAGTATTTT,1598,3,3454,4,3383,1,⋯,False,False,False,False,False,False,False,False,False,False
AAACCTGAGAGCTGCA-24,donor1,TRB:CASSGGQSSYEQYF,TRB:TGCGCCAGCAGTGGCGGACAGAGCTCCTACGAGCAGTACTTC,298,1,880,1,2389,1,⋯,False,False,False,False,False,False,False,False,False,False
AAACCTGAGAGGGATA-8,donor1,TRA:CAASGYGNTGRRALTF;TRB:CASSQDPAGGYNEQFF,TRA:TGTGCAGCAAGCGGGTATGGAAACACGGGCAGGAGAGCACTTACTTTT;TRB:TGCGCCAGCAGCCAAGACCCAGCGGGGGGGTACAATGAGCAGTTCTTC,1036,0,2457,2,3427,3,⋯,False,False,False,False,False,False,False,False,False,False
AAACCTGAGAGTGAGA-23,donor1,TRA:CAAHLSNFGNEKLTF;TRB:CATSRDRGHGDTIYF,TRA:TGTGCAGCACACTTATCTAACTTTGGAAATGAGAAATTAACCTTT;TRB:TGTGCCACCAGCAGAGATCGGGGCCATGGGGACACCATATATTTT,1729,1,39,160,5671,5,⋯,False,False,False,False,False,False,False,False,False,False


In [4]:
# Long-format view of `cell_clono_cdr3_aa`: keep the columns from `barcode`
# through `cell_clono_cdr3_aa`, emit one row per ";"-separated entry, then
# split each entry at ":" into the columns `TCR` (stored as a factor) and
# `CDR3`.
all_tcrs <-
    all_donors %>%
    select(barcode:cell_clono_cdr3_aa) %>%
    separate_rows(cell_clono_cdr3_aa, sep = ";") %>%
    separate(col = cell_clono_cdr3_aa, into = c("TCR", "CDR3"), sep = ":") %>%
    mutate(TCR = factor(TCR))

head(all_tcrs)

barcode,donor,TCR,CDR3
AAACCTGAGACAAAGG-4,donor1,TRA,CAASVSIWTGTASKLTF
AAACCTGAGACAAAGG-4,donor1,TRA,CAAWDMEYGNKLVF
AAACCTGAGACAAAGG-4,donor1,TRB,CAISDPGLAGGGGEQFF
AAACCTGAGACTGTAA-34,donor1,TRB,CASDTPVGQFF
AAACCTGAGAGCCCAA-5,donor1,TRA,CASYTDKLIF
AAACCTGAGAGCCCAA-5,donor1,TRB,CASSGGSISTDTQYF


In [5]:
# Long-format view of the columns `CD3` through `HLA-DR`: one row per
# (barcode, donor, column), with the source column name in `Marker` and its
# value in `PExpression`.
all_features <-
    all_donors %>%
    select(barcode, donor, CD3:`HLA-DR`) %>%
    gather(
        CD3:`HLA-DR`,
        key = "Marker",
        value = "PExpression",
        # keep `Marker` as a factor whose levels follow the column order
        factor_key = TRUE
    )

head(all_features)

barcode,donor,Marker,PExpression
AAACCTGAGACAAAGG-4,donor1,CD3,2125
AAACCTGAGACTGTAA-34,donor1,CD3,1023
AAACCTGAGAGCCCAA-5,donor1,CD3,1598
AAACCTGAGAGCTGCA-24,donor1,CD3,298
AAACCTGAGAGGGATA-8,donor1,CD3,1036
AAACCTGAGAGTGAGA-23,donor1,CD3,1729


In [6]:
# Long-format view of the "*_binder" columns: one row per
# (barcode, donor, binder column), with the source column name in `dextramer`
# and its value, converted to a factor, in `binder`.
all_binders <-
    all_donors %>%
    select(barcode, donor, contains("_binder")) %>%
    gather(
        # explicit first:last range of the binder columns being stacked
        `A0101_VTEHDTLLY_IE-1_CMV_binder`:`NR(B0801)_AAKGRGAAL_NC_binder`,
        key = "dextramer",
        value = "binder",
        # keep `dextramer` as a factor whose levels follow the column order
        factor_key = TRUE
    ) %>%
    mutate(binder = as.factor(binder))

head(all_binders)

barcode,donor,dextramer,binder
AAACCTGAGACAAAGG-4,donor1,A0101_VTEHDTLLY_IE-1_CMV_binder,False
AAACCTGAGACTGTAA-34,donor1,A0101_VTEHDTLLY_IE-1_CMV_binder,False
AAACCTGAGAGCCCAA-5,donor1,A0101_VTEHDTLLY_IE-1_CMV_binder,False
AAACCTGAGAGCTGCA-24,donor1,A0101_VTEHDTLLY_IE-1_CMV_binder,False
AAACCTGAGAGGGATA-8,donor1,A0101_VTEHDTLLY_IE-1_CMV_binder,False
AAACCTGAGAGTGAGA-23,donor1,A0101_VTEHDTLLY_IE-1_CMV_binder,False


In [7]:
# Write the combined table and its three long-format views into a single
# .RData file.
save(
    all_donors, all_binders, all_features, all_tcrs,
    file = "../data/all_donors.RData"
)