RNA-seq quantification:

```
# Fetch the paired-end RNA-seq reads (R1/R2) for the MC38A sample.
$ gsutil cp gs://musc-tcr-transfer/output/openvax/201912-mc38a/rna_L001_R1.fastq.gz .
$ gsutil cp gs://musc-tcr-transfer/output/openvax/201912-mc38a/rna_L001_R2.fastq.gz .

# Download the Ensembl release 103 (GRCm39) mouse cDNA FASTA and build a kallisto index from it.
# The index is written to its own .idx file: passing the FASTA path to -i would overwrite
# the downloaded FASTA with the binary index.
$ wget "http://ftp.ensembl.org/pub/release-103/fasta/mus_musculus/cdna/Mus_musculus.GRCm39.cdna.all.fa.gz"
$ kallisto index -i Mus_musculus.GRCm39.cdna.all.idx Mus_musculus.GRCm39.cdna.all.fa.gz

# Quantify transcript abundance against that index; output goes to the MC38A-kallisto directory.
$ kallisto quant -o MC38A-kallisto -i Mus_musculus.GRCm39.cdna.all.idx rna_L001_R1.fastq.gz rna_L001_R2.fastq.gz
```

Binding prediction:

```
# Create the environment with its own Python and pip; a bare `conda create -n mhc`
# makes an empty environment, so the `pip install` below would not install into it.
$ conda create -n mhc python pip
$ conda activate mhc
$ pip install mhcflurry mhctools topiary
# NOTE(review): mhcflurry needs its trained models downloaded before first use
# (`mhcflurry-downloads fetch`) - confirm this was already done on this machine.
$ pyensembl install --release 102 --species mus_musculus

# Fetch the somatic variant calls from both callers.
$ gsutil cp gs://musc-tcr-transfer/output/openvax/201912-mc38a/mutect.vcf .
$ gsutil cp gs://musc-tcr-transfer/output/openvax/201912-mc38a/strelka.vcf .

# Predict MHC binding of mutant peptides from both VCFs; writes a CSV and an HTML report.
$ topiary --mhc-predictor mhcflurry --mhc-alleles "H-2-Kb H-2Db" --vcf mutect.vcf --vcf strelka.vcf --output-csv MC38A-neoepitopes.csv --output-html MC38A-neoepitopes.html
```

In [1]:
library('tidyverse')

── [1mAttaching packages[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.2     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.4     [32m✔[39m [34mdplyr  [39m 1.0.2
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.0

── [1mConflicts[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



In [4]:
# Load the neoepitope predictions written by the topiary command above.
epitopes <- read_csv('MC38A-neoepitopes.csv')

# Preview the first rows.
head(epitopes)


[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
  `#` = [32mcol_double()[39m,
  variant = [31mcol_character()[39m,
  peptide_offset = [32mcol_double()[39m,
  peptide = [31mcol_character()[39m,
  allele = [31mcol_character()[39m,
  score = [32mcol_double()[39m,
  affinity = [32mcol_double()[39m,
  percentile_rank = [32mcol_double()[39m,
  prediction_method_name = [31mcol_character()[39m,
  peptide_length = [32mcol_double()[39m,
  gene = [31mcol_character()[39m,
  gene_id = [31mcol_character()[39m,
  transcript_id = [31mcol_character()[39m,
  transcript_name = [31mcol_character()[39m,
  effect = [31mcol_character()[39m,
  effect_type = [31mcol_character()[39m,
  contains_mutant_residues = [33mcol_logical()[39m,
  mutation_start_in_peptide = [32mcol_double()[39m,
  mutation_

#,variant,peptide_offset,peptide,allele,score,affinity,percentile_rank,prediction_method_name,peptide_length,gene,gene_id,transcript_id,transcript_name,effect,effect_type,contains_mutant_residues,mutation_start_in_peptide,mutation_end_in_peptide
<dbl>,<chr>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<dbl>,<dbl>
0,chr8 g.3496674G>C,410,AAAAAAAAP,H-2-Db,0.10000466,16945.373,7.45525,mhcflurry,9,Zfp358,ENSMUSG00000047264,ENSMUST00000208423,Zfp358-203,p.A419P,Substitution,True,8,9
1,chr5 g.66022672C>T,491,AAAAAAAAT,H-2-Db,0.11647544,14179.32,5.8145,mhcflurry,9,Rbm47,ENSMUSG00000070780,ENSMUST00000201544,Rbm47-209,p.A500T,Substitution,True,8,9
2,chr8 g.3496674G>C,411,AAAAAAAPA,H-2-Db,0.23921349,3757.621,1.89425,mhcflurry,9,Zfp358,ENSMUSG00000047264,ENSMUST00000208423,Zfp358-203,p.A419P,Substitution,True,7,8
3,chr5 g.66022672C>T,492,AAAAAAATA,H-2-Db,0.22478828,4392.347,2.102125,mhcflurry,9,Rbm47,ENSMUSG00000070780,ENSMUST00000201544,Rbm47-209,p.A500T,Substitution,True,7,8
4,chr6 g.125101598G>T,219,AAAAAAAVS,H-2-Db,0.08228799,20525.853,10.252875,mhcflurry,9,Chd4,ENSMUSG00000063870,ENSMUST00000056889,Chd4-201,p.A228S,Substitution,True,8,9
5,chr8 g.3496674G>C,412,AAAAAAPAA,H-2-Db,0.20740198,5301.446,2.403125,mhcflurry,9,Zfp358,ENSMUSG00000047264,ENSMUST00000208423,Zfp358-203,p.A419P,Substitution,True,6,7


In [7]:
# Read the kallisto abundance table and split `target_id` at the dot into
# `transcript_id` and `t_version`, so the ID can be used as a join key.
exp_tbl <- separate(
    read_tsv('./MC38A-kallisto/abundance.tsv'),
    `target_id`,
    into = c('transcript_id', 't_version'),
    sep = '\\.'
)

# Preview the first rows.
head(exp_tbl)


[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
  target_id = [31mcol_character()[39m,
  length = [32mcol_double()[39m,
  eff_length = [32mcol_double()[39m,
  est_counts = [32mcol_double()[39m,
  tpm = [32mcol_double()[39m
)




transcript_id,t_version,length,eff_length,est_counts,tpm
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
ENSMUST00000178537,2,12,3.0,0,0
ENSMUST00000178862,2,14,3.5,0,0
ENSMUST00000196221,2,9,10.0,0,0
ENSMUST00000179664,2,11,2.0,0,0
ENSMUST00000177564,2,16,5.5,0,0
ENSMUST00000179520,2,11,2.0,0,0


In [14]:
# Attach expression (estimated counts and TPM) to every predicted epitope by
# transcript ID, drop the `#` index column, sort by ascending `affinity`, and
# move the key columns to the front.
# NOTE(review): epitopes without a matching transcript in exp_tbl keep NA for
# est_counts/TPM because this is a left join.
neoantigens_tbl <-
    left_join(
        epitopes,
        select(exp_tbl, transcript_id, est_counts, TPM = tpm),
        by = 'transcript_id'
    ) %>%
    select(-`#`) %>%
    arrange(affinity) %>%
    relocate(gene, peptide, allele, affinity, TPM)

# Display the combined table.
neoantigens_tbl

gene,peptide,allele,affinity,TPM,variant,peptide_offset,score,percentile_rank,prediction_method_name,peptide_length,gene_id,transcript_id,transcript_name,effect,effect_type,contains_mutant_residues,mutation_start_in_peptide,mutation_end_in_peptide,est_counts
<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<dbl>,<dbl>,<dbl>
Slit1,SSFKHMPKL,H-2-Kb,16.88022,0.02979910,chr19 g.41651178G>T,198,0.7387985,0.003500,mhcflurry,9,ENSMUSG00000025020,ENSMUST00000025993,Slit1-201,p.N202K,Substitution,TRUE,3,4,9.56654
Aldh1l1,SSYPWRSSM,H-2-Kb,19.93408,0.00000000,chr6 g.90557285_90557285delA,87,0.7234296,0.011250,mhcflurry,9,ENSMUSG00000030088,ENSMUST00000130418,Aldh1l1-203,p.K21fs,FrameShift,TRUE,0,9,0.00000
Adpgk,ASMTNMELM,H-2-Db,20.89715,24.29980000,chr9 g.59313823G>T,298,0.7190688,0.000500,mhcflurry,9,ENSMUSG00000025236,ENSMUST00000217570,Adpgk-205,p.R304M,Substitution,TRUE,5,6,3613.47000
Olfr781,MSYDRYVAM,H-2-Kb,20.96976,0.04039030,chr10 g.129333251C>G,114,0.7187483,0.013375,mhcflurry,9,ENSMUSG00000095138,ENSMUST00000204108,Olfr781-202,p.I123M,Substitution,TRUE,8,9,2.79909
Olfr1442,ISFIYFNNV,H-2-Kb,21.28552,0.00000000,chr19 g.12674310C>A,28,0.7173669,0.014625,mhcflurry,9,ENSMUSG00000044441,ENSMUST00000208494,Olfr1442-203,p.T35N,Substitution,TRUE,6,7,0.00000
Hdgfl2,KGYPHWPAL,H-2-Kb,21.38661,10.06590000,chr17 g.56079825G>T,15,0.7169290,0.014625,mhcflurry,9,ENSMUSG00000002833,ENSMUST00000225843,Hdgfl2-210,p.R24L,Substitution,TRUE,8,9,1330.21000
Zfc3h1,SNYEFPNRV,H-2-Kb,22.12009,8.23265000,chr10 g.115427783A>C,1837,0.7138124,0.016875,mhcflurry,9,ENSMUSG00000034163,ENSMUST00000036044,Zfc3h1-201,p.H1843P,Substitution,TRUE,5,6,3702.18000
Zbtb40,KSFHFYCPL,H-2-Kb,22.34975,15.95300000,chr4 g.136995463C>G,760,0.7128578,0.017375,mhcflurry,9,ENSMUSG00000060862,ENSMUST00000049583,Zbtb40-201,p.R768P,Substitution,TRUE,7,8,7426.24000
Irgq,AALLNSAVL,H-2-Db,23.12299,13.02770000,chr7 g.24531801G>T,131,0.7097143,0.001000,mhcflurry,9,ENSMUSG00000041037,ENSMUST00000049020,Irgq-201,p.G139V,Substitution,TRUE,7,8,5092.30000
1700013G24Rik,SSYTKALSL,H-2-Kb,23.25887,0.03840650,chr4 g.137454992C>A,149,0.7091727,0.020000,mhcflurry,9,ENSMUSG00000041399,ENSMUST00000048893,1700013G24Rik-201,p.P153T,Substitution,TRUE,3,4,2.00000


In [15]:
# Save the epitope table with expression values as CSV.
write_csv(neoantigens_tbl, 'MC38A-neoantigens_with_expression.csv')