# Molecular mechanisms of antibiotic resistance

In [19]:
# Housekeeping

library(ggplot2)
library(scales)
library(tidyr)
source("source.R")

In [16]:
# Read in data
#
# Loads the two tab-separated tables used by the rest of this notebook
# (descriptions are taken from the file names):
#   multihit                 - multihit nonsynonymous variant data
#   multihit_selection_coefs - multihit variant selection coefficient data
# Both files have a header row; paths are relative to the working directory.
# dim() is called after each read to show the table size.

multihit = read.table("../../data/deep_seq/multihit_nonsynonymous_variant_data.txt", 
                      sep = "\t", 
                      header = TRUE)  # TRUE, not T: T is an ordinary variable that can be reassigned

dim(multihit)

multihit_selection_coefs = read.table("../../data/deep_seq/multihit_variant_selection_coefficient_data.txt", 
                      sep = "\t", 
                      header = TRUE)

dim(multihit_selection_coefs)

In [17]:
# Drop species whose variants were observed only under antibiotic treatment:
# with no variants at ANTIBIOTIC == 0, their multihit genes cannot be compared
# with and without antibiotics.

# Variant counts for every SPECIES x ANTIBIOTIC combination in long format
# (Var1 = species, by = antibiotic level, Freq = number of variants).
species_by_antibiotics <- as.data.frame(table(multihit$SPECIES, by = multihit$ANTIBIOTIC))

# Combinations at the antibiotic-free level that have a zero count.
absent_without_ab <- species_by_antibiotics$by == 0 & species_by_antibiotics$Freq == 0
species_removed <- as.vector(unique(species_by_antibiotics$Var1[absent_without_ab]))
species_removed

# Apply the same species filter to the variant table ...
keep_variant <- !(multihit$SPECIES %in% species_removed)
multihit <- multihit[keep_variant, ]
dim(multihit)

# ... and to the selection coefficient table.
keep_coef <- !(multihit_selection_coefs$SPECIES %in% species_removed)
multihit_selection_coefs <- multihit_selection_coefs[keep_coef, ]
dim(multihit_selection_coefs)

In [28]:
# Keep only genes that were hit exclusively in the presence of antibiotic,
# i.e. remove every gene that has variants in the antibiotic-free treatment
# (this includes genes present both with and without antibiotic).

# Variant counts per gene, spread to one column per ANTIBIOTIC level
# (Var1 = gene, remaining columns named after the antibiotic levels).
genes_by_antibiotics = as.data.frame(table(multihit$GENE, by = multihit$ANTIBIOTIC))
genes_by_antibiotics = spread(genes_by_antibiotics, by, Freq)

# Select the antibiotic-treatment columns by name rather than by position, so
# the sum stays correct if the set of antibiotic levels in the data changes.
# This must happen before the helper columns below are added.
if (!("0" %in% colnames(genes_by_antibiotics))) {
    stop("No variants at ANTIBIOTIC == 0: genes cannot be compared with and without antibiotics")
}
ab_levels = setdiff(colnames(genes_by_antibiotics), c("Var1", "0"))

# all_ab: total variant count across all antibiotic levels above zero
# no_ab / ab: 0/1 indicators for presence without / with antibiotic
genes_by_antibiotics$all_ab = rowSums(genes_by_antibiotics[, ab_levels, drop = FALSE])
genes_by_antibiotics$no_ab = ifelse(genes_by_antibiotics[["0"]] == 0, 0, 1)
genes_by_antibiotics$ab = ifelse(genes_by_antibiotics$all_ab == 0, 0, 1)

# Genes absent without antibiotic but present with antibiotic
genes_kept = as.vector(unique(genes_by_antibiotics$Var1[genes_by_antibiotics$no_ab == 0 & 
                                                           genes_by_antibiotics$ab == 1]))

# Apply the same gene filter to both tables
multihit = multihit[multihit$GENE %in% genes_kept,]
dim(multihit)

multihit_selection_coefs = multihit_selection_coefs[multihit_selection_coefs$GENE %in% genes_kept,]
dim(multihit_selection_coefs)

In [36]:
# Inspect the genes that survived filtering and the species they belong to

length(genes_kept) # only 10 genes left

genes_kept

# Species still represented in the variant table
unique(multihit$SPECIES)

# Species carrying each of three specific gene IDs
with(multihit, unique(SPECIES[GENE == "CKLFCLNC_03362"]))
with(multihit, unique(SPECIES[GENE == "IBBHIMLJ_02593"]))
with(multihit, unique(SPECIES[GENE == "IDIPBEOP_05061"]))

## Potential role of genes in streptomycin / aminoglycoside resistance

$rpsL$ (ribosomal protein S12, most common mutational target of streptomycin resistance):
https://www.ncbi.nlm.nih.gov/pubmed/7934937

$cra$ (transcription factor implicated in regulatory role affecting virulence / AMR):
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5295033/

$cspA$ (cold shock protein also implicated in AMR):
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC166043/

$ltxA$ (virulence factor, indirect relationship with antimicrobial susceptibility and drug efflux):
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1831674/

$luxQ$ (quorum sensing / phosphatase activity):
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC228483/
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2992599/

$phoQ$ (member of two-component regulatory system implicated in streptomycin resistance):
https://www.ncbi.nlm.nih.gov/pubmed/11021929

$rsmG$ (16S rRNA methyltransferase implicated in streptomycin resistance):
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1913335/
http://www.scielo.org.co/scielo.php?script=sci_arttext&pid=S0120-41572014000500006


### blastn hits for hypothetical proteins


HAMBI_105 (Agrobacterium tumefaciens): IDIPBEOP_05061
- hypothetical proteins in same and other species with no functional annotation

HAMBI_1896: IBBHIMLJ_02593
- homology with DNA-binding protein in Elizabethkingia anophelis (potential role for cold shock and DNA-binding proteins in aminoglycoside resistance: https://www.ncbi.nlm.nih.gov/pubmed/19907650)

HAMBI_3172: CKLFCLNC_03362
- homology with IS256 family transposase in same species (IS256 is associated with antibiotic resistance, including aminoglycoside resistance: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2916423/)

In [41]:
# High selection coefficients between t8 and t12:
# variants whose FREQ_S is at or above the 90th percentile

upper_cutoff <- quantile(multihit_selection_coefs$FREQ_S, 0.9)
multihit_selection_coefs[multihit_selection_coefs$FREQ_S >= upper_cutoff, ]

# all occur without immigration and are associated with increase in species abundance

Unnamed: 0,ANTIBIOTIC,IMMIGRATION,REPLICATE,SPECIES,CONTIG,CONTIG_LENGTH,POSITION,REF_ALLELE,ALT_ALLELE,VARIANT_TYPE,⋯,NA_CHANGE,AA_CHANGE,FREQ_T8,FREQ_T12,ABUND_T8,ABUND_T12,FREQ_S,FREQ_W,ABUND_S,ABUND_W
147,128,0,1,HAMBI_105,IDIPBEOP_6,241380,209669,T,G,snp,⋯,c.358A>C,p.Thr120Pro,0.01,0.9166667,0.0004666667,0.004666667,1.748254,2.748254,0.576699,1.576699
203,128,0,3,HAMBI_105,IDIPBEOP_1,1656016,1148702,T,C,snp,⋯,c.197A>G,p.Tyr66Cys,0.01,0.875,0.0001333333,0.008266667,1.635257,2.635257,1.0338255,2.033826
282,16,0,1,HAMBI_105,IDIPBEOP_19,20135,7838,GCG,ACA,complex,⋯,c.61_63delGCGinsACA,p.Ala21Thr,0.6,0.999,6.666667e-05,0.0014,1.625322,2.625322,0.7614642,1.761464


In [42]:
# Low selection coefficients between t8 and t12:
# variants whose FREQ_S is at or below the 10th percentile

lower_cutoff <- quantile(multihit_selection_coefs$FREQ_S, 0.1)
multihit_selection_coefs[multihit_selection_coefs$FREQ_S <= lower_cutoff, ]

# 2/3 occur with immigration, and these two are associated with no abundance change in species

Unnamed: 0,ANTIBIOTIC,IMMIGRATION,REPLICATE,SPECIES,CONTIG,CONTIG_LENGTH,POSITION,REF_ALLELE,ALT_ALLELE,VARIANT_TYPE,⋯,NA_CHANGE,AA_CHANGE,FREQ_T8,FREQ_T12,ABUND_T8,ABUND_T12,FREQ_S,FREQ_W,ABUND_S,ABUND_W
248,128,0,3,HAMBI_3172,CKLFCLNC_50,54856,1753,AC,GG,mnp,⋯,c.1654_1655delACinsGG,p.Thr552Gly,0.8333333,0.4285714,0.0001333333,0.0014,-0.47428,0.52572,0.5881607,1.588161
280,128,1,3,HAMBI_3172,CKLFCLNC_36,69417,62894,AC,ATC,ins,⋯,c.171_172insT,p.Gln58fs,0.8888889,0.5,6.666667e-05,6.666667e-05,-0.5198604,0.4801396,0.0,1.0
281,128,1,3,HAMBI_3172,CKLFCLNC_36,69417,63001,CCACCGA,GCACGAACCCGC,complex,⋯,c.278_284delCCACCGAinsGCACGAACCCGC,p.Thr93fs,0.75,0.3333333,6.666667e-05,6.666667e-05,-0.4479399,0.5520601,0.0,1.0
