In [37]:
library(ggplot2)
library(reshape2)
library(RColorBrewer)
suppressMessages(library(dplyr))
library(stringr)
suppressMessages(library(tidyr))
theme_set(theme_bw())
library(knitr)
library(scales)
library(IRdisplay)
options(repr.plot.width=7, repr.plot.height=4)
isotypes = c('Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Gln', 'Glu', 'Gly', 'His', 'Ile', 'iMet', 'Leu', 'Lys', 'Met', 'Phe', 'Pro', 'Ser', 'Thr', 'Trp', 'Tyr', 'Val')

paired_positions = c('X1.72'='1:72', 'X2.71'='2:71', 'X3.70'='3:70', 'X4.69'='4:69', 'X5.68'='5:68', 'X6.67'='6:67', 'X7.66'='7:66', 'X8.14'='8:14', 'X9.23'='9:23', 'X10.25'='10:25', 'X10.45'='10:45', 'X11.24'='11:24', 'X12.23'='12:23', 'X13.22'='13:22', 'X15.48'='15:48','X18.55'='18:55', 'X19.56'='19:56', 'X22.46'='22:46', 'X26.44'='26:44', 'X27.43'='27:43', 'X28.42'='28:42', 'X29.41'='29:41', 'X30.40'='30:40', 'X31.39'='31:39', 'X49.65'='49:65', 'X50.64'='50:64', 'X51.63'='51:63', 'X52.62'='52:62', 'X53.61'='53:61', 'X54.58'='54:58')
paired_identities = c('GC', 'AU', 'UA', 'CG', 'GU', 'UG', 'Absent', 'PurinePyrimidine', 'PyrimidinePurine', 'StrongPair', 'WeakPair', 'AminoKeto', 'KetoAmino', 'Wobble', 'Paired', 'Bulge', 'Mismatched')
paired_colors = c('GC'='gray20', 'AU'='gray20', 'UA'='gray20', 'CG'='gray20', 'GU'='gray20', 'UG'='gray20', 'Absent'='gray20', 'PurinePyrimidine'='gray40', 'PyrimidinePurine'='gray40', 'StrongPair'='gray40', 'WeakPair'='gray40', 'AminoKeto'='gray40', 'KetoAmino'='gray40', 'Wobble'='gray40', 'Paired'='gray40', 'Bulge'='gray40', 'Mismatched'='gray40')

single_positions = c('X8'='8', 'X9'='9', 'X14'='14', 'X15'='15', 'X16'='16', 'X17'='17', 'X17a'='17a', 'X18'='18', 'X19'='19', 'X20'='20', 'X20a'='20a', 'X20b'='20b', 'X21'='21', 'X26'='26', 'X32'='32', 'X33'='33', 'X34'='34', 'X35'='35', 'X36'='36', 'X37'='37', 'X38'='38', 'X44'='44', 'X45'='45', 'X46'='46', 'X47'='47', 'X48'='48', 'X54'='54', 'X55'='55', 'X56'='56', 'X57'='57', 'X58'='58', 'X59'='59', 'X60'='60', 'X73'='73')
single_identities = c('A', 'C', 'G', 'U', 'Absent', 'Purine', 'Pyrimidine', 'Weak', 'Strong', 'Amino', 'Keto', 'B', 'D', 'H', 'V')
single_colors = c('A'='gray20', 'C'='gray20', 'G'='gray20', 'U'='gray20', 'Absent'='gray20', 'Purine'='gray40', 'Pyrimidine'='gray40', 'Weak'='gray40', 'Strong'='gray40', 'Amino'='gray40', 'Keto'='gray40', 'B'='gray40', 'D'='gray40', 'H'='gray40', 'V'='gray40')

identities = read.delim('identities.tsv', sep='\t', stringsAsFactors=FALSE)
identities$quality = as.logical(identities$quality)
identities$restrict = as.logical(identities$restrict)
identities = identities %>% filter(isotype %in% isotypes)
positions = colnames(identities)[which(str_detect(colnames(identities), "X\\d+\\.\\d+$"))]
positions = c(positions, 'X8', 'X9', 'X14', 'X15', 'X16', 'X17', 'X17a', 'X18', 'X19', 'X20', 'X20a', 'X20b', 'X21', 'X26', 'X32', 'X33', 'X34', 'X35', 'X36', 'X37', 'X38', 'X44', 'X45', 'X46', 'X47', 'X48', 'X54', 'X55', 'X56', 'X57', 'X58', 'X59', 'X60', 'X73')

load('best-freqs.RData')
load('clade-isotype-specific.RData')
load('clade-isotype-freqs.RData')

# Universal

## 8:14

In [28]:
# isotype/species check
identities %>% filter(!restrict) %>% select(clade, species, isotype, X8.14) %>% 
  count(clade, species, isotype, X8.14) %>% spread(X8.14, n, 0) %>% filter(`U:A` == 0)

# clade/isotype quantification
identities %>% select(clade, isotype, restrict, X8.14) %>% filter(!restrict, X8.14 != 'U:A') %>%
  count(clade, isotype, X8.14) %>% spread(isotype, n, 0)

Unnamed: 0,clade,species,isotype,-:A,A:A,C:A,G:A,U:A,U:C,U:G,U:U
1,Fungi,sporReil_SRZ2,Cys,0,2,0,0,0,0,0,0
2,Fungi,ustiMayd_521,Cys,0,1,0,0,0,0,0,0


Unnamed: 0,clade,X8.14,Ala,Arg,Asn,Asp,Cys,Gln,Glu,Gly,Ile,Leu,Lys,Phe,Pro,Ser,Thr,Trp,Tyr,Val
1,Fungi,A:A,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Fungi,U:G,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,Insecta,C:A,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4,Mammalia,-:A,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1
5,Mammalia,A:A,0,0,0,0,1,1,2,0,0,0,1,1,0,0,0,0,0,0
6,Mammalia,C:A,0,1,6,3,5,0,12,0,1,2,7,1,0,5,1,0,1,1
7,Mammalia,G:A,1,0,1,0,4,0,2,0,1,1,6,0,0,0,0,0,1,0
8,Mammalia,U:C,0,0,0,0,2,0,0,1,1,0,0,1,0,0,0,0,1,0
9,Mammalia,U:G,0,1,3,0,3,1,0,8,2,0,2,0,1,3,1,0,0,1
10,Mammalia,U:U,0,0,0,0,1,0,0,0,0,1,3,2,0,0,0,0,0,0


## R9

In [21]:
# clade/isotype check
identities %>% filter(!restrict) %>% select(clade, isotype, X9) %>%
  count(clade, isotype, X9) %>% spread(X9, n, 0) %>% filter(A == 0 & G == 0)

# clade/isotype quantification
identities %>% select(clade, isotype, restrict, X9) %>% filter(!restrict, !(X9 %in% c('A', 'G'))) %>%
  count(clade, isotype, X9) %>% spread(isotype, n, 0)

Unnamed: 0,clade,isotype,-,A,C,G,U


Unnamed: 0,clade,X9,Arg,Asn,Gln,Glu,Gly,His,Ile,Leu,Lys,Met,Phe,Pro,Ser,Thr,Trp,Tyr,Val
1,Fungi,C,0,0,8,0,8,26,0,0,0,0,0,2,1,0,0,2,1
2,Fungi,U,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,Insecta,-,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Insecta,C,0,0,0,1,0,22,0,0,0,0,0,0,0,0,0,0,0
5,Insecta,U,0,0,0,39,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Mammalia,C,0,0,1,2,1,73,1,0,1,1,0,0,0,1,0,0,0
7,Mammalia,U,0,1,0,6,0,0,0,1,7,0,2,0,0,0,1,0,0
8,Nematoda,-,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
9,Nematoda,C,1,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,1
10,Nematoda,U,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1


In [20]:
# odd bumps in Glu, His, Phe
identities %>% filter(((clade %in% c("Insecta", "Mammalia", "Vertebrata")) & isotype == "His") | 
                      ((clade %in% c("Insecta", "Spermatophyta")) & isotype == "Glu") |
                      (clade == "Vertebrata" & isotype == "Phe")) %>%
  select(clade, isotype, X9) %>% group_by(clade, isotype, X9) %>% tally %>% spread(X9, n, 0)

Unnamed: 0,clade,isotype,A,C,G,U
1,Insecta,Glu,0,1,136,163
2,Insecta,His,1,114,0,0
3,Mammalia,His,1,365,4,0
4,Spermatophyta,Glu,360,236,2,2
5,Vertebrata,His,4,373,0,5
6,Vertebrata,Phe,412,0,1,55


In [22]:
# check each individually for species
identities %>% filter(clade == "Insecta", isotype == "Glu") %>% select(species, X9) %>% count(species, X9) %>% spread(X9, n, 0)
identities %>% filter(clade == "Insecta", isotype == "His") %>% select(species, X9) %>% count(species, X9) %>% spread(X9, n, 0)
identities %>% filter(clade == "Mammalia", isotype == "His") %>% select(species, X9) %>% count(species, X9) %>% spread(X9, n, 0)
identities %>% filter(clade == "Spermatophyta", isotype == "Glu") %>% select(species, X9) %>% count(species, X9) %>% spread(X9, n, 0)
identities %>% filter(clade == "Vertebrata", isotype == "His") %>% select(species, X9) %>% count(species, X9) %>% spread(X9, n, 0)
# zebrafish accounts for all of the weird ones here
identities %>% filter(clade == "Vertebrata", isotype == "Phe") %>% select(species, X9) %>% count(species, X9) %>% spread(X9, n, 0)

Unnamed: 0,species,C,G,U
1,anoGam2,0,11,15
2,apiMel1,0,11,0
3,bomTer1,0,10,0
4,dm6,0,6,13
5,dp4,0,6,17
6,droAna3,0,6,13
7,droEre2,0,6,12
8,droGri2,1,5,10
9,droMoj3,0,6,11
10,droPer1,0,6,14


Unnamed: 0,species,A,C
1,anoGam2,0,21
2,apiMel1,0,7
3,bomTer1,0,6
4,dm6,0,5
5,dp4,0,5
6,droAna3,0,5
7,droEre2,0,6
8,droGri2,0,5
9,droMoj3,0,5
10,droPer1,0,7


Unnamed: 0,species,A,C,G
1,ailMel1,0,8,0
2,balAcu1,0,8,0
3,bosTau8,0,15,0
4,calJac3,0,7,0
5,canFam3,0,8,0
6,cavPor3,0,7,0
7,cerSim1,0,9,0
8,criGri1,0,13,0
9,dasNov3,1,8,0
10,dipOrd1,0,1,0


Unnamed: 0,species,A,C,G,U
1,araTha1,13,10,0,0
2,braDis3,19,10,0,0
3,braOle1,25,20,0,0
4,carPap1,8,7,0,0
5,cucSat1,17,13,0,0
6,fraVes1,18,10,0,0
7,glyMax2,24,16,0,0
8,gosRai2,19,18,0,0
9,malDom1,26,21,0,1
10,manEsc6,24,19,0,0


Unnamed: 0,species,A,C,U
1,anoCar2,0,4,0
2,calMil1,0,13,0
3,chrPic1,1,6,0
4,danRer10,2,190,3
5,fr3,0,15,0
6,gadMor1,0,12,0
7,galGal4,0,4,1
8,gasAcu1,0,40,0
9,geoFor1,0,2,0
10,latCha1,0,4,0


Unnamed: 0,species,A,G,U
1,anoCar2,5,0,0
2,calMil1,41,0,0
3,chrPic1,19,0,0
4,danRer10,120,0,55
5,fr3,17,0,0
6,gadMor1,31,1,0
7,galGal4,8,0,0
8,gasAcu1,35,0,0
9,geoFor1,5,0,0
10,latCha1,8,0,0


## Y11:R24

Also see identity-stories

In [29]:
# species/isotype check
identities %>% filter(!restrict) %>% select(clade, species, isotype, X11.24) %>%
  count(clade, species, isotype, X11.24) %>% spread(X11.24, n, 0) %>% filter(`C:G` == 0 & `U:A` == 0)

# clade/isotype quantification
identities %>% select(clade, isotype, restrict, X11.24) %>% filter(!restrict, !(X11.24 %in% c('U:A', 'C:G'))) %>%
  count(clade, isotype, X11.24) %>% spread(isotype, n, 0)

Unnamed: 0,clade,species,isotype,A:A,A:C,A:G,A:U,C:A,C:C,C:G,C:U,G:A,G:C,G:G,U:A,U:C,U:G,U:U
1,Fungi,aspeFumi_AF293,Tyr,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
2,Fungi,crypGatt_WM276,Tyr,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
3,Fungi,crypNeof_VAR_GRUBII_H99,Tyr,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
4,Fungi,crypNeof_VAR_NEOFORMANS_B_3501,Tyr,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
5,Fungi,crypNeof_VAR_NEOFORMANS_JEC21,Tyr,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
6,Fungi,flamVelu_KACC42780,His,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0
7,Fungi,schiPomb_972H,iMet,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


Unnamed: 0,clade,X11.24,Ala,Arg,Asn,Asp,Cys,Gln,Glu,Gly,⋯,Leu,Lys,Met,Phe,Pro,Ser,Thr,Trp,Tyr,Val
1,Fungi,A:U,0,1,0,0,0,5,0,2,⋯,0,0,0,0,0,0,0,0,0,3
2,Fungi,C:A,1,0,0,0,0,0,0,0,⋯,0,0,0,0,1,1,0,0,0,0
3,Fungi,C:C,0,0,1,0,0,0,0,0,⋯,1,0,0,0,0,0,0,0,0,0
4,Fungi,G:C,2,0,0,0,0,2,1,2,⋯,11,0,0,0,0,6,0,0,0,1
5,Fungi,U:G,3,0,1,0,0,0,0,0,⋯,1,1,0,0,4,0,1,0,9,87
6,Insecta,A:G,0,0,1,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
7,Insecta,C:A,0,0,0,0,0,0,0,0,⋯,0,0,0,0,1,0,0,0,0,0
8,Insecta,U:G,0,1,0,0,0,0,0,1,⋯,0,0,0,0,0,0,0,0,1,0
9,Mammalia,A:C,0,0,0,0,0,0,0,0,⋯,0,0,1,0,0,0,0,0,0,0
10,Mammalia,A:G,0,0,0,0,1,0,0,0,⋯,0,3,0,0,0,0,1,0,0,0


## R15:Y48

In [30]:
# check no. isotypes without R15:Y48
identities %>% filter(!restrict) %>% count(species, isotype, X15.48) %>% spread(X15.48, n, fill = 0) %>% 
  filter(`G:C` == 0 & `A:U` == 0) %>% 
  ungroup %>% select(isotype) %>% unique

# check no. species with tRNAs of an isotype without R15:Y48
identities %>% filter(!restrict) %>% count(species, isotype, X15.48) %>% spread(X15.48, n, fill = 0) %>% 
  filter(`G:C` == 0 & `A:U` == 0) %>% 
  ungroup %>% select(species) %>% unique

Unnamed: 0,isotype
1,Ala
2,Glu
3,Asp
4,Phe
5,Met
6,His
7,Asn
8,Gln


Unnamed: 0,species
1,araTha1
2,ashbGoss_ATCC10895
3,aspeFumi_AF293
4,aspeNidu_FGSC_A4
5,botrCine_B05_10
6,braDis3
7,candAlbi_WO_1
8,candDubl_CD36
9,candGlab_CBS_138
10,candOrth_CO_90_125


## Y16

In [100]:
# check no. isotypes without Y16
identities %>% filter(!restrict) %>% count(species, isotype, X16) %>% spread(X16, n, fill = 0) %>% 
  filter(C == 0 & U == 0) %>% 
  ungroup %>% select(isotype) %>% unique

# check no. species with tRNAs of an isotype without R15:Y48
identities %>% filter(!restrict) %>% count(species, isotype, X16) %>% spread(X16, n, fill = 0) %>% 
  filter(C == 0 & U == 0) %>% 
  ungroup %>% select(species) %>% unique

Unnamed: 0,isotype
1,Pro
2,iMet
3,His
4,Cys
5,Phe
6,Asp
7,Trp
8,Glu


Unnamed: 0,species
1,anoCar2
2,anoGam2
3,aspeFumi_AF293
4,aspeOryz_RIB40
5,bomTer1
6,candOrth_CO_90_125
7,cavPor3
8,chaeTher_VAR_THERMOPHILUM_DSM1
9,chrPic1
10,criGri1


## 18:55, 19:56

In [39]:
# 18:55 fails isotype/species check
identities %>% filter(!restrict) %>% select(clade, species, isotype, X18.55) %>%
  count(clade, species, isotype, X18.55) %>% spread(X18.55, n, 0) %>% filter(`G:U` == 0)

# 19:56 fails clade/isotype check....at 89.9%.
clade_iso_freqs %>% filter(freq > 0.5, positions == 'X19.56') %>%
  filter((clade == "Mammalia" & isotype == "Lys") | (clade == "Vertebrata" & isotype == "Glu"))
identities %>% filter(!restrict, (clade == "Mammalia" & isotype == "Lys") | (clade == "Vertebrata" & isotype == "Glu")) %>%
  select(clade, species, isotype, X19.56) %>% count(clade, species, isotype, X19.56) %>% 
  spread(X19.56, n, 0)

Unnamed: 0,clade,species,isotype,-:U,A:G,A:U,C:U,G:-,G:A,G:C,G:G,G:U,U:U
1,Fungi,flamVelu_KACC42780,Pro,0,0,2,2,0,0,0,0,0,0


Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Mammalia,Lys,X19.56,865,Paired,844,0.975722543352601


Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Mammalia,Lys,X19.56,865,GC,778,0.899421965317919
2,Vertebrata,Glu,X19.56,277,GC,254,0.916967509025271
3,Mammalia,Lys,X19.56,865,Paired,844,0.975722543352601
4,Vertebrata,Glu,X19.56,277,Paired,269,0.971119133574007
5,Mammalia,Lys,X19.56,865,PurinePyrimidine,778,0.899421965317919
6,Vertebrata,Glu,X19.56,277,PurinePyrimidine,254,0.916967509025271
7,Mammalia,Lys,X19.56,865,StrongPair,778,0.899421965317919
8,Vertebrata,Glu,X19.56,277,StrongPair,254,0.916967509025271


Unnamed: 0,clade,species,isotype,A:A,A:C,A:U,C:C,G:-,G:A,G:C,G:G,G:U,U:C,U:U
1,Mammalia,ailMel1,Lys,0,1,0,1,0,0,33,1,5,0,0
2,Mammalia,balAcu1,Lys,0,0,0,0,0,0,17,0,0,0,0
3,Mammalia,bosTau8,Lys,0,0,0,0,0,1,26,0,0,0,0
4,Mammalia,calJac3,Lys,0,0,0,0,0,0,9,0,1,0,0
5,Mammalia,canFam3,Lys,0,2,0,0,0,1,29,0,5,0,0
6,Mammalia,cavPor3,Lys,0,1,0,0,0,0,23,0,3,0,0
7,Mammalia,cerSim1,Lys,0,0,0,0,0,0,14,0,1,0,0
8,Mammalia,criGri1,Lys,0,0,0,0,0,0,36,0,3,0,0
9,Mammalia,dasNov3,Lys,0,0,1,0,0,0,27,0,2,0,0
10,Mammalia,dipOrd1,Lys,0,0,0,0,0,0,1,0,0,0,0


## A21

In [40]:
identities %>% filter(!restrict) %>% select(clade, species, isotype, X21) %>% 
  count(clade, species, isotype, X21) %>% spread(X21, n, 0) %>% filter(A == 0)

Unnamed: 0,clade,species,isotype,A,C,G,U
1,Fungi,aspeNidu_FGSC_A4,Phe,0,0,0,2
2,Fungi,schiPomb_972H,Met,0,0,2,0
3,Fungi,sporReil_SRZ2,Cys,0,0,0,2
4,Fungi,sporReil_SRZ2,Phe,0,4,0,0
5,Fungi,ustiMayd_521,Cys,0,0,0,1
6,Fungi,ustiMayd_521,Phe,0,2,0,0


## U33

In [43]:
# Clade/isotype quantification
identities %>% filter(!restrict) %>% select(clade, isotype, X33) %>% filter(X33 != "U") %>%
  count(clade, isotype, X33) %>% spread(isotype, n, 0) %>%
  kable %>% paste(collapse = '\n') %>% display_markdown

|clade         |X33 | Ala| Arg| Asp| Cys| Gln| Glu| Gly| His| Ile| iMet| Leu| Lys| Met| Phe| Pro| Ser| Thr| Trp| Tyr| Val|
|:-------------|:---|---:|---:|---:|---:|---:|---:|---:|---:|---:|----:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
|Fungi         |A   |   1|   0|   0|   0|   2|   1|   1|   1|   0|    0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|
|Fungi         |C   |   8|   0|   0|   0|   0|   0|   4|   0|   0|    1|  14|   0|   1|   1|   0|   4|   1|   0|   0|   1|
|Fungi         |G   |   0|   0|   0|   0|   0|   1|   0|   0|   0|    0|   6|   0|   0|   0|   0|   1|   0|   0|   0|   0|
|Insecta       |A   |   0|   0|   0|   0|   0|   1|   0|   0|   0|    0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|
|Insecta       |C   |   0|   0|   0|   1|   0|   0|   2|   0|   0|   28|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|
|Mammalia      |A   |   0|   0|   0|   0|   0|   0|   0|   0|   1|    0|   0|   0|   0|   0|   0|   0|   0|   1|   0|   0|
|Mammalia      |C   |   6|   0|   2|   3|   0|   0|   1|   1|   3|   86|   1|  10|   1|   2|   0|   0|   1|   6|   3|   0|
|Mammalia      |G   |   1|   0|   0|   1|   0|   0|   0|   0|   0|    0|   0|   2|   0|   1|   0|   1|   0|   0|   1|   0|
|Nematoda      |-   |   0|   0|   0|   0|   0|   0|   0|   0|   0|    0|   0|   0|   0|   0|   0|   2|   0|   0|   0|   0|
|Nematoda      |C   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   17|   0|   0|   0|   0|   0|   3|   0|   0|   0|   0|
|Nematoda      |G   |   0|   0|   0|   0|   0|   0|   0|   0|   0|    0|   0|   0|   0|   0|   0|   6|   0|   0|   0|   0|
|Spermatophyta |A   |   0|   0|   0|   0|   0|   0|   0|   0|   0|    0|   0|   0|   0|   0|   0|   0|   0|   0|   1|   0|
|Spermatophyta |C   |   0|   0|   0|   0|   1|   0|   2|   0|   0|   45|   0|   0|   0|   0|   0|   1|   1|   0|   0|   1|
|Spermatophyta |G   |   0|   0|   0|   0|   0|   0|   0|   0|   0|    0|   0|   1|   0|   0|   1|   0|   0|   0|   0|   0|
|Vertebrata    |A   |   0|   1|   0|   0|   1|   1|   0|   0|   0|    0|   0|   4|   0|   2|   0|   1|   0|   1|   2|   1|
|Vertebrata    |C   |   0|   1|   0|   0|   0|   3|   2|   1|   1|   69|   2|   4|   0|   2|   1|   0|   0|   1|   3|   1|
|Vertebrata    |G   |   1|   0|   0|   1|   3|   0|   0|   0|   0|    0|   0|   2|   0|   0|   0|   2|   0|   0|   0|   0|

In [47]:
# No. species with only C33, no. species with only U33, and no. species with both
identities %>% filter(!restrict) %>% select(clade, species, isotype, X33) %>% 
  filter(isotype == "iMet") %>%
  count(clade, species, isotype, X33) %>% spread(X33, n, 0) %>% 
  group_by(clade) %>% summarize(Cs = sum(C > 0 & U == 0), Us = sum(U > 0 & C == 0), CUs = sum(U > 0 & C > 0))

# which fungi is it
identities %>% filter(!restrict, clade == "Fungi", isotype == 'iMet') %>% select(species, X33) %>%
  count(species, X33) %>% spread(X33, n, 0) %>% filter(U == 0)

Unnamed: 0,clade,Cs,Us,CUs
1,Fungi,1,49,0
2,Insecta,18,0,0
3,Mammalia,43,0,2
4,Nematoda,12,0,1
5,Spermatophyta,18,0,0
6,Vertebrata,15,0,2


Unnamed: 0,species,C,U
1,flamVelu_KACC42780,1,0


In [46]:
# which clades have what? iMet is exceptional so group on that
identities %>% select(clade, species, isotype, X33) %>%
  mutate(isotype = ifelse(isotype == 'iMet', 'iMet', 'Elongator')) %>%
  group_by(clade, isotype, X33) %>% tally %>% spread(X33, n, 0) 

Unnamed: 0,clade,isotype,-,A,C,G,U
1,Fungi,Elongator,0,6,43,8,12275
2,Fungi,iMet,0,0,9,0,194
3,Insecta,Elongator,0,1,5,0,4675
4,Insecta,iMet,0,0,96,0,0
5,Mammalia,Elongator,0,16,159,13,26986
6,Mammalia,iMet,0,0,403,0,2
7,Nematoda,Elongator,17,0,12,28,5869
8,Nematoda,iMet,0,0,95,0,1
9,Spermatophyta,Elongator,0,3,7,2,10771
10,Spermatophyta,iMet,0,0,204,0,0


## R37

In [48]:
identities %>% filter(!restrict) %>% select(clade, species, isotype, X37) %>%
  count(clade, species, isotype, X37) %>% spread(X37, n, 0) %>%
  filter(A + G < 1)

Unnamed: 0,clade,species,isotype,-,A,C,G,U


## R46

In [57]:
best_freqs %>% filter(positions == "X46", !(feature %in% c("A", "G", "Purine")))

clade_iso_freqs %>% filter(clade == "Fungi", isotype == "Leu", positions == "X46", n > 5)

Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Fungi,Leu,X46,459,Keto,434,0.945533769063181


Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Fungi,Leu,X46,459,A,10,0.0217864923747277
2,Fungi,Leu,X46,459,G,404,0.880174291938998
3,Fungi,Leu,X46,459,U,30,0.065359477124183
4,Fungi,Leu,X46,459,Absent,10,0.0217864923747277
5,Fungi,Leu,X46,459,Purine,404,0.880174291938998
6,Fungi,Leu,X46,459,Pyrimidine,30,0.065359477124183
7,Fungi,Leu,X46,459,Weak,30,0.065359477124183
8,Fungi,Leu,X46,459,Strong,404,0.880174291938998
9,Fungi,Leu,X46,459,Keto,434,0.945533769063181
10,Fungi,Leu,X46,459,B,434,0.945533769063181


In [53]:
# Clade/isotype quantification
identities %>% filter(!restrict) %>% select(clade, isotype, X46) %>% filter(X46 != "A" & X46 != "G") %>%
  count(clade, isotype, X46) %>% spread(isotype, n, 0) %>%
  kable %>% paste(collapse = '\n') %>% display_markdown

|clade         |X46 | Ala| Arg| Asn| Asp| Cys| Gln| Glu| Gly| His| Ile| Leu| Lys| Met| Phe| Pro| Ser| Thr| Trp| Tyr| Val|
|:-------------|:---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
|Fungi         |-   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|  10|   0|   0|   0|   0|   0|   0|   0|   0|   0|
|Fungi         |C   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   5|   0|   1|   0|   0|  20|   0|   0|   0|   0|
|Fungi         |U   |   0|   0|   0|   0|   0|   0|   0|   1|   0|   0|  30|   0|   0|   0|   0|  14|   0|   0|   0|   2|
|Insecta       |U   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   1|   0|   0|
|Mammalia      |-   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   1|   0|   1|   0|   0|
|Mammalia      |C   |   2|   0|   4|   0|   0|   0|   0|   0|   0|   0|   1|   4|   0|   0|   0|   3|   0|   0|   0|   5|
|Mammalia      |U   |   3|   6|   1|   0|   1|   0|   0|   0|   0|   0|  15|   7|   1|   4|   1|   4|   1|   0|   0|   0|
|Nematoda      |-   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   1|   0|   0|   0|   0|
|Nematoda      |C   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   2|   3|   1|   0|   0|   0|   0|   0|   0|   0|   0|
|Nematoda      |N   |   0|   0|   0|   0|   0|   1|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|
|Nematoda      |U   |   0|   1|   0|   0|   0|   0|   0|   0|   1|   0|  10|   0|   0|   0|   0|   0|   0|   1|   0|   4|
|Spermatophyta |-   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   1|   0|   0|   0|  43|   0|   0|   0|   0|
|Spermatophyta |C   |   0|   0|   3|   1|   0|   0|   0|   0|   0|   0|   0|   0|   1|   0|   0|   4|   0|   0|   0|   0|
|Spermatophyta |U   |   0|   1|   0|   0|  10|   0|   0|   0|   0|   2|   0|   0|   5|   0|   1|   0|   0|   0|   1|   0|
|Vertebrata    |-   |   0|   0|   0|   0|   0|   0|   1|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|
|Vertebrata    |C   |   0|   2|   2|   0|   0|   0|   0|   0|   0|   1|  21|   0|   0|   1|   0|   2|   1|   0|   0|   0|
|Vertebrata    |U   |   2|   2|   5|   0|   2|   0|   0|   0|   0|   2|  17|   1|   1|   2|   0|   2|   2|   0|   2|   4|

In [99]:
identities %>% filter(!restrict) %>% count(X4.69) %>% filter(X4.69 != '-') %>% mutate(freq = n/sum(n))

Unnamed: 0,X4.69,n,freq
1,-:-,19,0.0007202426080363
2,-:A,23,0.0008718726307808
3,-:C,23,0.0008718726307808
4,-:G,15,0.0005686125852918
5,A:A,23,0.0008718726307808
6,A:C,83,0.0031463229719484
7,A:G,15,0.0005686125852918
8,A:U,2788,0.105686125852919
9,C:A,90,0.0034116755117513
10,C:C,30,0.0011372251705837


## R48

In [58]:
best_freqs %>% filter(positions == "X48", !(feature %in% c("C", "U", "Pyrimidine")))

identities %>% filter(!restrict, isotype == "Ala") %>% select(clade, isotype, X48) %>% 
  group_by(clade, isotype, X48) %>% tally %>% spread(X48, n, 0)

Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Spermatophyta,Ala,X48,201,A,194,0.965174129353234
2,Insecta,Ala,X48,93,Amino,92,0.989247311827957
3,Vertebrata,Ala,X48,306,Amino,300,0.980392156862745
4,Mammalia,Ala,X48,1034,Amino,988,0.955512572533849


Unnamed: 0,clade,isotype,A,C,G,U
1,Fungi,Ala,24,269,4,1
2,Insecta,Ala,31,61,0,1
3,Mammalia,Ala,182,806,27,19
4,Nematoda,Ala,5,92,0,1
5,Spermatophyta,Ala,194,6,1,0
6,Vertebrata,Ala,52,248,1,5


## R52:Y62

In [59]:
best_freqs %>% filter(positions == "X52.62", !(feature %in% c("GC", "AU", "PurinePyrimidine")))

identities %>% filter(!restrict) %>% select(clade, species, isotype, X52.62) %>% 
  count(clade, isotype, X52.62) %>% spread(X52.62, n, 0)

identities %>% filter(!restrict) %>% select(clade, species, isotype, X52.62) %>% 
  count(clade, species, X52.62) %>% spread(X52.62, n, 0) %>%
  filter((`G:C` + `A:U`) / (`A:C` + `A:G` + `A:U` + `C:A` + `C:C` + `C:G` + `C:U` + `G:-` + `G:A` + `G:C` + `G:G` + `G:U` + `U:A` + `U:C` + `U:G` + `U:U`) < 0.9)

Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Vertebrata,Trp,X52.62,137,UA,135,0.985401459854015
2,Mammalia,Trp,X52.62,249,UA,231,0.927710843373494
3,Insecta,Trp,X52.62,36,UA,33,0.916666666666667
4,Fungi,Trp,X52.62,114,StrongPair,112,0.982456140350877
5,Fungi,Pro,X52.62,229,StrongPair,223,0.973799126637555
6,Spermatophyta,Phe,X52.62,88,WeakPair,84,0.954545454545455
7,Fungi,iMet,X52.62,83,StrongPair,76,0.91566265060241
8,Fungi,Ala,X52.62,298,StrongPair,269,0.902684563758389
9,Nematoda,Trp,X52.62,29,PyrimidinePurine,27,0.931034482758621
10,Fungi,Phe,X52.62,148,Paired,148,1.0


Unnamed: 0,clade,isotype,-:C,A:A,A:C,A:G,A:U,C:A,C:C,C:G,C:U,G:-,G:A,G:C,G:G,G:U,U:A,U:C,U:G,U:U
1,Fungi,Ala,0,0,1,0,18,0,0,21,0,0,0,248,0,0,7,0,3,0
2,Fungi,Arg,0,0,4,0,29,0,0,9,0,0,0,400,0,0,3,0,1,0
3,Fungi,Asn,0,0,0,0,128,0,0,0,0,0,0,21,0,0,0,0,0,0
4,Fungi,Asp,0,0,0,0,9,0,0,0,0,0,0,96,0,0,0,0,0,0
5,Fungi,Cys,0,0,2,0,34,0,0,15,0,0,0,38,0,0,28,0,0,0
6,Fungi,Gln,0,0,12,0,15,0,0,0,0,0,0,212,0,0,0,0,0,0
7,Fungi,Glu,0,0,0,0,6,0,0,0,0,0,0,235,0,0,0,0,0,0
8,Fungi,Gly,0,0,1,0,9,0,0,21,0,0,0,214,0,22,13,1,0,0
9,Fungi,His,0,0,4,0,40,0,0,2,0,0,0,52,0,21,16,1,0,1
10,Fungi,Ile,0,0,0,0,71,0,0,0,0,0,0,119,0,0,0,0,0,0


Unnamed: 0,clade,species,-:C,A:A,A:C,A:G,A:U,C:A,C:C,C:G,C:U,G:-,G:A,G:C,G:G,G:U,U:A,U:C,U:G,U:U
1,Fungi,ashbGoss_ATCC10895,0,0,1,0,18,0,0,0,0,0,0,47,0,3,4,0,0,0
2,Fungi,aspeFumi_AF293,0,0,1,0,11,0,0,9,0,0,0,57,0,0,4,0,0,0
3,Fungi,aspeNidu_FGSC_A4,0,0,0,0,15,0,0,12,0,0,0,54,0,0,2,0,1,0
4,Fungi,botrCine_B05_10,0,0,0,0,9,0,0,7,0,0,0,71,0,0,14,0,0,0
5,Fungi,candGlab_CBS_138,0,0,0,0,14,0,0,1,0,0,0,29,0,3,1,0,0,0
6,Fungi,chaeTher_VAR_THERMOPHILUM_DSM1,0,0,0,0,40,0,0,19,0,0,0,91,0,0,5,0,0,0
7,Fungi,crypGatt_WM276,0,0,0,0,10,0,0,7,0,0,0,45,0,0,3,0,0,0
8,Fungi,crypNeof_VAR_GRUBII_H99,0,0,0,0,11,0,0,8,0,0,0,47,0,0,4,0,0,0
9,Fungi,crypNeof_VAR_NEOFORMANS_B_3501,0,0,0,0,10,0,0,7,0,0,0,48,0,0,4,0,0,0
10,Fungi,crypNeof_VAR_NEOFORMANS_JEC21,0,0,0,0,10,0,0,7,0,0,0,47,0,0,4,0,0,0


In [60]:
# check no. isotypes without R52:Y52
identities %>% filter(!restrict) %>% count(species, isotype, X52.62) %>% spread(X52.62, n, fill = 0) %>% 
  filter(`G:C` == 0 & `A:U` == 0) %>% 
  ungroup %>% select(isotype) %>% unique

# check no. species with all tRNAs of an isotype without R52:Y52
identities %>% filter(!restrict) %>% count(species, isotype, X52.62) %>% spread(X52.62, n, fill = 0) %>% 
  filter(`G:C` == 0 & `A:U` == 0) %>% 
  ungroup %>% select(species) %>% unique

Unnamed: 0,isotype
1,Trp
2,Phe
3,Tyr
4,Cys
5,Pro
6,His
7,iMet
8,Thr
9,Met


Unnamed: 0,species
1,ailMel1
2,anoCar2
3,anoGam2
4,apiMel1
5,araTha1
6,ashbGoss_ATCC10895
7,aspeFumi_AF293
8,aspeNidu_FGSC_A4
9,aspeOryz_RIB40
10,bomTer1


## G53:C61

In [63]:
best_freqs %>% filter(positions == "X53.61", feature != "GC")

clade_iso_freqs %>% filter(positions == "X53.61", isotype == "Pro", clade == "Nematoda", freq > 0.02)

identities %>% filter(!restrict, isotype == "Pro", clade == "Nematoda") %>% 
  select(species_long, X53.61) %>% count(species_long, X53.61) %>% spread(X53.61, n, 0)

Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Nematoda,Pro,X53.61,87,Paired,86,0.988505747126437


Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Nematoda,Pro,X53.61,87,GC,75,0.862068965517241
2,Nematoda,Pro,X53.61,87,UA,10,0.114942528735632
3,Nematoda,Pro,X53.61,87,Paired,86,0.988505747126437
4,Nematoda,Pro,X53.61,87,PurinePyrimidine,75,0.862068965517241
5,Nematoda,Pro,X53.61,87,PyrimidinePurine,10,0.114942528735632
6,Nematoda,Pro,X53.61,87,StrongPair,75,0.862068965517241
7,Nematoda,Pro,X53.61,87,WeakPair,10,0.114942528735632


Unnamed: 0,X53.61,n
1,A:C,1
2,G:C,75
3,G:U,1
4,U:A,10


Unnamed: 0,species_long,A:C,G:C,G:U,U:A
1,Brugia malayi,0,2,0,1
2,Caenorhabditis brenneri (WUGSC 6.0.1 Feb 2008),1,11,0,0
3,Caenorhabditis briggsae (C. briggsae Jan. 2007 WUGSC 1.0/cb3),0,12,0,0
4,Caenorhabditis elegans (C. elegans Feb 2013 WBcel235/ce11),0,12,0,0
5,Caenorhabditis japonica (WUGSC 3.0.2 Mar 2008),0,8,1,0
6,Caenorhabditis remanei (WUGSC 15.0.1 May 2007),0,8,0,0
7,Heterodera glycines (soybean cyst nematode) (OP25),0,3,0,0
8,Heterorhabditis bacteriophora M31e,0,4,0,0
9,Loa loa (V3.1),0,1,0,2
10,Panagrellus redivivus MT8872,0,1,0,2


## U54:A58

In [64]:
best_freqs %>% filter(positions == "X54.58", feature != "UA")

Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Fungi,iMet,X54.58,83,Mismatched,83,1.0
2,Insecta,iMet,X54.58,28,Mismatched,28,1.0
3,Mammalia,iMet,X54.58,88,Mismatched,88,1.0
4,Nematoda,iMet,X54.58,18,Mismatched,18,1.0
5,Spermatophyta,iMet,X54.58,45,Mismatched,45,1.0
6,Vertebrata,iMet,X54.58,71,Mismatched,69,0.971830985915493
7,Insecta,Lys,X54.58,69,Paired,66,0.956521739130435
8,Spermatophyta,His,X54.58,39,Mismatched,37,0.948717948717949


In [65]:
# iMets in general have what kind of mismatch?
identities %>% filter(!restrict, isotype == "iMet") %>% select(clade, X54.58) %>%
  group_by(clade, X54.58) %>% tally %>% spread(X54.58, n, 0)

Unnamed: 0,clade,A:A,C:A,U:A
1,Fungi,83,0,0
2,Insecta,28,0,0
3,Mammalia,88,0,0
4,Nematoda,18,0,0
5,Spermatophyta,45,0,0
6,Vertebrata,68,1,2


In [66]:
# Insect Lys
identities %>% filter(isotype == "Lys", clade == "Insecta") %>% select(species_long, X54.58) %>%
  group_by(species_long, X54.58) %>% tally %>% spread(X54.58, n, 0)

Unnamed: 0,species_long,C:G,U:A,U:C,U:G,U:U
1,Anopheles gambiae (AgamP4),0,24,0,0,0
2,Apis mellifera (DH4) (honey bee),0,13,0,0,0
3,Bombus terrestris (buff-tailed bumblebee),0,10,0,0,0
4,Drosophila ananassae (D. ananassae Feb. 2006 Agencourt CAF1),0,19,0,3,0
5,Drosophila erecta (D. erecta Feb. 2006 Agencourt CAF1),0,16,0,0,0
6,Drosophila grimshawi (D. grimshawi Feb. 2006 Agencourt CAF1),0,18,0,0,0
7,Drosophila melanogaster (D. melanogaster Aug. 2014 BDGP Release 6 + ISO1 MT/dm6),0,19,0,0,0
8,Drosophila mojavensis (D. mojavensis Feb. 2006 Agencourt CAF1),0,17,0,1,0
9,Drosophila persimilis (D. persimilis Oct. 2005 Broad),0,17,0,0,1
10,Drosophila pseudoobscura (D. pseudoobscura Feb. 2006),0,17,0,0,0


In [67]:
# Plant His
identities %>% filter(isotype == "His", clade == "Spermatophyta") %>% select(species_long, X54.58) %>%
  group_by(species_long, X54.58) %>% tally %>% spread(X54.58, n, 0)

Unnamed: 0,species_long,C:A,U:A
1,Arabidopsis thaliana (TAIR10 Feb 2011),9,0
2,Brachypodium distachyon Bd21 (JGI v3.0),10,0
3,Brassica oleracea var. oleracea TO1000,13,1
4,Carica papaya (papaya),6,0
5,Cucumis sativus (cucumber),12,0
6,Fragaria vesca (strawberry) (v2.0.a1),11,0
7,Glycine max (soybean) (Wm82.a2),17,0
8,Gossypium raimondii (Cotton v2),13,0
9,Malus x domestica (apple v3.0.a1),21,0
10,Manihot esculenta (Cassava v6),17,0


In [70]:
# Nematode position 54
identities %>% filter(!restrict) %>% select(clade, isotype, species, X54, X58) %>% filter(isotype == "Ala", clade == "Nematoda") %>%
  count(species, X54, X58)
identities %>% filter(species == "triSpi1" & isotype == "Ala" & clade == "Nematoda")
# found by M&G
identities %>% filter(species == "araTha1" & isotype == "His") %>% select(score, X54, X55, X56, X57, X58, X59)

Unnamed: 0,species,X54,X58,n
1,bruMal1,U,A,5
2,caeJap1,U,A,8
3,caePb2,U,A,9
4,caeRem3,U,A,7
5,cb3,-,A,1
6,cb3,U,A,11
7,ce11,U,A,9
8,hetBac1,U,A,6
9,hetGly1,U,A,3
10,loaLoa1,U,A,5


Unnamed: 0,clade,domain,isotype,seqname,species,species_long,taxid,isotype_best,anticodon,score,⋯,X70i8,X70i9,X71,X71i1,X71i2,X72,X73,X74,X75,X76
1,Nematoda,eukaryota,Ala,triSpi1_GL622787.1.trna1-AlaAGC,triSpi1,Trichinella spiralis (ISS 195),.,Ala,AGC,60.2,⋯,.,.,C,.,.,C,A,-,-,-
2,Nematoda,eukaryota,Ala,triSpi1_GL622787.1.trna17-AlaTGC,triSpi1,Trichinella spiralis (ISS 195),.,Ala,TGC,61.6,⋯,.,.,C,.,.,C,A,-,-,-
3,Nematoda,eukaryota,Ala,triSpi1_GL622787.1.trna44-AlaTGC,triSpi1,Trichinella spiralis (ISS 195),.,Ala,TGC,61.6,⋯,.,.,C,.,.,C,A,-,-,-
4,Nematoda,eukaryota,Ala,triSpi1_GL622787.1.trna62-AlaTGC,triSpi1,Trichinella spiralis (ISS 195),.,Ala,TGC,66.7,⋯,.,.,C,.,.,C,A,-,-,-
5,Nematoda,eukaryota,Ala,triSpi1_GL624340.1.trna3-AlaCGC,triSpi1,Trichinella spiralis (ISS 195),.,Ala,CGC,61.5,⋯,.,.,C,.,.,C,A,-,-,-
6,Nematoda,eukaryota,Ala,triSpi1_GL624340.1.trna5-AlaAGC,triSpi1,Trichinella spiralis (ISS 195),.,Ala,AGC,59.8,⋯,.,.,C,.,.,C,A,-,-,-


Unnamed: 0,score,X54,X55,X56,X57,X58,X59
1,63.0,C,U,C,G,A,A
2,63.0,C,U,C,G,A,A
3,56.8,C,U,C,G,A,A
4,63.0,C,U,C,G,A,A
5,56.8,C,U,C,G,A,A
6,63.0,C,U,C,G,A,A
7,63.0,C,U,C,G,A,A
8,63.0,C,U,C,G,A,A
9,63.0,C,U,C,G,A,A


## Y60

In [73]:
best_freqs %>% filter(positions == "X60", !(feature %in% c("Pyrimidine", "U", "C")))

Unnamed: 0,clade,isotype,positions,clade_iso_pos_total,feature,n,freq
1,Fungi,iMet,X60,83,A,83,1.0
2,Insecta,iMet,X60,28,A,28,1.0
3,Mammalia,iMet,X60,88,A,88,1.0
4,Nematoda,iMet,X60,18,A,18,1.0
5,Spermatophyta,iMet,X60,45,A,45,1.0
6,Vertebrata,iMet,X60,71,A,70,0.985915492957746
7,Vertebrata,Val,X60,301,Amino,295,0.980066445182724
8,Spermatophyta,Ile,X60,184,Amino,177,0.96195652173913
9,Mammalia,Val,X60,577,Amino,538,0.932409012131716
10,Vertebrata,Ala,X60,306,B,305,0.996732026143791


In [80]:
identities %>% filter(!restrict) %>% count(clade, isotype, X60) %>% spread(isotype, n, 0) %>%
  kable %>% paste(collapse = '\n') %>% display_markdown

|clade         |X60 | Ala| Arg| Asn| Asp| Cys| Gln| Glu| Gly| His| Ile| iMet| Leu| Lys| Met| Phe| Pro| Ser| Thr| Trp| Tyr| Val|
|:-------------|:---|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|----:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
|Fungi         |-   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|    0|   0|   3|   0|   0|   0|   0|   0|   0|   0|   0|
|Fungi         |A   |   0|   5|   0|   0|   0|   0|   0|   0|   0|   2|   83|   3|   0|   3|   0|   0|   0|   0|   0|   0|   5|
|Fungi         |C   |  54| 147| 129|   0|  65|  65|   5|  12|   0| 172|    0| 125| 188|  86| 143| 105|  19|  91|   6|  25| 320|
|Fungi         |G   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|    0|   0|   0|   2|   0|   0|   0|   0|   0|   0|   0|
|Fungi         |U   | 244| 294|  20| 105|  52| 174| 236| 269| 137|  16|    0| 331|  38|  30|   5| 124| 383| 238| 108| 105|  12|
|Insecta       |A   |  15|   0|   0|   0|   0|   0|   0|   0|   0|   0|   28|   0|   0|   0|   0|   0|   0|   0|   0|   0|   3|
|Insecta       |C   |  45|  34|  12|   0|  24|   0|   0|   0|   0|  80|    0|  35|  30|  39|  31|   0|   0|  49|   0|   0| 110|
|Insecta       |G   |  16|   0|   0|   0|   0|   0|   0|   0|   0|   0|    0|   0|   1|   0|   0|   0|   0|   0|   0|   0|   0|
|Insecta       |U   |  17| 128|  20|  36|  29|  89|  75|  87|  23|   3|    0| 121|  38|   1|   2|  87| 131|  61|  36| 103|   4|
|Mammalia      |-   |   1|   0|   0|   0|   2|   0|   0|   0|   0|   2|    0|   0|   1|   1|   1|   0|   0|   0|   0|   0|   1|
|Mammalia      |A   |  26|   0|   5|   1|   0|   0|   1|   0|   0|   2|   88|   1|   4|   0|   1|   0|   3|   0|   0|   0| 315|
|Mammalia      |C   | 716|  27| 277|  28|  13|   2| 137|   3|   1| 368|    0| 162| 540| 272| 250|   3|  44| 121|  13|   0| 223|
|Mammalia      |G   | 187|   0|   0|   1|   0|   1|   0|   0|   0|   1|    0|   0|   2|   0|   0|   0|   1|   0|   0|   0|   5|
|Mammalia      |U   | 104| 852|  11| 137| 613| 347| 336| 542|  77|  26|    0| 556| 318|  10|   9| 217| 644| 560| 236| 307|  33|
|Nematoda      |-   |   0|   0|   0|   0|   0|   0|   0|   0|   0|   1|    0|   0|   0|   0|   0|   0|   0|   0|   0|   0|   0|
|Nematoda      |A   |   5|   1|   0|   1|   0|   0|   0|   0|   0|   0|   18|   0|   3|   0|   0|   0|   0|   1|   0|   0|   0|
|Nematoda      |C   |  16|  42|  38|   0|   3|   0|   0|   1|   0|  60|    0|  44|  58|  28|  40|  66|  22|  52|  17|   0|  96|
|Nematoda      |G   |   0|   1|   0|   0|   0|   0|   0|   0|   0|   0|    0|   0|   0|   0|   0|   0|   1|   0|   0|   0|   0|
|Nematoda      |U   |  77| 110|   1|  29|  23|  66|  81| 107|  30|   8|    0| 108|  35|   2|   0|  21| 131|  65|  12|  46|  15|
|Spermatophyta |-   |   0|   0|   1|   1|   0|   0|   0|   0|   0|   3|    0|   0|   1|   0|   2|   0|   0|   1|   1|   0|   0|
|Spermatophyta |A   |  82|   1|   1|   2|   1|   0|   0|   0|   0| 115|   45|   1|   4|   0|   0|   0|   0|   0|   1|   0|   1|
|Spermatophyta |C   |  55| 127| 125|  99|  86|   2|   0|   3|   0|  62|    0|  81| 145|  74|  69|   0|  70| 125|   1|   0| 156|
|Spermatophyta |G   |   3|   0|   0|   2|   0|   0|   1|   1|   0|   1|    0|   2|   3|   0|   1|   0|   0|   1|   1|   0|   0|
|Spermatophyta |U   |  61| 196|  64|   4|  28| 159| 155| 190|  39|   3|    0| 214|  70|  20|  16| 181| 250| 122|  66| 142|  55|
|Vertebrata    |-   |   0|   0|   0|   0|   0|   1|   0|   0|   0|   1|    0|   0|   1|   0|   2|   0|   0|   0|   0|   0|   1|
|Vertebrata    |A   |   1|   0|   2|   1|   0|   0|   0|   0|   0|  13|   70|   1|   0|   1|   6|   0|   3|   1|   0|   0| 231|
|Vertebrata    |C   | 202|   5| 185|   0|   4|   0|   2|   1|   1| 204|    1| 111| 152| 140| 183|   2|  35|  69|   1|   0|  64|
|Vertebrata    |G   |  50|   0|   0|   0|   0|   1|   0|   0|   0|   2|    0|   0|   0|   1|   2|   1|   1|   0|   0|   0|   1|
|Vertebrata    |U   |  53| 367|   1|  74| 190| 214| 275| 261|  84|   5|    0| 347| 162|   2|   1| 182| 379| 306| 136| 187|   4|

In [74]:
identities %>% filter(!restrict, isotype == "Val", clade == "Mammalia") %>% select(species, X60) %>%
  group_by(species, X60) %>% tally %>% spread(X60, n, 0) %>% filter(A != 0 | G != 0)

Unnamed: 0,species,-,A,C,G,U
1,ailMel1,0,2,5,0,3
2,balAcu1,0,10,3,0,1
3,bosTau8,0,19,4,0,0
4,calJac3,0,5,3,0,0
5,canFam3,0,4,4,1,2
6,cavPor3,0,3,3,0,0
7,cerSim1,0,12,5,0,2
8,criGri1,0,9,4,0,0
9,dasNov3,0,8,5,0,2
10,dipOrd1,0,3,3,0,3


In [75]:
identities %>% filter(!restrict, isotype == "Ile", clade == "Spermatophyta") %>% select(species, X60) %>%
  group_by(species, X60) %>% tally %>% spread(X60, n, 0) %>% filter(A != 0 | G != 0)

Unnamed: 0,species,-,A,C,G,U
1,araTha1,0,1,3,0,0
2,braDis3,0,6,3,0,0
3,braOle1,1,12,10,1,0
4,carPap1,0,2,4,0,0
5,cucSat1,0,8,2,0,0
6,fraVes1,0,3,3,0,0
7,glyMax2,0,11,3,0,1
8,gosRai2,0,9,5,0,0
9,malDom1,0,5,2,0,0
10,manEsc6,0,7,5,0,1


# Ala

In [None]:
best_freqs %>% filter(positions == 'X3.70', feature == "GU")

In [None]:
# Exceptions to G3:U70
identities %>% select(clade, species, isotype, X3.70) %>% filter(isotype == "Ala") %>%
  group_by(species) %>% mutate(ntRNAs = n()) %>%
  group_by(clade, species, ntRNAs, X3.70) %>% tally() %>% spread(X3.70, n, fill='.') %>% 
  filter(as.integer(`G:U`)/ntRNAs < 0.9) %>%
  kable %>% paste(collapse = '\n') %>% display_markdown

# related: G1:C72, which vertebrates?
identities %>% select(clade, species, isotype, X1.72) %>% filter(isotype == 'Ala', clade == "Vertebrata") %>%
  group_by(clade, species, X1.72) %>% tally() %>% spread(X1.72, n, fill='.')
identities %>% select(clade, quality, species, isotype, X1.72) %>% 
  filter(isotype == 'Ala', species == "calMil1") %>%
  group_by(species, quality, X1.72) %>% tally()

In [None]:
# Pos. 73 for insects
identities %>% select(clade, restrict, isotype, X73) %>% filter(isotype == "Ala", clade == 'Insecta') %>%
  group_by(restrict, X73) %>% tally() %>% spread(X73, n, fill='.')

# Arg

In [None]:
#G36 and U36
identities %>% filter(isotype == "Arg") %>% select(clade, X36) %>% 
  group_by(clade, X36) %>% tally() %>% spread(X36, n, fill='.')

In [None]:
# N73 is clade-specific
identities %>% filter(isotype == "Arg") %>% select(clade, X73) %>% 
  group_by(clade, X73) %>% tally() %>% spread(X73, n, fill='.')

# species-specific?
identities %>% filter(isotype == "Arg") %>% select(clade, species, X73) %>% 
  group_by(clade, species, X73) %>% tally() %>% spread(X73, n, fill='.') %>%
  kable %>% paste(collapse = '\n') %>% display_markdown

# Asn

In [None]:
# G1
identities %>% select(clade, isotype, X1.72) %>% filter(isotype == 'Asn') %>%
  group_by(clade, X1.72) %>% tally() %>% spread(X1.72, n, fill='.')

In [None]:
# Y60 transition to C60
identities %>% select(clade, isotype, X60) %>% filter(isotype == "Asn") %>%
  group_by(clade, isotype, X60) %>% tally() %>% spread(X60, n, fill='.')

identities %>% select(clade, species_long, isotype, X60) %>% filter(isotype == "Asn", clade == "Nematoda") %>%
  group_by(clade, species_long, isotype, X60) %>% tally() %>% spread(X60, n, fill='.') %>% 
    kable %>% paste(collapse = '\n') %>% display_markdown

# Asp

In [None]:
# 1:72
identities %>% select(clade, isotype, X1.72) %>% filter(isotype == "Asp") %>%
  group_by(clade, X1.72) %>% tally %>% spread(X1.72, n, fill='.')

In [None]:
# G10:U25
identities %>% select(clade, isotype, X10.25) %>% filter(isotype == "Asp") %>%
  group_by(clade, X10.25) %>% tally %>% spread(X10.25, n, fill='.')
identities %>% select(clade, species_long, isotype, X10.25) %>% filter(isotype == "Asp") %>%
  group_by(clade, species_long, X10.25) %>% tally %>% spread(X10.25, n, fill='.') %>%
  filter(!(`G:C` == '.' & `A:U` == "." & `G:A` == ".")) 

In [None]:
# C38 in plants / insects
identities %>% select(clade, species, isotype, X34) %>% filter(isotype == "Asp", clade == "Insecta") %>%
  group_by(clade, species, X34) %>% tally %>% spread(X34, n, fill = '.')
identities %>% select(clade, species, isotype, X38) %>% filter(isotype == "Asp", clade %in% c("Insecta", "Spermatophyta")) %>%
  group_by(clade, species, X38) %>% tally %>% spread(X38, n, fill = '.')

In [None]:
# G73 in mammals
identities %>% select(restrict, clade, species, isotype, X73) %>% filter(!restrict, clade == "Mammalia", isotype == "Asp") %>%
  group_by(species, X73) %>% tally %>% spread(X73, n, fill = '.') %>% filter(A != '.')
identities %>% filter(!restrict, species %in% c("oviAri3", "rheMac8", "turTru2"), isotype == "Asp", X73 == "A")

# Cys

In [None]:
# U73
identities %>% select(restrict, clade, species, isotype, X73) %>% filter(!restrict, clade == "Insecta", isotype == "Cys") %>%
  group_by(X73) %>% tally

# Gln

In [None]:
# N73
identities %>% select(restrict, clade, species_long, isotype, X73) %>% filter(!restrict, clade == "Fungi", isotype == "Gln") %>%
  group_by(species_long, X73) %>% tally %>% spread(X73, n, '.') %>% filter(`A` != '.' | `C` != '.')

# Glu

In [None]:
# U9
identities %>% select(restrict, clade, species_long, isotype, X9) %>%
  filter(!restrict, clade == "Insecta", isotype == "Glu") %>%
  group_by(species_long, X9) %>% tally %>% spread(X9, n, '.')
identities %>% select(restrict, clade, species_long, isotype, X9) %>%
  filter(!restrict, clade == "Spermatophyta", isotype == "Glu") %>%
  group_by(species_long, X9) %>% tally %>% spread(X9, n, '.')

In [None]:
# R73
identities %>% select(restrict, clade, species_long, isotype, X73) %>%
  filter(!restrict, clade == "Fungi", isotype == "Glu") %>%
  group_by(species_long, X73) %>% tally %>% spread(X73, n, '.')
identities %>% select(restrict, clade, species_long, isotype, X73) %>%
  filter(!restrict, clade == "Spermatophyta", isotype == "Glu") %>%
  group_by(species_long, X73) %>% tally %>% spread(X73, n, '.')

# Gly

In [None]:
# C2:G71 is present alongisde G:U in fungi
identities %>% select(restrict, clade, species_long, isotype, X2.71) %>%
  filter(!restrict, clade == "Fungi", isotype == "Gly") %>%
  group_by(species_long, X2.71) %>% tally %>% spread(X2.71, n, '.') %>%
  kable %>% paste(collapse = '\n') %>% display_markdown

identities %>% select(restrict, clade, species_long, isotype, X2.71) %>%
  filter(!restrict, clade == "Fungi", isotype == "Gly") %>%
  group_by(species_long, X2.71) %>% tally %>% spread(X2.71, n, '.') %>% filter(`C:G` == '.')

In [None]:
# A73 is not conserved in insects
identities %>% select(restrict, clade, species_long, isotype, X73) %>% 
  filter(!restrict, clade == "Insecta", isotype == "Gly") %>%
  group_by(species_long, X73) %>% tally %>% spread(X73, n, '.')

# His

In [None]:
# A73 and U73
identities %>% select(restrict, clade, species_long, isotype, X73) %>%
  filter(!restrict, clade == "Fungi", isotype == "His") %>%
  group_by(species_long, X73) %>% tally %>% spread(X73, n, '.') %>% filter(C != '.' | U != '.')

# Ile

In [None]:
# A34, U34
identities %>% select(restrict, clade, species_long, isotype, X34) %>%
  filter(!restrict, isotype == "Ile") %>%
  group_by(species_long, X34) %>% tally %>% spread(X34, n, '.') %>% 
  kable %>% paste(collapse = '\n') %>% display_markdown

identities %>% select(restrict, clade, species_long, isotype, X34) %>%
  filter(isotype == "Ile") %>%
  group_by(species_long, X34) %>% tally %>% spread(X34, n, '.') %>% filter(A == '.' | U == '.')

In [None]:
# A73 is muddled in insects
identities %>% select(restrict, clade, species_long, isotype, X73) %>%
  filter(!restrict, isotype == "Ile") %>%
  group_by(species_long, X73) %>% tally %>% spread(X73, n, '.') %>% filter(G != '.')

# Leu

In [None]:
# 3:70, 4:69, 5:68
identities %>% select(restrict, clade, isotype, X3.70) %>%
  filter(!restrict, isotype == "Leu") %>%
  group_by(clade, X3.70) %>% tally %>% filter(n > 1) %>% spread(X3.70, n, '.')
identities %>% select(restrict, clade, isotype, X4.69) %>%
  filter(!restrict, isotype == "Leu") %>%
  group_by(clade, X4.69) %>% tally %>% filter(n > 1) %>% spread(X4.69, n, '.')
identities %>% select(restrict, clade, isotype, X5.68) %>%
  filter(!restrict, isotype == "Leu") %>%
  group_by(clade, X5.68) %>% tally %>% filter(n > 1) %>% spread(X5.68, n, '.')


In [None]:
# C20a
identities %>% select(restrict, clade, isotype, X20a) %>%
  filter(!restrict, isotype == "Leu") %>%
  group_by(clade, X20a) %>% tally %>% filter(n > 1) %>% spread(X20a, n, '.')

# Lys

In [None]:
# N73 nonspecificity
identities %>% select(restrict, clade, isotype, X73) %>% filter(!restrict, isotype == "Lys") %>%
  group_by(clade, X73) %>% tally %>% spread(X73, n, '.')

In [None]:
identities %>% filter(!restrict, clade == "Mammalia", isotype == "Lys", species == "hg19") %>% select(seqname, X18.55, score, isoscore)

In [None]:
# 18:55 in mammals
identities %>% filter(!restrict, clade == "Mammalia", isotype == "Lys") %>% count(species, X18.55) %>%
  spread(X18.55, n, fill = 0)

# Met

# iMet

In [None]:
# G21
identities %>% select(restrict 

In [None]:
# 29:41
identities %>% select(restrict, clade, X29.41, isotype) %>% filter(isotype == "iMet") %>%
  group_by(clade, X29.41) %>% tally %>% spread(X29.41, n, '.')

identities %>% select(restrict, clade, species_long, X29.41, isotype) %>% filter(clade %in% c("Fungi", "Nematoda"), isotype == "iMet") %>%
  group_by(species_long, X29.41) %>% tally %>% spread(X29.41, n, '.') %>% filter(`A:U` != '.')

In [None]:
# lack of U54
identities %>% select(restrict, clade, species_long, X54, isotype) %>% filter(isotype == "iMet") %>%
  group_by(species_long, X54) %>% tally %>% spread(X54, n, '.') %>% filter(C != '.' | U != '.')

In [None]:
identities %>% filter(str_detect(species_long, "Encephalito")) %>% filter(isotype == 'Met') %>%
kable %>% paste(collapse='\n') %>% display_markdown

# Phe

In [None]:
# G20
identities %>% select(restrict, clade, species_long, X20, isotype) %>% filter(isotype == "Phe", clade %in% c('Spermatophyta', 'Fungi')) %>%
  group_by(clade, species_long, X20) %>% tally %>% spread(X20, n, 0) %>% filter(A != 0 | U != 0)

# Pro

In [None]:
# 13:22 mismatch
identities %>% select(restrict, clade, species_long, X13.22, isotype) %>% filter(isotype == "Pro") %>%
  group_by(X13.22, species_long) %>% tally %>% spread(X13.22, n, 0) %>%
  kable %>% paste(collapse = '\n') %>% display_markdown

In [None]:
# Botrytis cinerea has a lot of C13:G22s
identities %>% filter(isotype == "Pro", species == "botrCine_B05_10") %>%
  select(clade, isotype, species, species_long, isotype_best, anticodon, score, isoscore, isoscore_ac, quality, restrict, X13.22)

# Is TGG enriched for paired identities?
identities %>% filter(isotype == "Pro") %>% group_by(anticodon, X13.22) %>% tally %>% spread(X13.22, n, 0)

## Ser

In [None]:
identities %>% select(restrict, clade, V.arm, isotype) %>% filter(!restrict, isotype %in% c("Leu", "Ser")) %>%
  group_by(isotype, clade) %>% summarize(n = mean(V.arm)) %>%
  ggplot() + geom_bar(aes(y = n, x = isotype, fill = clade), position = 'dodge', stat = 'identity')

# Thr

In [None]:
identities %>% filter(isotype == "Thr") %>% select(clade, X73) %>% group_by(clade, X73) %>% tally %>% spread(X73, n, 0)

# Trp

In [None]:
identities %>% filter(isotype == "Trp") %>% select(clade, species_long, X35) %>% group_by(species_long, X35) %>%
  tally %>% spread(X35, n, 0) %>% kable %>% paste(collapse = '\n') %>% display_markdown

# Tyr

In [None]:
# 1:72
identities %>% select(restrict, species_long, clade, isotype, X1.72, X2.71) %>% filter(!restrict, clade %in% c("Fungi", "Nematoda"), isotype == "Tyr") %>%
  group_by(species_long, X1.72) %>% tally %>% spread(X1.72, n, 0) %>% filter(sum(`A:G`, `A:U`, `C:A`, `C:G`, `G:C`, `G:U`, `U:A`) != `C:G`)

In [None]:
identities %>% filter(species == "caePb2", isotype == "Tyr") %>% select(-seqname) %>% unique %>% kable %>% paste(collapse = '\n') %>% display_markdown

# Val

In [None]:
# A/U 73
identities %>% select(restrict, clade, species_long, isotype, X73) %>% filter(clade %in% c("Fungi", "Nematoda"), isotype == "Val") %>%
  group_by(X73, clade) %>% tally %>% spread(X73, n, 0)

In [None]:
# species breakdown A/U 73
identities %>% select(restrict, clade, species_long, isotype, X73) %>% filter(clade %in% c("Fungi", "Nematoda"), isotype == "Val") %>%
  group_by(X73, species_long, clade) %>% tally %>% spread(X73, n, 0) %>% filter(G != 0 | U != 0) %>% arrange(clade)


# Misc sequence features

## 3D interaction freqs

In [None]:
# 9:23
identities %>% select(restrict, clade, species_long, isotype, X9.23) %>% filter(!restrict) %>%
  group_by(clade, X9.23) %>% tally %>% spread(X9.23, n, fill='.')

# 10:45
identities %>% select(restrict, clade, species_long, isotype, X10.45) %>% filter(!restrict) %>%
  group_by(clade, X10.45) %>% tally %>% spread(X10.45, n, fill='.')

# 22:46
identities %>% select(restrict, clade, species_long, isotype, X22.46) %>% filter(!restrict) %>%
  group_by(clade, X22.46) %>% tally %>% spread(X22.46, n, fill='.')

In [None]:
identities %>% filter(!restrict) %>% select(intron) %>% filter(intron == 0) %>% nrow

## Intron Y32 R37 requirements

In [None]:
# Y32 divergence
identities %>% filter(!restrict, X32 %in% c("A", "G")) %>% 
  select(clade, species_long, seqname, isotype, X32, intron) %>% filter(intron != 0) %>% spread(X32, X32, '-')

# R37 divergence
identities %>% filter(!restrict, X37 %in% c("C", "U")) %>% 
  select(clade, species_long, seqname, isotype, X37, intron) %>% filter(intron != 0) %>% spread(X37, X37, '-')

# avg. score and isoscore_ac
identities %>% filter(!restrict, (X37 %in% c("C", "U")) | (X32 %in% c("A", "G"))) %>% 
  select(clade, species_long, seqname, isotype, score, isoscore_ac, X32, X37, intron) %>% filter(intron != 0)


## Ile-GAT sparing

In [None]:
identities %>% select(clade, isotype, anticodon, species, score, isoscore_ac) %>% 
  filter(isotype == "Ile", clade %in% c("Mammalia", "Insecta")) %>%
  group_by(clade, species, anticodon) %>%
  summarize(n = length(anticodon)) %>%
  spread(anticodon, n, 0) %>% filter(GAT != 0)

## Base pair mismatches

In [None]:
# 3:70 in mammals
identities %>% filter(!restrict) %>%
  select(clade, species, isotype, X3.70) %>% filter(clade == "Mammalia", isotype == "Gln") %>% 
  group_by(species, X3.70) %>% tally %>% spread(X3.70, n, 0)

In [None]:
df = identities %>% select(clade, species, isotype, X1.72) %>% filter(clade == "Vertebrata", isotype == "Ala") %>% group_by(X1.72) %>% tally

## 13:22

In [None]:
identities %>% filter(isotype %in% c("Leu", "Ser", "Val", "Pro")) %>% select(clade, species, isotype, X13.22) %>%
  group_by(clade, isotype, X13.22) %>% tally %>% spread(X13.22, n, 0)

## 27:43 in Phe

In [None]:
identities %>% filter(isotype == "Phe", clade %in% c("Fungi", "Nematoda")) %>% select(clade, species, isotype, X27.43) %>%
  group_by(clade, species, isotype, X27.43) %>% tally %>% spread(X27.43, n, 0) %>% filter(`A:G` != 0 | `C:A` != 0 | `G:A` != 0 | `U:U` != 0)

## 50:64 in Ile

In [None]:
identities %>% filter(isotype == "Ile", clade == "Insecta") %>% select(clade, species, isotype, X50.64) %>%
  group_by(clade, species, isotype, X50.64) %>% tally %>% spread(X50.64, n, 0)

In [None]:
identities %>% filter(!restrict, species == "micMur1", isotype == "Ser")  %>% select(species, seqname, score, isotype, isoscore_ac, X5.68, X6.67, X7.66)

In [None]:
identities %>% filter(!restrict, clade == "Mammalia", isotype == "Ser") %>% select(species, X5.68, X6.67, X7.66) %>%
  gather(position, feature, -species) %>% group_by(feature) %>% tally %>% spread(feature, n, 0)

identities %>% filter(!restrict, clade == "Mammalia", isotype == "Ser") %>% select(species, X5.68, X6.67, X7.66) %>%
  gather(position, feature, -species) %>% group_by(species, feature) %>% tally %>% spread(feature, n, 0)