In [2]:
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (15, 6)
mpl.style.use('ggplot')
import pandas as pd
import numpy
import subprocess
import glob
import re
import os, sys
from collections import defaultdict, Counter, OrderedDict
from Bio import SeqIO, SeqRecord, Seq
from tRNA_position import *
pd.set_option('display.max_colwidth',10000)
pd.set_option('display.width', 10000)
pd.set_option('display.max_columns', 10000)
pd.set_option('display.max_rows',1000)
isotypes = ['Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Gln', 'Glu', 'Gly', 'His', 'Ile', 'iMet', 'fMet', 'Leu', 'Lys', 'Met', 'Phe', 'Pro', 'Ser', 'Thr', 'Trp', 'Tyr', 'Val']

# Introduction

A global view of identity elements versus biological features would be a powerful tool for predicting tRNA function using primary sequence. I've done this for eukaryotic tRNAs, so this is an extension to prokaryotes.

# Process tRNAs
## Species information

In [44]:
species_table = pd.read_table('genomes-091217.tsv', header=None, names=['species', 'longname', 'domain', 'clade', 'taxid'], dtype={'taxid': str})
species_table.head()

Unnamed: 0,species,longname,domain,clade,taxid
0,acidHosp1,Acidianus hospitalis W1,archaea,Crenarchaeota,Thermoprotei
1,acidSacc1,Acidilobus saccharovorans 345-15,archaea,Crenarchaeota,Thermoprotei
2,aerPer1,Aeropyrum pernix K1,archaea,Crenarchaeota,Thermoprotei
3,caldMaqu1,Caldivirga maquilingensis IC-167,archaea,Crenarchaeota,Thermoprotei
4,desuKamc1,Desulfurococcus kamchatkensis 1221n,archaea,Crenarchaeota,Thermoprotei


The following species are removed because of a lack of `.iso` file:
- nitrMari1
- pyroIsla1
- therNeut1

In [104]:
species = sorted(glob.glob("out/*-tRNAs.out"))
species = [sp[4:-10] for sp in species]
seqs = []

for sp in species:

  tRNA_file = 'tRNAs/{}-tRNAs.fa'.format(sp)
  tscanout_file = 'out/{}-tRNAs.out'.format(sp)
  ss_file = 'ss/{}-tRNAs.ss'.format(sp)
  iso_file = 'iso/{}-tRNAs.iso'.format(sp)

  # Import annotation from tRNAscan .out files. We want to keep everything that's not "Pseudo"
  approved_tRNAs = []
  intron_lengths = [] # we are purging introns to improve alignment, so store intron lengths here
  for metadata in pd.read_table(tscanout_file, sep="\t", skiprows=3, header=None).iterrows():
    if metadata[1].iloc[-1] in ["Pseudo", "Isotype mismatch;Pseudo"]: continue
    approved_tRNAs.append('{}.trna{}-{}{}'.format(metadata[1][0].strip(), metadata[1][1], metadata[1][4], metadata[1][5]))
    if type(metadata[1][6]) == str:
      intron_length = ','.join([str(abs(int(start) - int(end)) + 1) for start, end in zip(metadata[1][6].split(','), metadata[1][7].split(','))])
    else:
      intron_length = abs(int(metadata[1][6]) - int(metadata[1][7]))
      if intron_length > 0: intron_length = intron_length + 1
    intron_lengths.append(intron_length)
      
  # Remove introns using sstofa
  subprocess.call('sstofa3 {} "" 1 0 > {}'.format(ss_file, tRNA_file), shell=True)

  # Parse isotype-specific scores file
  iso_scores = pd.read_table(iso_file, header=0)
  iso_scores['Undet'] = 0
  iso_scores['Sup'] = 0
  if 'iMet' not in iso_scores: iso_scores['iMet'] = 0
  if 'fMet' not in iso_scores: iso_scores['fMet'] = 0
  iso_scores['best'] = iso_scores.ix[:,2:].idxmax(axis=1)
  iso_scores['score'] = iso_scores.max(axis=1, numeric_only=True)
  iso_scores['ac_score'] = iso_scores.lookup(iso_scores.index, iso_scores.ix[:, 1])
  iso_scores.index = iso_scores.tRNAscanID.values
  iso_scores = iso_scores[['best', 'score', 'ac_score']]
  
  # Compile sequence information into a list
  for seq in SeqIO.parse('tRNAs/{}-tRNAs.fa'.format(sp), 'fasta'):
    if seq.id not in approved_tRNAs: continue
    if "pseudogene" in seq.description: continue
    
    # filter low-scoring tRNAs
    score = float(re.findall('Sc: [\d\.]+', seq.description)[0].split()[-1])
    if score < 50: continue
    
    # get tRNA attributes
    intron_length = intron_lengths[approved_tRNAs.index(seq.id)]
    trnascanid = re.findall('.+\.trna\d+', seq.id)[0]
    isotype = iso_scores.ix[trnascanid].best
    
    # filter selenocysteine tRNAs - these have a different 2d structure
    if isotype == 'SeC': continue
    isoscore, ac_score = iso_scores.ix[trnascanid].score, iso_scores.ix[trnascanid].ac_score
    seq.id = '{}|{} Iso: {} ({}) Iso_ac: {} Intron: {}'.format(sp, seq.description, isoscore, isotype, ac_score, intron_length)
    seq.description = ''
    seqs.append(seq)

In [105]:
fasta_handle = open('pro-tRNAs.fa', 'w')
SeqIO.write(seqs, fasta_handle, 'fasta')
fasta_handle.close()
num_model = '/projects/lowelab/users/blin/tRNAscan/models/domain-specific/pro-num-091217.cm'
subprocess.call('cmalign -g --notrunc -o pro-tRNAs.sto {} pro-tRNAs.fa'.format(num_model), shell=True)

## Create table of tRNA bases by position

This is a giant data frame with one row per tRNA, and with columns for each position, plus tRNA metadata like species and loop lengths.

In [124]:
def position_base(positions, seq):
  for position_index, position in enumerate(positions):
    if position.paired:
      index1, index2 = position.position.split(':')
      index1, index2 = int(index1), int(index2)
      base_pair = "{}:{}".format(seq[index1 - 1].upper(), seq[index2 - 1].upper())
      yield position.sprinzl, base_pair
    else:
      index = int(position.position)
      base = seq[index - 1].upper()
      yield position.sprinzl, base
      
identities = pd.DataFrame()

# get positions
alignment_fhandle = open('pro-tRNAs.sto')
positions = [] # list containing each position in the tRNA

# first, get secondary structure
# also count the number of lines for progress bar
ss_count = 0
for line in alignment_fhandle:
  if line[0:4] == "#=GR": ss_count += 1
  if line[0:12] == '#=GC SS_cons':
    ss = line.strip().split()[-1]
alignment_fhandle.close()

# parse secondary structure into regions and positions
positions = annotate_positions(ss)

# get nucleotide at each position for each tRNA by parsing Stockholm file
alignment_fhandle = open('pro-tRNAs.sto')
trnas = []
skipped = []
n = 0
for line in alignment_fhandle:
  if line[0] in ["#", '\n', '/']: continue
  
  # status update
  n += 1
  if n % int(ss_count / 10) == 0: print("{} tRNAs processed ({}%)".format(n, round(n / ss_count * 100)))
  sys.stdout.flush()
  
  # parse secondary structure
  species, desc = line.strip().split('|', 1)
  seqname = desc.split()[0]
  seq = desc.split()[-1]
  if any(species_table.species == species):
    row = species_table[species_table.species == species]
  else: 
    skipped.append(species)
    continue
  isotype = re.findall('\.trna\d+-([A-Za-z]+)', seqname)[0][:-3]
  seqname = '{}_{}'.format(species, seqname)
  trna = {'domain': row.domain.values[0], 'clade': row.clade.values[0], 'species': species, 'species_long': row.longname.values[0], 'taxid': row.taxid.values[0], 'seqname': seqname, 'isotype': isotype}
  trna = {**trna, **{sprinzl: base for sprinzl, base in position_base(positions, seq)}}
  trnas.append(trna)

if len(skipped) > 0: print('skipped the following: {}'.format(set(skipped)))
identities = identities.append(trnas, ignore_index=True)
identities.fillna('.', inplace=True)
alignment_fhandle.close()

18972 tRNAs processed (10%)
37944 tRNAs processed (20%)
56916 tRNAs processed (30%)
75888 tRNAs processed (40%)
94860 tRNAs processed (50%)
113832 tRNAs processed (60%)
132804 tRNAs processed (70%)
151776 tRNAs processed (80%)
170748 tRNAs processed (90%)
189720 tRNAs processed (100%)
skipped the following: {'aeroCami_SY1_JCM_12091', 'haloMedi_ATCC33500_CGMCC_1_208', 'leptInte_LAI_56601', 'ammoDege_KC4', 'shewPutr_CN_32', 'sulfIsla_M_16_2', 'sulfIsla_M_16_23', 'candNitr_EVERGLADENSIS_SR1', 'flavPsyc_JIP02_86', 'sulfSolf_SULA', 'shewFrig_NCIMB_400', 'candMeth_INTESTINALIS_ISSOIRE', 'carbHydr_Z_2901', 'sulfAcid_N8', 'haemInfl_RD_KW20', 'koraCryp1', 'vibrVuln_CMCP6', 'borrBurg_B31', 'eschColi_B_REL606', 'saliRube_M8', 'rhodMari_SG0_5JP17_172', 'persMari_EX_H1', 'sulfIsla_LAL14_1', 'pyroFuma1', 'sulfAcid_SUSAZ', 'butyProt_B316', 'caldLagu_DSM15908', 'shewOnei_MR_1', 'shewSp_MR_7', 'haloWals_DSM16790_HBSQ001', 'shigFlex_2A_301', 'sulfIsla_M_16_43', 'shewHali_HAW_EB4', 'therCurv_DSM43183', '

### Create single base columns from paired positions

In [125]:
cols = list(filter(lambda x: ':' in x, identities.columns))
for col in cols:
  pos1, pos2 = col.split(':')
  base1 = [bases.split(':')[0] for bases in identities[col]]
  base2 = [bases.split(':')[1] for bases in identities[col]]
  identities[pos1] = base1
  identities[pos2] = base2

### Additional sequence information

In [129]:
# Isotype, anticodon, score, isoscores
seqinfo = []
for line in open('pro-tRNAs.sto'):
  if line[0:4] != "#=GS": continue
  _, desc, _, _, isotype, anticodon, _, _, _, score, _, isoscore, isotype_best, _, isoscore_ac, _, intron_length = line.strip().split()
  seqname = desc.replace('|', '_', 1)
  seqinfo.append([seqname, isotype_best[1:-1], anticodon[1:-1], float(score), float(isoscore), float(isoscore_ac), str(intron_length)])
seqinfo = pd.DataFrame(seqinfo, columns=['seqname', 'isotype_best', 'anticodon', 'score', 'isoscore', 'isoscore_ac', 'intron'])
identities = identities.merge(seqinfo, on='seqname')

# iMet/fMet is likely not to be properly annotated
unlifted_imets = identities.ix[(identities.isotype == "Met") & (identities.isotype_best == "iMet"), ].index
identities.ix[unlifted_imets, 'isotype'] = 'iMet'

unlifted_fmets = identities.ix[(identities.isotype == "Met") & (identities.isotype_best == "fMet"), ].index
identities.ix[unlifted_imets, 'isotype'] = 'fMet'

# GC content
paired_cols = identities.columns[list(map(lambda x: (':' in x), identities.columns))]
identities['GC'] = identities[paired_cols].apply(lambda x: sum((x == "G:C") | (x == "C:G"))/len(paired_cols), axis=1)

### Insertions/deletions

This doesn't take into account noncanonical introns in archaea.

In [130]:
# Insertions (minus misaligned introns at 37/38)
intron_cols = list(filter(lambda x: x[0:3] == '37i', identities.columns))
insertion_cols = list(filter(lambda x: bool(re.search('^\d+i', x)) & (x not in intron_cols), identities.columns))
identities['insertions'] = identities[insertion_cols].apply(lambda x: sum(x != '.'), axis=1)

# Deletions at positions that are not the variable arm, and not counting 17/17a/20a/20b
base_cols = list(filter(lambda x: bool(re.match('^\d+$', x)) & (x not in ['74', '75', '76', '17', '17a', '20a', '20b']), identities.columns))
identities['deletions'] = identities[base_cols].apply(lambda x: ''.join(x).count('-'), axis=1)

### Restrict tRNAs by species

We may also want to limit the contribution of any single species, similar to how we built the isotype-specific models. The restricted set is a subset of the quality set.

In [131]:
identities2 = identities.copy

In [151]:
identities.shortname = identities.species
identities.species = numpy.apply_along_axis(numpy.vectorize(lambda x: ' '.join(x.split(' ')[:2])), 0, identities.species_long)

In [158]:
identities.ix[:, 'restrict'] = False
for species in identities.species.unique():
  sys.stdout.write("Processing {}...".format(species))
  sys.stdout.flush()
  species_df = identities.ix[identities.species == species, :]

  for isotype in isotypes:
    sys.stdout.flush()
    isotype_indices = identities.ix[(identities.species == species) & (identities.isotype == isotype) & (-identities.restrict), ].index
    unique_scores_indices = isotype_indices[-identities.ix[isotype_indices, 'score'].duplicated()]
    unique_scores_indices = unique_scores_indices[numpy.argsort(identities.ix[unique_scores_indices, 'score'])][::-1][:50]
    restricted_indices = list(set(isotype_indices) - set(unique_scores_indices))    
    identities.ix[restricted_indices, 'restrict'] = True

  print("done")
  sys.stdout.flush()

Processing Acaryochloris marina...done
Processing Acetohalobium arabaticum...done
Processing Acetobacterium woodii...done
Processing Aciduliprofundum boonei...done
Processing Acidithiobacillus caldus...done
Processing Acidothermus cellulolyticus...done
Processing Acidaminococcus fermentans...done
Processing Acidithiobacillus ferrooxidans...done
Processing Acidithiobacillus ferrivorans...done
Processing Acidianus hospitalis...done
Processing Acidaminococcus intestini...done
Processing Acidilobus saccharovorans...done
Processing Aciduliprofundum sp....done
Processing Acinetobacter baumannii...done
Processing Acinetobacter calcoaceticus...done
Processing Acinetobacter oleivorans...done
Processing Acinetobacter sp....done
Processing Actinobacillus equuli...done
Processing Actinoplanes friuliensis...done
Processing Actinosynnema mirum...done
Processing Actinoplanes missouriensis...done
Processing Actinobacillus pleuropneumoniae...done
Processing Actinobaculum schaalii...done
Processing Acti

## Export to R

R has superior visualization capabilities.

### Order columns

To make it look pretty.

In [164]:
def position_str_to_int(position):
  if position == "20a": return 20.1
  if position == "20b": return 20.2
  digits = re.findall('\d+', position)
  if len(digits) == 0: return -1
  insert = 0
  if 'i' in position and len(digits) == 2: insert = float(digits[1]) / 1000
  if position[0] == 'V':
    if ':' in position: return int(digits[0]) + 45 - 10 + insert # V11~V17
    else: return int(digits[0]) + 45 + 7 + insert # V1~V5
  if int(digits[0]) >= 46: return int(digits[0]) + 50 + insert # just add an arbitrarily large number to skip v-arm
  return int(digits[0]) + insert

identities = identities[sorted(list(identities.columns), key=position_str_to_int)]

In [165]:
identities.to_csv(path_or_buf='identities.tsv', sep='\t', index_label=False)

In [166]:
identities.head()

Unnamed: 0,clade,domain,isotype,seqname,species,species_long,taxid,isotype_best,anticodon,score,isoscore,isoscore_ac,intron,GC,insertions,deletions,restrict,1:72,1,1i1,2:71,2,2i1,3:70,3,3i1,3i2,3i3,3i4,3i5,3i6,4:69,4,4i1,4i2,4i3,5:68,5,6:67,6,6i1,6i2,7:66,7,7i1,7i2,8,8i1,9,9i1,10:25,10,10i1,10i2,11:24,11,12:23,12,12i1,13:22,13,13i1,13i2,13i3,13i4,13i5,14,14i1,14i2,14i3,14i4,14i5,14i6,14i7,14i8,14i9,14i10,14i11,14i12,14i13,14i14,14i15,15,15i1,15i2,15i3,15i4,15i5,15i6,15i7,15i8,15i9,16,16i1,16i2,16i3,16i4,16i5,16i6,16i7,16i8,16i9,16i10,16i11,16i12,16i13,16i14,16i15,16i16,16i17,16i18,16i19,16i20,16i21,16i22,16i23,16i24,16i25,16i26,16i27,16i28,16i29,16i30,16i31,16i32,16i33,16i34,16i35,16i36,16i37,16i38,16i39,16i40,16i41,16i42,16i43,16i44,16i45,16i46,16i47,16i48,16i49,16i50,17,17a,18,19,19i1,19i2,19i3,19i4,19i5,19i6,19i7,19i8,19i9,19i10,19i11,19i12,19i13,19i14,19i15,19i16,19i17,19i18,19i19,19i20,19i21,19i22,19i23,19i24,19i25,19i26,19i27,19i28,19i29,19i30,19i31,19i32,19i33,19i34,19i35,19i36,20,20i1,20a,20b,21,22,22i1,22i2,23,23i1,23i2,23i3,23i4,23i5,23i6,23i7,23i8,23i9,23i10,23i11,23i12,23i13,23i14,23i15,23i16,24,24i1,25,26,26i1,26i2,27:43,27,27i1,28:42,28,28i1,29:41,29,29i1,30:40,30,30i1,31:39,31,31i1,31i2,31i3,31i4,31i5,31i6,31i7,31i8,31i9,31i10,31i11,31i12,32,32i1,32i2,32i3,32i4,33,34,35,36,37,37i1,37i2,37i3,37i4,37i5,37i6,37i7,37i8,37i9,37i10,37i11,37i12,37i13,37i14,37i15,37i16,37i17,37i18,37i19,37i20,37i21,37i22,37i23,37i24,37i25,37i26,37i27,37i28,37i29,37i30,37i31,37i32,37i33,37i34,37i35,37i36,37i37,37i38,37i39,37i40,37i41,37i42,37i43,37i44,37i45,37i46,37i47,37i48,37i49,37i50,37i51,37i52,37i53,37i54,37i55,37i56,37i57,37i58,37i59,37i60,37i61,37i62,37i63,37i64,37i65,37i66,37i67,37i68,37i69,37i70,37i71,37i72,37i73,37i74,37i75,37i76,37i77,37i78,37i79,37i80,37i81,37i82,37i83,37i84,37i85,37i86,37i87,37i88,37i89,37i90,37i91,37i92,37i93,37i94,37i95,37i96,37i97,37i98,37i99,37i100,37i101,37i102,37i103,37i104,37i105,37i106,37i107,37i108,37i109,37i110,37i111,37i112,37i113,37i114,37i115,37i116,37i117,37i118,37i119,37i120,37i121,37i122,37i123,37i124,37i125,37i126,37i127,37i128,37i129,37i130,37i131,37i132,37i133,37i134,37i135,37i136,37i137,37i138,37i139,37i140,37i141,37i142,37i143,37i144,37i145,37i146,37i147,37i148,37i149,37i150,37i151,37i152,37i153,37i154,37i155,37i156,37i157,37i158,37i159,37i160,37i161,37i162,37i163,37i164,37i165,37i166,37i167,37i168,37i169,37i170,37i171,37i172,37i173,37i174,37i175,37i176,37i177,37i178,37i179,37i180,37i181,37i182,37i183,37i184,37i185,37i186,37i187,37i188,37i189,37i190,37i191,37i192,37i193,37i194,37i195,37i196,37i197,37i198,37i199,37i200,37i201,37i202,37i203,37i204,37i205,37i206,37i207,37i208,37i209,37i210,37i211,37i212,37i213,37i214,37i215,37i216,37i217,37i218,37i219,37i220,37i221,37i222,37i223,37i224,37i225,37i226,37i227,37i228,37i229,37i230,37i231,37i232,37i233,37i234,37i235,37i236,37i237,37i238,37i239,37i240,37i241,37i242,37i243,37i244,37i245,37i246,37i247,37i248,37i249,37i250,37i251,37i252,38,38i1,38i2,39,39i1,40,41,41i1,42,42i1,43,44,44i1,44i2,44i3,44i4,44i5,44i6,44i7,44i8,44i9,44i10,44i11,44i12,44i13,44i14,44i15,44i16,44i17,44i18,44i19,44i20,44i21,44i22,45,V11:V21,V12:V22,V13:V23,V14:V24,V15:V25,V16:V26,V17:V27,V1,V2,V3,V4,V5,V11,V12,V13,V14,V15,V16,V17,V21,V22,V23,V24,V25,V26,V27,46,47,47i1,47i2,47i3,47i4,47i5,47i6,48,49:65,49,50:64,50,50i1,51:63,51,51i1,52:62,52,52i1,53:61,53,53i1,53i2,53i3,53i4,53i5,53i6,53i7,53i8,53i9,53i10,53i11,53i12,53i13,53i14,53i15,53i16,53i17,53i18,53i19,53i20,53i21,53i22,53i23,53i24,53i25,53i26,53i27,53i28,53i29,53i30,53i31,53i32,53i33,53i34,53i35,53i36,53i37,53i38,53i39,53i40,54,54i1,55,56,57,57i1,58,59,59i1,59i2,59i3,60,60i1,60i2,60i3,60i4,60i5,60i6,60i7,60i8,60i9,60i10,60i11,60i12,60i13,60i14,60i15,60i16,60i17,60i18,60i19,60i20,60i21,60i22,60i23,60i24,60i25,60i26,60i27,60i28,60i29,60i30,60i31,60i32,60i33,60i34,60i35,60i36,60i37,60i38,60i39,60i40,60i41,60i42,60i43,60i44,60i45,60i46,60i47,60i48,60i49,60i50,60i51,60i52,60i53,60i54,60i55,60i56,60i57,60i58,60i59,60i60,60i61,60i62,60i63,60i64,60i65,60i66,60i67,60i68,61,62,62i1,62i2,63,63i1,64,64i1,64i2,65,65i1,66,66i1,67,67i1,67i2,67i3,67i4,68,68i1,68i2,68i3,69,69i1,70,70i1,71,71i1,72,72i1,72i2,73,74,75,76
0,Cyanobacteria,bacteria,Arg,acarMari_MBIC11017_chr.trna1-ArgTCT,Acaryochloris marina,Acaryochloris marina MBIC11017,Oscillatoriophycideae,Arg,TCT,80.5,96.2,96.2,0,0.535714,0,0,False,G:C,G,.,C:G,C,.,G:C,G,.,.,.,.,.,.,C:G,C,.,.,.,U:A,U,C:G,C,.,.,G:C,G,.,.,U,.,A,.,G:C,G,.,.,C:G,C,U:A,U,.,C:G,C,.,.,.,.,.,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,.,.,.,.,.,.,.,.,.,C,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,-,-,G,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,A,.,U,-,A,G,.,.,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,.,C,A,.,.,G:U,G,.,U:A,U,.,U:A,U,.,G:C,G,.,C:G,C,.,.,.,.,.,.,.,.,.,.,.,.,C,.,.,.,.,U,U,C,U,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,A,.,.,G,.,C,A,.,A,.,U,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,-:-,-:-,-:-,-:-,-:-,-:-,-:-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,G,U,.,.,.,.,.,.,C,G:C,G,C:G,C,.,A:U,A,.,G:C,G,.,G:C,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,U,.,U,C,G,.,A,G,.,.,.,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,C,C,.,.,U,.,G,.,.,C,.,C,.,G,.,.,.,.,A,.,.,.,G,.,C,.,G,.,C,.,.,G,-,-,-
1,Cyanobacteria,bacteria,Asp,acarMari_MBIC11017_chr.trna2-AspGTC,Acaryochloris marina,Acaryochloris marina MBIC11017,Oscillatoriophycideae,Pro,GTC,71.8,82.6,60.2,0,0.571429,0,0,False,G:C,G,.,G:C,G,.,G:C,G,.,.,.,.,.,.,G:C,G,.,.,.,G:C,G,U:U,U,.,.,G:C,G,.,.,U,.,A,.,G:C,G,.,.,C:G,C,U:A,U,.,C:G,C,.,.,.,.,.,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,.,.,.,.,.,.,.,.,.,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,C,-,G,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,U,.,C,U,A,G,.,.,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,.,C,A,.,.,G:A,G,.,U:A,U,.,C:G,C,.,G:C,G,.,C:G,C,.,.,.,.,.,.,.,.,.,.,.,.,C,.,.,.,.,U,G,U,C,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,A,.,.,G,.,C,G,.,A,.,A,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,-:-,-:-,-:-,-:-,-:-,-:-,-:-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,G,U,.,.,.,.,.,.,C,G:U,G,C:G,C,.,G:C,G,.,G:C,G,.,G:C,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,U,.,U,C,A,.,A,A,.,.,.,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,C,C,.,.,C,.,G,.,.,U,.,C,.,U,.,.,.,.,C,.,.,.,C,.,C,.,C,.,C,.,.,G,-,-,-
2,Cyanobacteria,bacteria,Cys,acarMari_MBIC11017_chr.trna3-CysGCA,Acaryochloris marina,Acaryochloris marina MBIC11017,Oscillatoriophycideae,Cys,GCA,65.6,73.1,73.1,0,0.5,0,1,False,G:C,G,.,U:A,U,.,C:G,C,.,.,.,.,.,.,C:G,C,.,.,.,A:U,A,G:C,G,.,.,G:C,G,.,.,U,.,C,.,G:C,G,.,.,C:G,C,C:G,C,.,A:A,A,.,.,.,.,.,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,.,.,.,.,.,.,.,.,.,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,-,-,G,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,U,.,C,G,A,A,.,.,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,.,C,G,.,.,U:A,U,.,C:G,C,.,G:C,G,.,G:U,G,.,U:A,U,.,.,.,.,.,.,.,.,.,.,.,.,C,.,.,.,.,U,G,C,A,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,A,.,.,A,.,U,C,.,G,.,A,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,-,-:-,-:-,-:-,-:-,-:-,-:-,-:-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,A,U,.,.,.,.,.,.,C,G:C,G,U:A,U,.,G:C,G,.,G:C,G,.,G:C,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,U,.,U,C,A,.,A,C,.,.,.,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,C,C,.,.,C,.,A,.,.,C,.,C,.,C,.,.,.,.,U,.,.,.,G,.,G,.,A,.,C,.,.,U,C,C,A
3,Cyanobacteria,bacteria,Trp,acarMari_MBIC11017_chr.trna4-TrpCCA,Acaryochloris marina,Acaryochloris marina MBIC11017,Oscillatoriophycideae,Trp,CCA,55.1,59.9,59.9,0,0.428571,0,1,False,G:C,G,.,C:G,C,.,G:C,G,.,.,.,.,.,.,G:C,G,.,.,.,G:U,G,G:C,G,.,.,A:U,A,.,.,U,.,G,.,G:C,G,.,.,U:A,U,G:C,G,.,U:A,U,.,.,.,.,.,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,A,.,.,.,.,.,.,.,.,.,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,-,-,G,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,C,.,-,-,A,A,.,.,C,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,A,.,C,C,.,.,U:A,U,.,C:G,C,.,A:U,A,.,G:C,G,.,U:G,U,.,.,.,.,.,.,.,.,.,.,.,.,C,.,.,.,.,U,C,C,A,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,A,.,.,G,.,C,U,.,G,.,A,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,-,-:-,-:-,-:-,-:-,-:-,-:-,-:-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,G,U,.,.,.,.,.,.,U,C:G,C,U:A,U,.,G:U,G,.,G:C,G,.,G:C,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,U,.,U,C,A,.,A,A,.,.,.,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,C,C,.,.,U,.,A,.,.,G,.,U,.,C,.,.,.,.,U,.,.,.,C,.,C,.,G,.,C,.,.,G,-,-,-
4,Cyanobacteria,bacteria,Glu,acarMari_MBIC11017_chr.trna5-GluTTC,Acaryochloris marina,Acaryochloris marina MBIC11017,Oscillatoriophycideae,Gln,TTC,64.2,53.1,34.1,0,0.464286,0,0,False,A:U,A,.,G:C,G,.,U:A,U,.,.,.,.,.,.,G:C,G,.,.,.,G:C,G,U:A,U,.,.,G:C,G,.,.,U,.,A,.,C:G,C,.,.,G:C,G,C:G,C,.,A:A,A,.,.,.,.,.,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,A,.,.,.,.,.,.,.,.,.,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,-,-,G,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,U,.,U,U,A,A,.,.,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,C,.,G,A,.,.,C:G,C,.,U:A,U,.,U:A,U,.,G:C,G,.,A:C,A,.,.,.,.,.,.,.,.,.,.,.,.,U,.,.,.,.,U,U,U,C,A,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,A,.,.,C,.,C,A,.,A,.,G,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,G,-:-,-:-,-:-,-:-,-:-,-:-,-:-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,A,U,.,.,.,.,.,.,U,G:U,G,C:G,C,.,G:C,G,.,G:C,G,.,G:C,G,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,U,.,U,C,A,.,A,A,.,.,.,U,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,C,C,.,.,C,.,G,.,.,U,.,C,.,A,.,.,.,.,C,.,.,.,C,.,A,.,C,.,U,.,.,C,C,C,A
