# Synthesis of RNA sequences similar to given non-coding RNA families



In [33]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension; mode 2 re-imports modules before each cell runs,
# so edits to imported project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
import logging
from eden.util import configure_logging
# Configure the root logger (logging.getLogger() with no name) at verbosity 1.
# NOTE(review): filename=None presumably means no log file is written -- confirm in eden.util.
configure_logging(logging.getLogger(), verbosity=1, filename=None)

In [35]:
from eden.converter.fasta import fasta_to_sequence
def get_rfam(rfam_id):
    """Load the sequences of one Rfam family alignment.

    The accession is substituted twice into the Rfam alignment URL
    (requested with ``format=fastau``) and the resulting URL is handed to
    ``fasta_to_sequence``.

    Args:
        rfam_id: Rfam family accession, e.g. ``'RF01852'``.

    Returns:
        Whatever ``fasta_to_sequence`` returns for the alignment URL.
    """
    url_template = 'http://rfam.xfam.org/family/%s/alignment?acc=%s&format=fastau&download=0'
    alignment_url = url_template % (rfam_id, rfam_id)
    return fasta_to_sequence(alignment_url)

In [36]:
from eden.util.display import draw_graph
from eden.converter.rna.rnafold import rnafold_to_eden

# Default keyword arguments that plot_rna_seq forwards to draw_graph.
# 'vertex_color' names the 'level' attribute written by the colorize step.
opts = dict(
    size=14,
    colormap='Set3',
    vertex_color='level',
    node_border=False,
    node_size=200,
    font_size=9,
    vertex_alpha=0.9,
    title_key='id',
)

def plot_rna_seq(seq, opts=opts):
    """Fold a single RNA sequence and draw the resulting graph.

    Args:
        seq: one sequence entry as accepted by ``rnafold_to_eden``; callers in
            this notebook pass ``(header, sequence)`` tuples.
        opts: keyword arguments forwarded to ``draw_graph``. Defaults to the
            module-level ``opts`` dictionary.
    """
    # Imported locally, as in the original cell.
    from eden.modifier.graph import vertex_attributes

    graphs = rnafold_to_eden([seq])
    # Write a 'level' attribute derived from the nucleotide label; the default
    # opts use it as the vertex colour.
    graphs = vertex_attributes.colorize(graphs,
                                        output_attribute='level',
                                        labels=['A', 'U', 'C', 'G'])
    # Exactly one sequence went in, so only the first graph is drawn.
    # The builtin next() works on any iterator (Python 2.6+ and 3), unlike the
    # Python-2-only .next() method.
    graph = next(graphs)
    draw_graph(graph, **opts)

In [37]:
from rnasynth.rna_synthesizer import RNASynthesizerInitializer
#help(RNASynthesizerInitializer)
from rnasynth.rna_synthesizer import RNASynth
#help(RNASynth)

### Setup

In [None]:
from rnasynth.rna_synthesizer import RNASynthesizerInitializer
# Settings handed to RNASynthesizerInitializer; the configured synthesizer is
# then read from its `.synthesizer` attribute.
synthesizer_settings = dict(
    instance_score_threshold=1,
    min_size_connected_component_sequence_constraint=3,
    importance_threshold_sequence_constraint=-1,
    min_size_connected_component_structure_constraint=3,
    importance_threshold_structure_constraint=-1,
    n_synthesized_seqs_per_seed_seq=1,
    vectorizer_complexity=4,
    negative_shuffle_ratio=10,
)
synthesizer = RNASynthesizerInitializer(**synthesizer_settings).synthesizer

Created a RNASynthesizer object.


### Fit

In [None]:
%%time
from itertools import islice

# Rfam family used for training. The accession 'RF01685' was previously
# assigned here and immediately overwritten, so it never took effect; swap it
# in below to train on that family instead.
rfam_id = 'RF01852'
seqs = get_rfam(rfam_id)
# Train on at most the first 1000 sequences of the family.
train_seqs = islice(seqs, 1000)
iter_seq = synthesizer.fit(train_seqs)

Starting new HTTP connection (1): rfam.xfam.org


### Synthesize sample sequences

In [None]:
%%time

import itertools
import time

# itertools.izip is Python 2's lazy zip; on Python 3 the builtin zip is lazy
# already. Staying lazy keeps synth_seqs from being consumed before the loop.
lazy_zip = getattr(itertools, 'izip', zip)

# Use the first 5 sequences of the family as seeds.
seqs = get_rfam(rfam_id)
seed_seqs = list(itertools.islice(seqs, 5))
synth_seqs = synthesizer.sample(seed_seqs)

samples = []
start_time = time.time()
for i, (seed_seq, synth_seq) in enumerate(lazy_zip(seed_seqs, synth_seqs)):
    header, seq = synth_seq
    samples.append(synth_seq)
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('%d (%.1f secs)' % (i + 1, time.time() - start_time))
    print(header)
    print(seq)
    print('seed:')
    print(seed_seq[1])
    plot_rna_seq(seed_seq)
    plot_rna_seq(synth_seq)
    # Restart the timer so the next report covers only the time elapsed since
    # this pair was plotted.
    start_time = time.time()

---