In [2]:
import os

import numpy
import pandas

In [4]:
# The two 'prefix' values shared by the design groups below: one is used by
# every 50nt-UTR group, the other by every 25nt-UTR group.
_PREFIX_50NT = 'GGGACATCGTAGAGAGTCGTACTTA'
_PREFIX_25NT = 'GGG'

# One row per design group: (source CSV file, description, prefix).
_design_rows = [
    # 4 sequences with varied MRL from observed data, 50nt utr
    ('from_data_50bp_defined_range_mrl.csv',
     'Varied MRLs from 50bp library',
     _PREFIX_50NT),
    # 4 sequences with highest MRL from observed data, 50nt utr
    ('from_data_50bp_defined_high_mrl.csv',
     'High MRLs from 50bp library',
     _PREFIX_50NT),
    # 4 sequences with highest MRL from observed data, 25nt utr
    ('from_data_25bp_rand_high_mrl.csv',
     'High MRLs from 25bp library',
     _PREFIX_25NT),
    # 4 SeqProp sequences, 50nt utr (pick at least 1 with strong hairpin structure)
    ('seqprop_optimus5p_50bp_selected.csv',
     '50bp UTR designed with SeqProp',
     _PREFIX_50NT),
    # 4 SeqProp sequences, 50nt utr, with VAE reg.
    ('seqprop_vae_optimus5p_50bp_selected.csv',
     '50bp UTR designed with SeqProp + VAE',
     _PREFIX_50NT),
    # 5 SeqProp sequences, 25nt utr (pick at least 1 with strong hairpin structure)
    ('seqprop_optimus5p_25bp_selected.csv',
     '25bp UTR designed with SeqProp',
     _PREFIX_25NT),
    # 2 SeqProp sequences, 25nt utr, with VAE reg.
    ('seqprop_vae_optimus5p_25bp_selected.csv',
     '25bp UTR designed with SeqProp + VAE',
     _PREFIX_25NT),
    # 5 DEN sequences, 50nt utr (pick at least 1 with strong hairpin structure)
    ('genesis_optimus5p_50bp_retrained_ns_earthmover_sequences_selected.csv',
     '50bp UTR designed with DEN',
     _PREFIX_50NT),
    # 2 DEN sequences, 50nt utr, with VAE reg.
    ('genesis_vae_optimus5p_50bp_ns_earthmover_sequences_selected.csv',
     '50bp UTR designed with DEN + VAE',
     _PREFIX_50NT),
    # 4 sequences with varied MRL from Inverse regression DEN, 50nt utr
    ('genesis_invreg_optimus5p_50bp_ns_sequences_selected.csv',
     '50bp UTR with varied MRLs designed with DEN',
     _PREFIX_50NT),
    # 5 DEN sequences, 25nt utr (pick at least 1 with strong hairpin structure)
    ('genesis_vgg16_25bp_rand_earthmover_fw_35_sequences_selected.csv',
     '25bp UTR designed with DEN',
     _PREFIX_25NT),
    # 2 DEN sequences, 25nt utr, with VAE reg.
    ('genesis_vae_vgg16_25bp_rand_earthmover_sequences_selected.csv',
     '25bp UTR designed with DEN + VAE',
     _PREFIX_25NT),
]

# Expand each row into a dict with the keys 'source', 'description', 'prefix'.
designs = [
    {'source': source, 'description': description, 'prefix': prefix}
    for source, description, prefix in _design_rows
]

In [11]:
# Combine data
# Read every design group's CSV, prepend the group's prefix to its "5'UTR"
# column, tag the rows with the group's description, and stack all groups into
# one table with a fresh 0..N-1 index.
design_frames = []
for design in designs:
    # Read data source
    design_data = pandas.read_csv(design['source'])
    # Add prefix
    design_data["5'UTR"] = design['prefix'] + design_data["5'UTR"]
    # Add description
    design_data['Description'] = design['description']
    design_frames.append(design_data)

# DataFrame.append was removed in pandas 2.0; concatenating once also avoids
# re-copying the accumulated table on every iteration. pandas.concat raises on
# an empty list, so fall back to an empty frame as the original loop did.
data = (
    pandas.concat(design_frames, ignore_index=True)
    if design_frames
    else pandas.DataFrame()
)

# Save
data.to_csv('combined_designs.csv', index=False)
data

Unnamed: 0,5'UTR,Measured MRL,Predicted MRL,Description
0,GGGACATCGTAGAGAGTCGTACTTAAGGGATGCCACCGTTTAAAGG...,2.142014,1.869994,Varied MRLs from 50bp library
1,GGGACATCGTAGAGAGTCGTACTTAACTCTAGAGTCAGGCATTCTG...,3.497435,3.500646,Varied MRLs from 50bp library
2,GGGACATCGTAGAGAGTCGTACTTACAGCTCCTGCCAACGCAGAAG...,5.000827,5.002466,Varied MRLs from 50bp library
3,GGGACATCGTAGAGAGTCGTACTTACCGGCAAGGGGTCCGGGGGTC...,6.495498,6.496485,Varied MRLs from 50bp library
4,GGGACATCGTAGAGAGTCGTACTTACAAGAGTGCAAGACACGCTCA...,10.360462,7.82314,High MRLs from 50bp library
5,GGGACATCGTAGAGAGTCGTACTTACGAGCCGGAAACGGTACTCTA...,9.309283,7.739455,High MRLs from 50bp library
6,GGGACATCGTAGAGAGTCGTACTTAACGCAAACTTTGTCGTGCCTT...,9.225635,7.644577,High MRLs from 50bp library
7,GGGACATCGTAGAGAGTCGTACTTACCCACGGTGATAGTGCGAGTA...,8.996744,7.939284,High MRLs from 50bp library
8,GGGAATACAGAGTAAACCCAATCCTGAG,7.858999,6.570807,High MRLs from 25bp library
9,GGGCAACACTGGAAACCCGATCCCGAAG,7.532819,6.520417,High MRLs from 25bp library
