In [1]:
USE_FULL_1900_DIM_MODEL = False # if True use 1900 dimensional model, else use 64 dimensional one.

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Set seeds
tf.set_random_seed(42)
np.random.seed(42)

if USE_FULL_1900_DIM_MODEL:
    # Sync relevant weight files
    !aws s3 sync --no-sign-request --quiet s3://unirep-public/1900_weights/ 1900_weights/
    
    # Import the mLSTM babbler model
    from unirep import babbler1900 as babbler
    
    # Where model weights are stored.
    MODEL_WEIGHT_PATH = "./1900_weights"
    
else:
    # Sync relevant weight files
    !aws s3 sync --no-sign-request --quiet s3://unirep-public/64_weights/ 64_weights/
    
    # Import the mLSTM babbler model
    from unirep import babbler64 as babbler
    
    # Where model weights are stored.
    MODEL_WEIGHT_PATH = "./64_weights"

In [3]:
batch_size = 50
b = babbler(batch_size=batch_size, model_path=MODEL_WEIGHT_PATH)

  from ._conv import register_converters as _register_converters


In [5]:
# Before you can train your model, 
sequences = []
with open("emi_novel_R2_seqs.txt", "r") as source:
    with open("formatted.txt", "w") as destination:
        for i,seq in enumerate(source):
            seq = seq.strip()
            sequences.append(seq)
            if b.is_valid_seq(seq) and len(seq) < 275: 
                formatted = ",".join(map(str,b.format_seq(seq)))
                destination.write(formatted)
                destination.write('\n')

In [6]:
## 
average_hidden_list = []
final_hidden_list = []
hs_list = []
final_cell_list = []


num2 = range(0, 50)
x = 0
y = 50
for i in num2:
    num1 = range(x, y)
    for j in num1:
        avg_hidden, final_hidden, final_cell, hs_out = (b.get_rep_hs(sequences[j]))
        average_hidden_list.append(avg_hidden)
        final_hidden_list.append(final_hidden)
        final_cell_list.append(final_cell)
        hs_list.append(hs_out)
    print('rep')
    x = x + 50
    y = y + 50
    


IndexError: list index out of range

In [7]:
average_hidden_pd = pd.DataFrame(np.row_stack(average_hidden_list))
final_hidden_pd = pd.DataFrame(np.row_stack(final_hidden_list))
hidden_state = pd.DataFrame(np.row_stack(hs_list))
print(average_hidden_pd)

          0         1         2         3         4         5         6   \
0   0.035535  0.167171 -0.098253 -0.961980 -0.011877 -0.245272  0.058685   
1   0.034834  0.166253 -0.100336 -0.960168 -0.012382 -0.245872  0.057680   
2   0.036384  0.164464 -0.095186 -0.958135 -0.010737 -0.240504  0.057311   
3   0.033601  0.165395 -0.100427 -0.961394 -0.013071 -0.247018  0.058941   
4   0.035962  0.167376 -0.100315 -0.960088 -0.012033 -0.247449  0.056915   
5   0.033054  0.163880 -0.101697 -0.957339 -0.011935 -0.247242  0.061496   
6   0.034182  0.167940 -0.099446 -0.960929 -0.011812 -0.245349  0.058089   
7   0.034949  0.169506 -0.105904 -0.955092 -0.013352 -0.242997  0.062520   
8   0.037837  0.170839 -0.102597 -0.956848 -0.012679 -0.247230  0.062006   
9   0.033029  0.166501 -0.099412 -0.958391 -0.010912 -0.251335  0.061225   
10  0.029307  0.171944 -0.099085 -0.955278 -0.011593 -0.234247  0.065672   
11  0.034465  0.165498 -0.098959 -0.959000 -0.011372 -0.252436  0.060635   
12  0.029258

In [8]:
average_hidden_pd.to_csv("emi_novel_R2_reps.csv")
final_hidden_pd.to_csv("emi_novel_R2_finalhidden.csv")

In [8]:
import pickle
save_loc = "C:\\Users\\makow\\Documents\\GitHub\\UniRep\\Datasets"
data_name = 'ins_seq_1'
file_append = '.pickle'


fn = save_loc + data_name + 'avg_hidden' + file_append
with open(fn, 'wb') as f:
    pickle.dump(average_hidden_list, f)

fn = save_loc + data_name + 'final_hidden' + file_append
with open(fn, 'wb') as f:
    pickle.dump(final_hidden_list, f)

fn = save_loc + data_name + 'final_cell' + file_append
with open(fn, 'wb') as f:
    pickle.dump(final_cell_list, f)
   
fn = save_loc + data_name + 'hidden_state' + file_append
with open(fn, 'wb') as f:
    pickle.dump(hs_list, f)
   
fn = save_loc + data_name + 'all_output_hs' + file_append
with open(fn, 'wb') as f:
    pickle.dump([average_hidden_list, final_hidden_list, final_cell_list, hs_list], f)