In [1]:
# Model size switch: True selects the 1900-dimensional model,
# False selects the 64-dimensional one.
USE_FULL_1900_DIM_MODEL = False

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Set seeds
tf.set_random_seed(42)
np.random.seed(42)

if USE_FULL_1900_DIM_MODEL:
    # Sync relevant weight files
    !aws s3 sync --no-sign-request --quiet s3://unirep-public/1900_weights/ 1900_weights/
    
    # Import the mLSTM babbler model
    from unirep import babbler1900 as babbler
    
    # Where model weights are stored.
    MODEL_WEIGHT_PATH = "./1900_weights"
    
else:
    # Sync relevant weight files
    !aws s3 sync --no-sign-request --quiet s3://unirep-public/64_weights/ 64_weights/
    
    # Import the mLSTM babbler model
    from unirep import babbler64 as babbler
    
    # Where model weights are stored.
    MODEL_WEIGHT_PATH = "./64_weights"

In [3]:
# Build the babbler selected above, pointing it at the stored weights.
# `batch_size` stays a module-level name so other cells can read it.
batch_size = 50
b = babbler(model_path=MODEL_WEIGHT_PATH, batch_size=batch_size)

  from ._conv import register_converters as _register_converters


In [5]:
# Read the input file one line at a time. Every stripped line is kept in
# `sequences`; lines that pass b.is_valid_seq and are shorter than 275
# characters are additionally written to formatted.txt as the comma-joined
# output of b.format_seq, one line per sequence.
# NOTE(review): `sequences` also retains lines that fail the filter below —
# confirm that is intended before indexing into it elsewhere.
sequences = []
with open("emi_neg_seqs_pva_1.txt", "r") as source, open("formatted.txt", "w") as destination:
    for raw_line in source:
        seq = raw_line.strip()
        sequences.append(seq)
        # Same test as "valid and short enough", expressed as a guard clause.
        if not b.is_valid_seq(seq) or len(seq) >= 275:
            continue
        formatted = ",".join(str(token) for token in b.format_seq(seq))
        destination.write(formatted + '\n')

In [None]:
# Run b.get_rep_hs on each loaded sequence and collect its four outputs
# (average hidden, final hidden, final cell, hidden-state output) in
# separate lists, in the same order as `sequences`.
average_hidden_list = []
final_hidden_list = []
hs_list = []
final_cell_list = []

# Sequences are processed in chunks of 50, up to 50 chunks (2500 sequences).
# The previous version indexed 0..2499 unconditionally and raised IndexError
# when fewer sequences had been loaded; the upper bound is now capped at the
# number of sequences actually available.
chunk_size = 50
max_sequences = 50 * chunk_size
n_to_process = min(max_sequences, len(sequences))

for chunk_start in range(0, n_to_process, chunk_size):
    chunk_end = min(chunk_start + chunk_size, n_to_process)
    for seq in sequences[chunk_start:chunk_end]:
        avg_hidden, final_hidden, final_cell, hs_out = b.get_rep_hs(seq)
        average_hidden_list.append(avg_hidden)
        final_hidden_list.append(final_hidden)
        final_cell_list.append(final_cell)
        hs_list.append(hs_out)
    # Progress marker: one line per completed chunk.
    print('rep')
    


rep
rep
rep
rep


In [6]:
# Stack the collected per-sequence arrays row-wise and wrap each result in a
# DataFrame. np.vstack is used instead of np.row_stack: the latter is only an
# alias of vstack and is deprecated in recent NumPy releases.
average_hidden_pd = pd.DataFrame(np.vstack(average_hidden_list))
final_hidden_pd = pd.DataFrame(np.vstack(final_hidden_list))
hidden_state = pd.DataFrame(np.vstack(hs_list))
print(hidden_state)

             0         1         2         3         4         5         6   \
0     -0.165004  0.367741  0.358379 -0.652705  0.310745  0.112354  0.319343   
1     -0.156224  0.020769 -0.005827 -0.946178 -0.175482  0.024309  0.109565   
2     -0.076103  0.039707 -0.068597 -0.982010 -0.074966 -0.034950  0.068059   
3     -0.058311  0.053986 -0.086645 -0.987298 -0.051170 -0.053258  0.027245   
4     -0.003677  0.076352 -0.109282 -0.989626 -0.047260 -0.053738  0.027420   
5     -0.038077  0.097799 -0.118403 -0.985669 -0.050498 -0.049629  0.037225   
6      0.025612  0.064812 -0.113066 -0.984256 -0.059101 -0.051365  0.012105   
7      0.049025  0.082313 -0.087995 -0.990922 -0.041432 -0.112213  0.029521   
8      0.006583  0.135147 -0.078896 -0.990121 -0.015207 -0.122814  0.061241   
9      0.008933  0.127565 -0.079991 -0.987132 -0.015113 -0.152265  0.025057   
10     0.077980  0.103989 -0.058407 -0.984953 -0.021431 -0.128419  0.045190   
11     0.026909  0.202782 -0.059125 -0.989396 -0.018

In [7]:
# Write the average-hidden and final-hidden DataFrames to CSV files in the
# current working directory.
# NOTE(review): the first filename starts with "emii_" while the second one
# starts with "emi_" — looks like a typo; confirm before renaming, since
# other code may read the file under its current name.
for frame, csv_name in (
    (average_hidden_pd, "emii_neg_reps_ova_1.csv"),
    (final_hidden_pd, "emi_neg_finalhidden_ova_1.csv"),
):
    frame.to_csv(csv_name)

In [8]:
import os
import pickle

# Destination directory and filename parts for the pickled outputs.
# NOTE(review): this is a machine-specific absolute path — consider replacing
# it with a configurable, relative data directory.
save_loc = "C:\\Users\\makow\\Documents\\GitHub\\UniRep\\Datasets"
data_name = 'emi_neg_reps_ova_1'
file_append = '.pickle'

# (filename suffix, object to pickle) pairs, written in this order. The last
# entry bundles all four lists into a single file.
pickle_payloads = [
    ('avg_hidden', average_hidden_list),
    ('final_hidden', final_hidden_list),
    ('final_cell', final_cell_list),
    ('hidden_state', hs_list),
    ('all_output_hs', [average_hidden_list, final_hidden_list, final_cell_list, hs_list]),
]

for suffix, payload in pickle_payloads:
    # os.path.join inserts the directory separator that plain string
    # concatenation left out; previously the files were created next to the
    # Datasets folder with "Datasets" fused onto the front of the filename.
    fn = os.path.join(save_loc, data_name + suffix + file_append)
    with open(fn, 'wb') as f:
        pickle.dump(payload, f)