In [1]:
USE_FULL_1900_DIM_MODEL = False # if True use 1900 dimensional model, else use 64 dimensional one.

In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Set seeds
tf.set_random_seed(42)
np.random.seed(42)

if USE_FULL_1900_DIM_MODEL:
    # Sync relevant weight files
    !aws s3 sync --no-sign-request --quiet s3://unirep-public/1900_weights/ 1900_weights/
    
    # Import the mLSTM babbler model
    from unirep import babbler1900 as babbler
    
    # Where model weights are stored.
    MODEL_WEIGHT_PATH = "./1900_weights"
    
else:
    # Sync relevant weight files
    !aws s3 sync --no-sign-request --quiet s3://unirep-public/64_weights/ 64_weights/
    
    # Import the mLSTM babbler model
    from unirep import babbler64 as babbler
    
    # Where model weights are stored.
    MODEL_WEIGHT_PATH = "./64_weights"

In [3]:
batch_size = 50
b = babbler(batch_size=batch_size, model_path=MODEL_WEIGHT_PATH)

  from ._conv import register_converters as _register_converters


In [4]:
# Before you can train your model, 
sequences = []
with open("int_seqs.txt", "r") as source:
    with open("formatted.txt", "w") as destination:
        for i,seq in enumerate(source):
            seq = seq.strip()
            sequences.append(seq)
            if b.is_valid_seq(seq) and len(seq) < 275: 
                formatted = ",".join(map(str,b.format_seq(seq)))
                destination.write(formatted)
                destination.write('\n')

In [5]:
## 
average_hidden_list = []
final_hidden_list = []
hs_list = []
final_cell_list = []


num2 = range(0, 50)
x = 0
y = 50
for i in num2:
    num1 = range(x, y)
    for j in num1:
        avg_hidden, final_hidden, final_cell, hs_out = (b.get_rep_hs(sequences[j]))
        average_hidden_list.append(avg_hidden)
        final_hidden_list.append(final_hidden)
        final_cell_list.append(final_cell)
        hs_list.append(hs_out)
    print('rep')
    x = x + 50
    y = y + 50
    


IndexError: list index out of range

In [6]:
average_hidden_pd = pd.DataFrame(np.row_stack(average_hidden_list))
final_hidden_pd = pd.DataFrame(np.row_stack(final_hidden_list))
hidden_state = pd.DataFrame(np.row_stack(hs_list))
print(average_hidden_pd)

         0         1         2         3         4         5         6   \
0  0.034794  0.169814 -0.100375 -0.959592 -0.014687 -0.241810  0.063225   
1  0.041084  0.174387 -0.102282 -0.958222 -0.015408 -0.241340  0.063806   
2  0.030618  0.162752 -0.101536 -0.956447 -0.014310 -0.256561  0.065182   

         7         8         9     ...           54        55        56  \
0  0.235519  0.298101 -0.361683    ...     0.116661  0.001761  0.048558   
1  0.235524  0.298415 -0.367180    ...     0.117779  0.000442  0.049546   
2  0.241328  0.297537 -0.372796    ...     0.121538  0.003708  0.050110   

         57        58        59        60        61        62        63  
0 -0.026543 -0.031209  0.330395 -0.169097 -0.048049  0.514019 -0.025693  
1 -0.025075 -0.030870  0.332053 -0.170549 -0.047997  0.513000 -0.024566  
2 -0.020846 -0.031466  0.323022 -0.165265 -0.045657  0.522076 -0.026712  

[3 rows x 64 columns]


In [7]:
average_hidden_pd.to_csv("emi_int_reps.csv")

In [8]:
import pickle
save_loc = "C:\\Users\\makow\\Documents\\GitHub\\UniRep\\Datasets"
data_name = 'ins_seq_1'
file_append = '.pickle'


fn = save_loc + data_name + 'avg_hidden' + file_append
with open(fn, 'wb') as f:
    pickle.dump(average_hidden_list, f)

fn = save_loc + data_name + 'final_hidden' + file_append
with open(fn, 'wb') as f:
    pickle.dump(final_hidden_list, f)

fn = save_loc + data_name + 'final_cell' + file_append
with open(fn, 'wb') as f:
    pickle.dump(final_cell_list, f)
   
fn = save_loc + data_name + 'hidden_state' + file_append
with open(fn, 'wb') as f:
    pickle.dump(hs_list, f)
   
fn = save_loc + data_name + 'all_output_hs' + file_append
with open(fn, 'wb') as f:
    pickle.dump([average_hidden_list, final_hidden_list, final_cell_list, hs_list], f)