# Basic RNNs

    1) One-to-Many:
        Image Captioning:
            One  : Image as is, OR
                   Image coded by CNN, then fed to RNN as a vector.
            Many : Let RNN output captions by rolling out indefinitely. 

    2) Many-to-One:
        Sentiment Analysis:
            Many : The text words embedded into many vectors, ordered.
            One  : Output is the sentiment.
                Method 1: Sum/Concat of all unrolled-RNN output to classifiation layer.
                Method 2:             Use only final RNN output to classifiation layer.

    3) Many-to-Many:
        Version 1: Finish reading the sequence first. Then use final RNN-state to output/start another sequence.
            Sequence Prediction:
                Many(Input)  : [0,1,2,3]
                Many(Output) :          [4,5,6,7]
            Machine Translation:
                Many(Input)  : ['I','want']
                Many(Output) :             ['我','要']

        Version 2: For each input to the RNN, there will be output.
            Memorizing hello:
                Many(Input)  : ['h','e','l','l']
                Many(Output) :     ['e','l','l','o']
            Sequence Labelling:

In [5]:
import sys
## To get helper functions from ano`ther folder
# sys.path.insert(0, '../') # if _helper_basics_ is in previous folder
now_i_am_at = 'home' # home dso test
if now_i_am_at=='home':
    sys.path.insert(0, 'E:/Leonard HDD/Dropbox/DSO/Tasks/')
elif now_i_am_at=='dso':
    sys.path.insert(0, 'D:/Dropbox/DSO/Tasks')

from _helper_basics_ import *

## Deep learning
import tensorflow as tf
# import keras
# from keras.models import Model, Sequential
# from keras.layers import *
# from keras import optimizers
# import keras.backend as K

Using TensorFlow backend.


In [6]:
%matplotlib inline
plt.rcParams['savefig.dpi'] = 100

## Why need autoreload - https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
%load_ext autoreload
%autoreload 2

print('keras', keras.__version__)
print('tensorflow', tf.__version__)

keras 2.0.5
tensorflow 1.2.0


# One-to-Many LSTM

## Image/Video Captioning
    1) Image is input:
            (Input)  : [frame_1]
            (Encode) : [ Code  ]->[Code][Code][Code]
            (Output) :            [  I,  eat,  rice]
    2) Video is input: 
            (Input)  : [frame_1,...,frame_N]
            (Encode) : [        Code       ]->[Code][Code][Code]
            (Output) :                        [  I,  eat,  rice]
    3) Code is input:
            (Input)  : [        Code       ]->[Code][Code][Code]
            (Output) :                        [  I,  eat,  rice]
        
P.S for 2) we can alternatively read them sequentially (scroll to many-to-many).

In [None]:
def set_X(Input_type, Input_Dimensions):
    m,n,channel,num_code,num_frames,num_words,num_feat_out = Input_Dimensions
    if Input_type == 'image':
        X = np.random.rand(1, m,n, channel)
    if Input_type == 'video':
        X = np.random.rand(1, num_frames, m,n, channel)
    elif Input_type == 'embedding':
        X = np.random.rand(1, num_code)
    return X
def set_Archi(Input_type, Input_Dimensions):
    m,n,channel,num_code,num_frames,num_words,num_feat_out = Input_Dimensions
    if Input_type == 'image':
        Inp = Input( shape=(m,n,channel),name = "Image" )
        ####################### Generate Image_Code (ConvNet) #######################
        x = Conv2D(64, (3,3), name='Conv2D')(Inp) # padding='same'
        x = Flatten()(x)
        x = Dense(num_code, name='Image_Code')(x)
    if Input_type == 'video':
        Inp = Input( shape=(num_frames, m,n,channel),name = "Image" )
        ####################### Generate Video_Code (ConvNet) #######################
        x = Conv3D(64, (5,3,3), name='Conv3D')(Inp) # padding='same'
        x = Flatten()(x)
        x = Dense(num_code, name='Video_Code')(x)
    elif Input_type == 'embedding':
        ########################### Input already a code ############################
        Inp = Input( shape=(num_code,), name = "Image_Code" )
        x = Inp
    return Inp, x

Input_type = 'video'  # 'image' 'video' 'embedding'

m,n,channel  = 5,5,3
num_code     = 64 # Image/Video Code
num_frames   = 99 # Video Frames
num_words    = 10
num_feat_out = 2

## Input : 
Input_Dimensions = [m,n,channel,num_code,num_frames,num_words,num_feat_out]
X = set_X(Input_type, Input_Dimensions)

## Output : (None, num_words, num_feat_out)
y = np.random.rand(1, num_words, num_feat_out)

########################### Architecture ##################################
Inp, x = set_Archi(Input_type, Input_Dimensions)
########## Recurrent Section ##########
x = RepeatVector(num_words, name='Repeat')(x)  ## To tally with num_words
x = LSTM(num_feat_out, name="RNN_1", return_sequences=True)(x)
########################### Printing information ########################
model = Model(Inp, x)
model.summary()
# print_model_weights(model)
################################# Training ##############################
n_batch = 1
n_epoch = 1
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X, y, epochs=n_epoch, batch_size=n_batch, verbose=1)
############################### Evaluation #############################
result = model.predict(X)
# print('X:\n',X[0].T)
# print('result:\n',result[0].T)
# print('y:\n',y[0].T)

# Many-to-One LSTM for Sequence Classification 

## Sentiment Analysis

In [12]:
num_words  = 5
num_feat   = 1
num_class  = 2
class_type = np.array([0,1])

## Input : (None, num_words, num_feat)
seq = np.array( [i/float(num_words) for i in range(num_words)] )
X = seq.reshape(1, num_words, num_feat)

## Output : (None, num_class)
y = class_type.reshape(1, num_class)

########################### Architecture ##################################
Inp = Input(shape=(num_words,num_feat), name="Input")
x   = LSTM(99, name="RNN_1")(Inp) 
x   = Dense(num_class,name = "Output" )(x)
model = Model(Inp, x)
########################### Printing information ########################
model.summary()
# print_model_weights(model)
############################### Evaluation #############################
n_batch = 1
n_epoch = 500
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X, y, epochs=n_epoch, batch_size=n_batch, verbose=0)
############################### Evaluation #############################
result = model.predict(X, batch_size=1, verbose=0)
print('X:\n',X[0].T)
print('result:\n',result[0].T)
print('y:\n',y[0].T)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 5, 1)              0         
_________________________________________________________________
RNN_1 (LSTM)                 (None, 99)                39996     
_________________________________________________________________
Output (Dense)               (None, 2)                 200       
Total params: 40,196
Trainable params: 40,196
Non-trainable params: 0
_________________________________________________________________
X:
 [[ 0.   0.2  0.4  0.6  0.8]]
result:
 [  2.79839151e-06   9.99999881e-01]
y:
 [0 1]


# Many-to-Many RNNs
    There are two versions. 
    1) First is to read all sequence then store it in a memory cell. Subsequently, output from either the memory state, or output->input.
    2) Second is to have output directly from the inputs, instead of reading all the sequences first. 

## Version 1)
### Same Input Output Length
    Speech Enhancement, Sequence Prediction,
        (Input)  : [0,1,2,3]->State
        (Output) :            State->[4,5,6,7]
### Diff Input Output Length
    Machine Translation, Video Captioning
        (Input)  : [I,want,to,eat]->State
        (Output) :                  State->[我,要,吃]

In [228]:
num_features  = 1
num_words_in  = 4
LSTM_neurons  = 10
num_words_out = 3
## Input : (None, num_words_in, num_features)
X = np.array([0,1,2,3]).reshape(1,num_words_in, num_features)
## Output : (None, num_words, num_feat_out)
y = np.array([4,5,6]).reshape(1,num_words_out,num_features)
########################### Architecture ##################################
Inp = Input( shape=(num_words_in, num_features),name = "Input" )
x = LSTM(LSTM_neurons, name = "Encoder")(Inp)
x = RepeatVector(num_words_out, name='Repeat')(x)
x = LSTM(LSTM_neurons, name = "Decoder_1", return_sequences=True)(x)
x = LSTM(num_features, activation='relu', name = "Decoder_2", return_sequences=True)(x)
########################### Printing information ########################
model = Model(inputs=Inp, outputs=x)
model.summary()
# print_model_weights(model)
########################### Training ########################
n_batch = 1
n_epoch = 1000
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X, y, epochs=n_epoch, batch_size=n_batch, verbose=0)
########################### Evaluations ########################
print('X:\n',X.reshape(num_words_in))
print('predict:\n', model.predict(X.reshape(1,num_words_in,num_features)).reshape(num_words_out).T)
print('y:\n',y.reshape(num_words_out))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 4, 1)              0         
_________________________________________________________________
Encoder (LSTM)               (None, 10)                480       
_________________________________________________________________
Repeat (RepeatVector)        (None, 3, 10)             0         
_________________________________________________________________
Decoder_1 (LSTM)             (None, 3, 10)             840       
_________________________________________________________________
Decoder_2 (LSTM)             (None, 3, 1)              48        
Total params: 1,368
Trainable params: 1,368
Non-trainable params: 0
_________________________________________________________________
X:
 [0 1 2 3]
predict:
 [ 3.9658339   4.99893379  6.02482462]
y:
 [4 5 6]


## Version 2)
### Teacher Forcing ?
    Many(Input)  : [<start>, h, e, l, l,   o   ]
    Many(Output) : [   h,    e, l, l, o, <stop>]
    
    Many(Input)  : [<start>, I, want, to,   eat,   rice   ]
    Many(Output) : [   I,  want, to,  eat, rice,    .  ]
Edit : include embedding layer

In [14]:
vocab = 1.*np.array([[1,0,0],  # <start>
                     [0,1,0],  # <stop>
                     [1,1,0],  # h
                     [0,0,1],  # e
                     [1,0,1],  # l
                     [0,1,1]]) # o
# print(vocab.T, vocab.shape)
vocab_size   = np.shape(vocab)[0]
num_words    = 6
num_features = np.shape(vocab)[1]
hello    = vocab[2:]
## Input : (None, num_words, num_features)
X = np.array([ vocab[0],hello[0],hello[1],hello[2],hello[2],hello[3] ])
X = X.reshape(1,num_words,num_features)
## Output : (None, num_words, num_features)
y = np.array([ hello[0],hello[1],hello[2],hello[2],hello[3],vocab[1] ])
y = y.reshape(1,num_words,num_features)
########################### Architecture ##################################
Inp = Input( shape=( num_words,num_features),name = "Input" )
x = LSTM(10, name = "RNN_1", return_sequences=True)(Inp) 
x = LSTM(num_features, activation='sigmoid', name = "Output", return_sequences=True)(x) 
########################### Printing information ########################
model = Model(Inp, x)
model.summary()
# print_model_weights(model)
################################# Training ##############################
n_batch = 1
n_epoch = 500
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X, y, epochs=n_epoch, batch_size=n_batch, verbose=0)
################################ Evaluation #############################
result = model.predict(X)
print('X:\n',X[0].T)
print('result:\n',result[0].T)
print('predict:\n',np.round(result[0].T))
print('y:\n',y[0].T)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           (None, 6, 3)              0         
_________________________________________________________________
RNN_1 (LSTM)                 (None, 6, 10)             560       
_________________________________________________________________
Output (LSTM)                (None, 6, 3)              168       
Total params: 728
Trainable params: 728
Non-trainable params: 0
_________________________________________________________________
X:
 [[ 1.  1.  0.  1.  1.  0.]
 [ 0.  1.  0.  0.  0.  1.]
 [ 0.  0.  1.  1.  1.  1.]]
result:
 [[ 0.6140694   0.14179491  0.91301763  0.9592908   0.03574775  0.        ]
 [ 0.62702775  0.10392392  0.          0.01219696  0.96042001  0.98396033]
 [ 0.26099342  0.82136625  0.92331982  0.9699052   0.9684999   0.0079239 ]]
predict:
 [[ 1.  0.  1.  1.  0.  0.]
 [ 1.  0.  0.  0.  1.  1.]
 [ 0.  1.  1.  1.  1.  

## Video Captioning
### Using Functional Model with TimeDistributed wrapper
    Many(Input)  : ['frame_1','frame_2','frame_3','frame_4']
    Encode_Layer : [ 'code_1', 'code_2', 'code_3', 'code_4']
    Many(Output) :                                  ['I','eat','rice','.']

In [25]:
Input_type = 'video' # 'embedding' 'video'

num_frames, m,n,channel = 99, 10,10, 3
num_code = 64
num_words, num_feat_out = 15, 2

## Input : 
if Input_type == 'video':       X = np.random.rand(1, num_frames, m,n, channel)
elif Input_type == 'embedding': X = np.random.rand(1, num_frames, num_code)

## Output : (None, num_words, num_feat_out)
y = np.random.rand(1, num_words, num_feat_out)

########################### Architecture ##################################
if Input_type == 'video':
    ## Input
    Inp_vid = Input(shape=(num_frames, m,n, channel), name="Video")
    ## Coded frames
    x = TimeDistributed(Conv2D(64, (3,3)),name="Conv2D")(Inp_vid)
    x = TimeDistributed(Flatten(),name="Flatten")(x)
    x = TimeDistributed(Dense(num_code), name="Code_Seq")(x)
    ## Encoder
    x = LSTM(num_feat_out, name="Encoder")(x)  
    x = RepeatVector( num_words, name='Repeat')(x)
    ## Decoder
    x = LSTM(num_feat_out, return_sequences=True, name="Decoder")(x)  
    model = Model(inputs=Inp_vid, outputs=x)

elif Input_type == 'embedding':
    Inp = Input( shape=(num_frames,num_code), name="Input" )
    ## Encoder
    x = LSTM(num_feat_out, name="Encoder")(Inp) 
    x = RepeatVector( num_words,   name='Repeat')(x)
    ## Decoder
    x = LSTM(num_feat_out, return_sequences=True, name="Decoder")(x)  
    model = Model(Inp, x)
########################### Printing information ########################
model.summary()
print_model_weights(model)

################################# Training ##############################
n_batch = 1
n_epoch = 1
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X, y, epochs=n_epoch, batch_size=n_batch, verbose=1)
############################### Evaluation #############################
result = model.predict(X)
# print('X:\n',X[0].T)
print('result:\n',result[0].T)
print('y:\n',y[0].T)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Video (InputLayer)           (None, 99, 10, 10, 3)     0         
_________________________________________________________________
Conv2D (TimeDistributed)     (None, 99, 8, 8, 64)      1792      
_________________________________________________________________
Flatten (TimeDistributed)    (None, 99, 4096)          0         
_________________________________________________________________
Code_Seq (TimeDistributed)   (None, 99, 64)            262208    
_________________________________________________________________
Encoder (LSTM)               (None, 2)                 536       
_________________________________________________________________
Repeat (RepeatVector)        (None, 15, 2)             0         
_________________________________________________________________
Decoder (LSTM)               (None, 15, 2)             40        
Total para

## Ignore below

In [None]:
LSTM(units, activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='glorot_uniform', 
     recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, 
     recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, 
     recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0)


# Test

In [263]:
Inp = Input( shape=(10,2), name="Vid_Frame" )
x = LSTM(3, return_sequences=True)(Inp)
#     x = Dense(64, name='Vid_Frame_Code')(x)
model = Model(Inp ,x)
########################### Printing information ########################
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Vid_Frame (InputLayer)       (None, 10, 2)             0         
_________________________________________________________________
lstm_49 (LSTM)               (None, 10, 3)             72        
Total params: 72
Trainable params: 72
Non-trainable params: 0
_________________________________________________________________


In [266]:
X = np.random.rand(2,10,2)
results = model.predict(X)

In [267]:
results.shape

(2, 10, 3)

In [269]:
def get_dataset_file(dataset, default_dataset, origin):
    '''Look for it as if it was a full path, if not, try local file,
    if not try in the data directory.

    Download dataset if it is not present

    '''
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or data_file == default_dataset:
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == default_dataset:
        from six.moves import urllib
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

        
    return dataset


def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None,
              sort_by_len=True):
    '''Loads the dataset

    :type path: String
    :param path: The path to the dataset (here IMDB)
    :type n_words: int
    :param n_words: The number of word to keep in the vocabulary.
        All extra words are set to unknow (1).
    :type valid_portion: float
    :param valid_portion: The proportion of the full train set used for
        the validation set.
    :type maxlen: None or positive int
    :param maxlen: the max sequence length we use in the train/valid set.
    :type sort_by_len: bool
    :name sort_by_len: Sort by the sequence lenght for the train,
        valid and test set. This allow faster execution as it cause
        less padding per minibatch. Another mechanism must be used to
        shuffle the train set at each epoch.

    '''

    #############
    # LOAD DATA #
    #############

    # Load the dataset
    path = get_dataset_file(
        path, "imdb.pkl",
        "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")

    if path.endswith(".gz"):
        f = gzip.open(path, 'rb')
    else:
        f = open(path, 'rb')

    train_set = pickle.load(f)
    test_set = pickle.load(f)
    f.close()
    if maxlen:
        new_train_set_x = []
        new_train_set_y = []
        for x, y in zip(train_set[0], train_set[1]):
            if len(x) < maxlen:
                new_train_set_x.append(x)
                new_train_set_y.append(y)
        train_set = (new_train_set_x, new_train_set_y)
        del new_train_set_x, new_train_set_y

    # split training set into validation set
    train_set_x, train_set_y = train_set
    n_samples = len(train_set_x)
    sidx = numpy.random.permutation(n_samples)
    n_train = int(numpy.round(n_samples * (1. - valid_portion)))
    valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
    valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
    train_set_x = [train_set_x[s] for s in sidx[:n_train]]
    train_set_y = [train_set_y[s] for s in sidx[:n_train]]

    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)

    def remove_unk(x):
        return [[1 if w >= n_words else w for w in sen] for sen in x]

    test_set_x, test_set_y = test_set
    valid_set_x, valid_set_y = valid_set
    train_set_x, train_set_y = train_set

    train_set_x = remove_unk(train_set_x)
    valid_set_x = remove_unk(valid_set_x)
    test_set_x = remove_unk(test_set_x)

    def len_argsort(seq):
        return sorted(range(len(seq)), key=lambda x: len(seq[x]))

    if sort_by_len:
        sorted_index = len_argsort(test_set_x)
        test_set_x = [test_set_x[i] for i in sorted_index]
        test_set_y = [test_set_y[i] for i in sorted_index]

        sorted_index = len_argsort(valid_set_x)
        valid_set_x = [valid_set_x[i] for i in sorted_index]
        valid_set_y = [valid_set_y[i] for i in sorted_index]

        sorted_index = len_argsort(train_set_x)
        train_set_x = [train_set_x[i] for i in sorted_index]
        train_set_y = [train_set_y[i] for i in sorted_index]

    train = (train_set_x, train_set_y)
    valid = (valid_set_x, valid_set_y)
    test = (test_set_x, test_set_y)

    return train, valid, test


In [270]:
train, valid, test = load_data(path="Data/imdb.dict.pkl", n_words=100000, valid_portion=0.1, maxlen=None, sort_by_len=True)

UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 6: ordinal not in range(128)

In [317]:
# import csv

X_train = []

Folder_path = "Data/Integer_Prediction/"
# data_path = 'sample_submission'
data_path = 'train'
# data_path = 'test'

with open(Folder_path+data_path+'.csv', 'r') as csvfile:
#     spamreader = csv.reader(csvfile, delimiter=',')
    spamreader = csv.reader(csvfile)
    print('spamreader',spamreader)
    itr = 0
    for row in spamreader:
        print('\n',row)
        itr += 1
        if itr == 1: continue
        
        Id,Sequence = row
        # print('\tx',Id)
        # print('\tSequence',Sequence)
        X = Sequence.split(",")
        # print('\tX',X)
        for x in X:
            X_train += [int(x)]

        # print('X_train: \n\t',X_train)

        X_train = np.array(X_train).reshape(len(X_train),1)
        print('X_train: ',X_train.shape)
        # print('X_train: ',X_train.T)
        
        if itr == 10:
            break

spamreader <_csv.reader object at 0x000001B1349EB0B0>
['Id', 'Sequence']
['3', '1,3,13,87,1053,28576,2141733,508147108,402135275365,1073376057490373,9700385489355970183,298434346895322960005291,31479360095907908092817694945,11474377948948020660089085281068730']

X_train:  (14, 1)
['7', '1,2,1,5,5,1,11,16,7,1,23,44,30,9,1,47,112,104,48,11,1,95,272,320,200,70,13,1,191,640,912,720,340,96,15,1,383,1472,2464,2352,1400,532,126,17,1,767,3328,6400,7168,5152,2464,784,160,19,1,1535,7424']

X_train:  (14, 1)
['8', '1,2,4,5,8,10,16,20,32,40,64,80,128,160,256,320,512,640,1024,1280,2048,2560,4096,5120,8192,10240,16384,20480,32768,40960,65536,81920,131072,163840,262144,327680,524288,655360,1048576,1310720,2097152']

X_train:  (14, 1)
['11', '1,8,25,83,274,2275,132224,1060067,3312425,10997342,36304451,301432950,17519415551,140456757358,438889687625,1457125820233,4810267148324,39939263006825,2321287521544174,18610239435360217']

X_train:  (14, 1)
['13', '1,111,12211,1343211,147753211,16252853211,178781

In [318]:
import pandas as pd

Folder_path = "Data/Integer_Prediction/"
# data_path = 'sample_submission'
data_path = 'train'
# data_path = 'test'

# load the data
colna = ['id', 'seq']
train = pd.read_csv(Folder_path+"train.csv")
test  = pd.read_csv(Folder_path+"test.csv")

train.columns = colna
test.columns = colna

In [328]:
train.head(5)

Unnamed: 0,id,seq
0,3,"1,3,13,87,1053,28576,2141733,508147108,4021352..."
1,7,"1,2,1,5,5,1,11,16,7,1,23,44,30,9,1,47,112,104,..."
2,8,"1,2,4,5,8,10,16,20,32,40,64,80,128,160,256,320..."
3,11,"1,8,25,83,274,2275,132224,1060067,3312425,1099..."
4,13,"1,111,12211,1343211,147753211,16252853211,1787..."


In [322]:
train.columns = colna

In [330]:
train.shape

(113845, 2)

In [334]:
train['id']

0              3
1              7
2              8
3             11
4             13
5             15
6             16
7             18
8             20
9             21
10            23
11            24
12            26
13            28
14            34
15            35
16            36
17            40
18            41
19            44
20            46
21            49
22            51
23            52
24            53
25            57
26            59
27            61
28            63
29            64
           ...  
113815    227632
113816    227634
113817    227637
113818    227638
113819    227641
113820    227643
113821    227644
113822    227645
113823    227646
113824    227647
113825    227652
113826    227656
113827    227660
113828    227662
113829    227664
113830    227666
113831    227667
113832    227672
113833    227675
113834    227676
113835    227677
113836    227679
113837    227680
113838    227681
113839    227682
113840    227683
113841    227684
113842    2276

In [323]:
train['seq']

0         1,3,13,87,1053,28576,2141733,508147108,4021352...
1         1,2,1,5,5,1,11,16,7,1,23,44,30,9,1,47,112,104,...
2         1,2,4,5,8,10,16,20,32,40,64,80,128,160,256,320...
3         1,8,25,83,274,2275,132224,1060067,3312425,1099...
4         1,111,12211,1343211,147753211,16252853211,1787...
5         1,1,1,1,1,1,1,1,1,5,1,1,1,1,5,5,1,1,1,1,11,5,5...
6         840,1320,1680,2520,3192,3432,4920,5208,5280,57...
7         1,2,7,27,113,483,2138,9681,44374,205500,961614...
8                                           4,6,8,9,26,1752
9         1,2,1,3,4,2,4,8,8,3,5,13,19,15,5,6,19,36,42,28...
10        1,176,570496,9223556096,460993706622976,552660...
11        18,24,30,36,42,54,60,66,84,108,126,138,174,186...
12        0,0,4,7,8,4,6,8,8,9,9,5,2,1,5,3,1,1,0,0,4,7,8,...
13        648391,718064159,7069067389,22742734291,362942...
14        1,1,3,1,3,7,1,3,11,19,1,3,15,35,47,1,3,19,51,1...
15        0,1,4,7,15,18,31,40,50,61,88,87,119,136,151,17...
16        1,2,3,4,5,7,9,12,21,114,200,35