In [1]:
import numpy as np
from numpy.random import seed
import tensorflow as tf

from tensorflow import set_random_seed
seed(42)
set_random_seed(42)

from keras.layers import Input, Dense, TimeDistributed, Embedding
from keras.layers import Concatenate, Reshape, Lambda, Multiply, multiply, concatenate
from keras.models import Model
from keras import backend as K

import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''

import tensorflow as tf
import numpy as np
from dataset_load import *

Using TensorFlow backend.


In [2]:
# make first model

def build_base_model(input_shape):
    """Build the single-unit baseline classifier and print its summary.

    Args:
        input_shape: Number of input features; used as the width of the
            1-D input layer.

    Returns:
        An uncompiled Keras ``Model`` that maps the feature vector to one
        ``tanh``-activated unit (the layer is named ``'tanh_output'``).
    """
    features = Input(shape=(input_shape,))
    tanh_layer = Dense(1, activation='tanh', name='tanh_output')
    base = Model(inputs=features, outputs=tanh_layer(features))
    base.summary()
    return base

In [3]:
# load data
# make sure that the first shape is the IMDB training data. 

def open_pickle(path):
    """Deserialize and return the object pickled in the file at ``path``.

    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    from pickle import load
    with open(path, mode='rb') as handle:
        return load(handle)

# Load the four IMDB pickles (train/test inputs and labels). Paths are relative
# to the notebook's working directory. Later cells feed the X_* objects to
# CountVectorizer as raw documents and use the y_* objects as 0/1 targets.
X_train_original = open_pickle('../../data/imdb/imdb_original_preprocessed_xtrain.pickle')
X_test_original = open_pickle('../../data/imdb/imdb_original_preprocessed_xtest.pickle')
y_train_original = open_pickle('../../data/imdb/imdb_original_preprocessed_ytrain.pickle')
y_test_original = open_pickle('../../data/imdb/imdb_original_preprocessed_ytest.pickle')

In [4]:
# Binary bag-of-words features for the base classifier.

from sklearn.feature_extraction.text import CountVectorizer

# Token pattern that keeps apostrophes and slashes inside a token (e.g. "1/10").
token = r"(?u)\b[\w\'/]+\b"
# binary=True records presence/absence only; min_df=100 drops tokens that occur
# in fewer than 100 training documents.
cv = CountVectorizer(min_df = 100, token_pattern=token, lowercase=True, binary=True)
# The vocabulary is fitted on the training documents only and reused for the test set.
X_train = cv.fit_transform(X_train_original)
X_test = cv.transform(X_test_original)

In [5]:
# `load_unigrams` is presumably provided by the `dataset_load` star import.
# Later cells index `word_list` by position (one entry per human term) and
# look up `connotation[word]` for a term.
word_list, connotation = load_unigrams('./imdb-unigrams.txt', X_train_original, y_train_original)

In [6]:
# Per-document human-term inputs for the combined model (its second input).
# Later cells compare the entries against 1 and -1 and treat axis 1 as the
# human-term axis; `generate_appearance` itself is defined outside this notebook.
y_train_agreement, y_test_agreement = generate_appearance(X_train_original, X_test_original, 
                                                          word_list, connotation)

In [7]:
def layer_split(x):
    """Split a 2-D tensor into one single-column tensor per column.

    The number of pieces is taken from the static width of ``x``, so the
    function does not depend on the notebook-level ``human_terms_len`` global
    (which is only assigned in a later cell).

    Args:
        x: Tensor of shape ``(batch, n_columns)`` whose second dimension is
            statically known.

    Returns:
        A list of ``n_columns`` tensors, each of shape ``(batch, 1)``.
    """
    n_columns = K.int_shape(x)[1]
    return tf.split(x, num_or_size_splits=n_columns, axis=1)

def layer_concat(x):
    """Join a list of tensors along axis 1 and return the merged tensor."""
    merged = tf.concat(x, axis=1)
    return merged

In [8]:
from keras.initializers import Constant, glorot_uniform


# Base document classifier: a single sigmoid unit over the bag-of-words features.
input_layer = Input(shape=(X_train.shape[1],))
# NOTE: despite its name, `tanh_output` is the output of a sigmoid-activated layer.
tanh_output = Dense(1, activation='sigmoid', kernel_initializer=glorot_uniform(seed=42))(input_layer)
model = Model(inputs=input_layer, outputs=tanh_output)

model.compile(loss='binary_crossentropy',
             metrics=['acc'],
             optimizer='adam')

# The first 16667 rows are used for fitting and the remaining rows for validation.
# NOTE(review): batch_size=1 performs one weight update per document.
base_model_history = model.fit(X_train[:16667], y_train_original[:16667], 
                                    validation_data=(X_train[16667:], y_train_original[16667:]),
                                    batch_size=1, epochs=1)

Train on 16667 samples, validate on 8333 samples
Epoch 1/1


In [9]:
# Build the combined model: base document prediction gated by human-term inputs.
# `human_terms_len` is also read by `layer_split` when the Lambda below is called.
human_terms_len = len(word_list)


# base_history = model.fit(X_train[:16667], y_train_original[:16667], 
#                  validation_data=([X_train[16667:], y_train_original[16667:]]),
#                  batch_size=1, epochs=1)

# base_model = build_base_model(X_train.shape[1])

# Input 1: bag-of-words features, same width as the base model's input.
combined_input_layer = Input(shape=(X_train.shape[1],))

# Input 2: one value per human term.
ht_input_layer = Input(shape=(human_terms_len,))

# Split the human-term input into a list of single-column tensors.
split = Lambda(layer_split)(ht_input_layer)


# Reuse the base `model` from the earlier cell as a layer (it is not frozen here),
# then rescale its sigmoid output from [0, 1] to [-1, 1].
label_layer = model(combined_input_layer)
tanh_norm = Lambda(lambda x: (x*2)-1)(label_layer)

# For every human term: multiply its input value by the rescaled prediction and
# apply ReLU, so only positive products pass through.
dense_layer = []
for i in range(human_terms_len):
    # Earlier variant used Dense(1, activation='relu', use_bias=False, kernel_initializer='ones').
    dense_layer.append(Lambda(lambda x: tf.nn.relu(x))(Multiply()([split[i], tanh_norm])))

# Concatenate the per-term outputs. The layer is named 'concatenate' so later
# cells can fetch its output with `get_layer('concatenate')`.
concat = Lambda(layer_concat, name='concatenate')(dense_layer)


# Final prediction: one sigmoid unit over the per-term outputs.
output_layer = Dense(1, activation='sigmoid')(concat)

combined_model = Model(inputs=[combined_input_layer, ht_input_layer], outputs=output_layer)
combined_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 3686)         0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 83)           0                                            
__________________________________________________________________________________________________
model_1 (Model)                 (None, 1)            3687        input_2[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               [(None, 1), (None, 1 0           input_3[0][0]                    
__________________________________________________________________________________________________
lambda_2 (

In [10]:


# Train the combined model with the same 16667-row split used for the base model.
combined_model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])

# Inputs are [bag-of-words features, human-term inputs]; targets are the original labels.
# NOTE(review): batch_size=1 performs one weight update per document.
combined_model_history = combined_model.fit([X_train[:16667],y_train_agreement[:16667]], y_train_original[:16667], 
                                            validation_data=([X_train[16667:], y_train_agreement[16667:]], y_train_original[16667:]),
                                            batch_size=1, epochs=1)

Train on 16667 samples, validate on 8333 samples
Epoch 1/1


In [11]:
# [loss, accuracy] of the combined model on the test set.
score = combined_model.evaluate([X_test, y_test_agreement], y_test_original)
score



[0.3513839517402649, 0.85716]

In [12]:
# Combined-model sigmoid outputs for every test document.
predict = combined_model.predict([X_test, y_test_agreement])

In [13]:
# Collapse the prediction array to 1-D.
# NOTE: this rebinds `predict`, so the cell's input is overwritten by its output.
predict = predict.flatten()
predict.shape

(25000,)

In [14]:
# Hard labels: 1 where the predicted value exceeds 0.5, otherwise 0.
predict_class = (predict > 0.5).astype('int32')
predict_class

array([1, 1, 1, ..., 1, 1, 0])

In [15]:
np.sum(y_test_original == predict_class)/len(predict_class)

0.85716

In [16]:
np.sum(y_test_original==0)

12500

In [17]:
# [loss, accuracy] on all of X_train, i.e. both the fitting rows and the validation rows.
# NOTE: this rebinds `score`, which previously held the test-set result.
score = combined_model.evaluate([X_train, y_train_agreement], y_train_original)
score



[0.30234828131675723, 0.88084]

In [18]:
# ht_weight = []
# for i in range(len(combined_net_weight)):
#     if i>1 and i < 85:
#         print(combined_net_weight[i].flatten())
#         ht_weight.append(combined_net_weight[i].flatten())
#     else:
#         print(i, combined_net_weight[i].shape)
# ht_weight = np.asarray(ht_weight)

In [19]:
# # Print report on the word transparency


# index = [9, 19]
# def report():
#     for i in index:
#         print()
#         bm = base_model.predict(X_test[i])
        
#         cm = combined_model.predict([X_test[i], 
#                                 np.reshape(y_test_agreement[i], (1,y_test_agreement.shape[1]))])
        
# #         document_output = 'multiply'
# #         document_predict = Model(inputs=combined_model.input,
# #                                      outputs=combined_model.get_layer(document_output).output)
# #         doc_output = document_predict.predict([np.reshape(data[i], (1,5)), 
# #                                       ht_1_input[i], 
# #                                       ht_2_input[i], 
# #                                       ht_3_input[i], 
# #                                       ht_4_input[i]])
        
#         layer_name = 'concatenate'
#         concat_after_relu = Model(inputs=combined_model.input,
#                                      outputs=combined_model.get_layer(layer_name).output)
#         concat_output = concat_after_relu.predict([X_test[i], 
#                                 np.reshape(y_test_agreement[i], (1,y_test_agreement.shape[1]))])
        
#         print(X_test_original[i], '\n\n actual label : ', y_test_original[i], '\n predict from base model : ', bm.flatten(), '\n predict label : ', cm.flatten())
    
#         for i,output in enumerate(concat_output.flatten()):
#             if output != 0:
#                 print(word_list[i], output)

In [20]:
# report()

In [21]:
# report()

In [22]:
# Re-evaluate on the test set so `score` holds the test result again.
score = combined_model.evaluate([X_test, y_test_agreement], y_test_original)



In [23]:
score

[0.3513839517402649, 0.85716]

In [24]:
# 

### Let's see the weights when `trainable` is False

In [25]:
# def build_combined_model():
#     # build the combined model
#     # Combined model
#     human_terms_len = len(word_list)

#     base_model = build_base_model(X_train.shape[1])

#     combined_input_layer = Input(shape=(X_train.shape[1],))

#     # build the hard coded weight for human terms
#     ht_input_layer = Input(shape=(human_terms_len,))

#     split = Lambda( lambda x: tf.split(x,num_or_size_splits=human_terms_len,axis=1))(ht_input_layer)

#     # get the document prediction
#     label_layer = base_model(combined_input_layer)

#     # stack the multiply layer
#     dense_layer = []
#     for i in range(human_terms_len):
#         dense_layer.append(Dense(1, activation='relu',use_bias=False, kernel_initializer='ones', trainable=True)(Multiply()([split[i], label_layer])))

#     # concat all the result   
#     concat = Lambda( lambda x: tf.concat(x, axis=1), name='concatenate')(dense_layer)

#     # pass it to sigmoid layer
#     output_layer = Dense(1, activation='sigmoid')(concat)

#     combined_model = Model(inputs=[combined_input_layer, ht_input_layer], outputs=output_layer)
#     combined_model.summary()
    
#     return base_model, combined_model

In [26]:
# base_model, combined_model = build_combined_model()

In [27]:
# base_model.compile(loss='mse',
#                   optimizer='Adagrad',
#                   metrics=['acc'])

# combined_model.compile(loss='mse',
#                       optimizer='Adagrad',
#                       metrics=['acc'])

# base_model_history_train_false = base_model.fit(X_train[:16667], y_train_tanh[:16667], 
#                                     validation_data=(X_train[16667:], y_train_tanh[16667:]),
#                                     batch_size=1, epochs=50)

# combined_model_history_train_false = combined_model.fit([X_train[:16667],y_train_agreement[:16667]], y_train_original[:16667], batch_size=1, epochs=50)

In [28]:
# base_model_history_trainfalse = base_model_history
# combined_model_history_trainfalse = combined_model_history

In [29]:
# net_weights = combined_model.get_weights()

In [30]:
# Evaluate model

### Generate color-weighted documents

In [31]:
import html
import re

class ColoredWeightedDoc(object):
    """Jupyter-renderable view of a document with its human terms highlighted.

    The document is split on single spaces. For each piece, the first regex
    match of its lower-cased text is looked up in ``human_terms``:

    * term found and its weight is exactly 0 -> light green, size 2
      (a human term that was washed out);
    * term found and its weight is not 0 -> blue, size 3;
    * anything else (no regex match, not a human term, or a repeated term
      when ``binary`` is true) -> grey, size 1.

    Args:
        doc: The document text.
        human_terms: Sequence of human terms (list, tuple, NumPy array, ...).
            When a term occurs more than once its first position is used.
        ht_weights: Sequence indexable by the positions of ``human_terms``.
            An index that is out of range raises ``IndexError`` instead of
            being silently rendered as grey.
        token_pattern: Regex used to extract the vocabulary token from each
            space-separated piece.
        binary: When true, each human term is highlighted only the first time
            it appears in the document.
    """

    def __init__(self, doc, human_terms, ht_weights, token_pattern=r"(?u)\b\w\w+\b", binary = False):
        self.doc = doc
        self.human_terms = human_terms
        self.ht_weights = ht_weights
        self.binary = binary
        self.tokenizer = re.compile(token_pattern)

    @staticmethod
    def _font(size, color, text):
        """Wrap ``text`` in the ``<font>`` markup used for every token."""
        return "<font size = " + str(size) + ", color=" + color + "> " + text + " </font>"

    def _repr_html_(self):
        """Return the HTML string that IPython displays for this object."""
        # Term -> first position. This matches what list.index would return,
        # but also works for containers without .index and avoids a linear
        # scan for every token.
        term_index = {}
        for position, term in enumerate(self.human_terms):
            term_index.setdefault(term, position)

        seen_terms = set()
        pieces = []
        for token in self.doc.split(" "):
            # Escape the raw text so characters such as '<' or '&' in a
            # document cannot break the generated markup.
            safe_token = html.escape(token, quote=False)

            matches = self.tokenizer.findall(token.lower())
            vocab_index = term_index.get(matches[0]) if matches else None

            # Not a human term, or a repeated one in binary mode: neutral grey.
            if vocab_index is None or (self.binary and vocab_index in seen_terms):
                pieces.append(self._font(1, "grey", safe_token))
                continue

            if self.ht_weights[vocab_index] == 0:
                # Human term which has been washed out (opposing).
                pieces.append(self._font(2, "lightgreen", safe_token))
            else:
                # Human term that contributes to the prediction.
                pieces.append(self._font(3, "blue", safe_token))

            if self.binary:
                seen_terms.add(vocab_index)

        return "".join(pieces)

In [95]:
# review = list(['I was in Chicago last week. And I wanted to see a movie so bad. I am surprised that the movie itself is just amazing.  The plot was kinda weak, but it was great.  Christopher Nolan is just brilliant. Never fails to amaze. Even though the weather was terrible, but I enjoyed it. No regret! Overall, 10/10. Recommended.'])

# Hand-written probe review, wrapped in a one-element list so it can be passed
# to `cv.transform` and `generate_appearance` like a corpus.
review = list(['I went to the movie. The movie was terrible. acting was good, but the plot was awful. the weather is excellent. 1/10 . avoid it.'])

In [96]:
# lr1 = LogisticRegression(C=0.01, random_state=42, penalty='l1')
# lr1.fit(X_train, y_train_original)

# Vectorize the probe review with the vocabulary fitted on the training set.
review_cv = cv.transform(review)
# words = cv.get_feature_names()
# weights = lr1.coef_.flatten()

# The same one-element list is passed as both the train and the test argument;
# only the first returned array is kept.
y_review_agreement, _ = generate_appearance(review, review, word_list, connotation)



In [97]:
from IPython import display

# Prediction of the base classifier for the probe review.
bm = model.predict(review_cv)
        
# Prediction of the combined model (features + human-term inputs).
cm = combined_model.predict([review_cv, 
                             y_review_agreement])
        
# Sub-model that exposes the output of the layer named 'concatenate'
# (one post-ReLU value per human term).
layer_name = 'concatenate'
concat_after_relu = Model(inputs=combined_model.input,
                          outputs=combined_model.get_layer(layer_name).output)
concat_output = concat_after_relu.predict([review_cv, y_review_agreement])
        
# The probe review has no stored label; 0 is hard-coded as its actual label.
print('actual label : ', 0, '\npredict from base model : ', bm.flatten(), '\npredict label : ', cm.flatten())
print()
# List the human terms whose activation is non-zero for this review.
for i,output in enumerate(concat_output.flatten()):
    if output != 0:
        print(word_list[i], output)


ht_weight = concat_output.flatten()
# Tokenize with the vectorizer's pattern (`token`). The class default needs two
# consecutive word characters and stops at '/', so a term such as "1/10" would
# be reduced to "10" and never be highlighted.
display.display(ColoredWeightedDoc(review[0], word_list, ht_weight, token_pattern=token, binary = False))

actual label :  0 
predict from base model :  [0.03950243] 
predict label :  [0.01695214]

1/10 0.9209951
avoid 0.9209951
awful 0.9209951
terrible 0.9209951


In [35]:
ht_weight[12]

0.94634056

In [36]:
# NOTE(review): mutates `y_review_agreement` in place — entry 12 of the single
# review row is forced to -1. Any cell that reads `y_review_agreement` gives
# different results depending on whether this cell has been run.
y_review_agreement[0][12] = -1

In [37]:
from IPython import display
idx = 1001
# idx = 9002
# Prediction of the base classifier for the selected test document.
bm = model.predict(X_test[idx])
        
# Prediction of the combined model; the human-term row is reshaped into a
# one-row batch.
cm = combined_model.predict([X_test[idx], 
                             np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
# Sub-model that exposes the output of the layer named 'concatenate'
# (one post-ReLU value per human term).
layer_name = 'concatenate'
concat_after_relu = Model(inputs=combined_model.input,
                          outputs=combined_model.get_layer(layer_name).output)
concat_output = concat_after_relu.predict([X_test[idx], np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
print('actual label : ', y_test_original[idx], '\npredict from base model : ', bm.flatten(), '\npredict label : ', cm.flatten())
print()
# List the human terms whose activation is non-zero for this document.
for i,output in enumerate(concat_output.flatten()):
    if output != 0:
        print(word_list[i], output)


ht_weight = concat_output.flatten()
# Tokenize with the vectorizer's pattern (`token`). The class default needs two
# consecutive word characters and stops at '/', so slash terms such as "1/10"
# would never be highlighted.
display.display(ColoredWeightedDoc(X_test_original[idx], word_list, ht_weight, token_pattern=token, binary = False))

actual label :  0 
predict from base model :  [8.6201925e-07] 
predict label :  [0.00044158]

awful 0.9999983
boring 0.9999983
horrible 0.9999983
pathetic 0.9999983
ridiculous 0.9999983
stupid 0.9999983
waste 0.9999983


In [38]:
np.where(np.sum(y_test_agreement, axis=1)!=0)[0][1000:1010]

array([1231, 1232, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241],
      dtype=int64)

In [39]:
score = combined_model.evaluate([X_test, y_test_agreement], y_test_original)
score



[0.3513839517402649, 0.85716]

In [40]:
# Output of the 'concatenate' layer for every test document (one row per
# document, one column per human term).
concat_all = concat_after_relu.predict([X_test, y_test_agreement])

In [41]:
concat_all.shape

(25000, 83)

In [42]:
# Row indices of test documents whose human-term activations do not sum to zero.
indices = np.where(np.sum(concat_all, axis=1)!=0)
# np.where returns a tuple with one array per axis; keep the row-index array.
indices = indices[0]
len(indices)

22049

In [43]:
# [loss, accuracy] restricted to the test documents selected in `indices`.
not_zero_predict = combined_model.evaluate([X_test[indices], y_test_agreement[indices]], y_test_original[indices])



In [44]:
not_zero_predict

[0.30619207704719803, 0.8978638487006213]

In [45]:
(X_test.shape[0] - len(indices))/X_test.shape[0]

0.11804

In [46]:
combined_model.metrics_names

['loss', 'acc']

In [47]:
# Persist only the weights (not the architecture) to the working directory.
combined_model.save_weights('imdb_human_term_weight.hdf5')

# Get cases

In [84]:
from IPython import display
idx = 7079
# idx = 9002
# Prediction of the base classifier for the selected test document.
bm = model.predict(X_test[idx])
        
# Prediction of the combined model; the human-term row is reshaped into a
# one-row batch.
cm = combined_model.predict([X_test[idx], 
                             np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
# Sub-model that exposes the output of the layer named 'concatenate'
# (one post-ReLU value per human term).
layer_name = 'concatenate'
concat_after_relu = Model(inputs=combined_model.input,
                          outputs=combined_model.get_layer(layer_name).output)
concat_output = concat_after_relu.predict([X_test[idx], np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
print('actual label : ', y_test_original[idx], '\npredict from base model : ', bm.flatten(), '\npredict label : ', cm.flatten())
print()
# List the human terms whose activation is non-zero for this document.
for i,output in enumerate(concat_output.flatten()):
    if output != 0:
        print(word_list[i], output)


ht_weight = concat_output.flatten()
# Tokenize with the vectorizer's pattern (`token`). The class default needs two
# consecutive word characters and stops at '/', so slash terms such as "7/10"
# would never be highlighted.
display.display(ColoredWeightedDoc(X_test_original[idx], word_list, ht_weight, token_pattern=token, binary = False))

actual label :  1 
predict from base model :  [0.9981029] 
predict label :  [0.99961615]

7/10 0.9962058
amazing 0.9962058
beautiful 0.9962058
fun 0.9962058
loved 0.9962058
wonderful 0.9962058


In [49]:
from IPython import display
idx = 11050
# idx = 9002
# Prediction of the base classifier for the selected test document.
bm = model.predict(X_test[idx])
        
# Prediction of the combined model; the human-term row is reshaped into a
# one-row batch.
cm = combined_model.predict([X_test[idx], 
                             np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
# Sub-model that exposes the output of the layer named 'concatenate'
# (one post-ReLU value per human term).
layer_name = 'concatenate'
concat_after_relu = Model(inputs=combined_model.input,
                          outputs=combined_model.get_layer(layer_name).output)
concat_output = concat_after_relu.predict([X_test[idx], np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
print('actual label : ', y_test_original[idx], '\npredict from base model : ', bm.flatten(), '\npredict label : ', cm.flatten())
print()
# List the human terms whose activation is non-zero for this document.
for i,output in enumerate(concat_output.flatten()):
    if output != 0:
        print(word_list[i], output)


ht_weight = concat_output.flatten()
# Tokenize with the vectorizer's pattern (`token`). The class default needs two
# consecutive word characters and stops at '/', so slash terms such as "1/10"
# would never be highlighted.
display.display(ColoredWeightedDoc(X_test_original[idx], word_list, ht_weight, token_pattern=token, binary = False))

actual label :  1 
predict from base model :  [0.9872471] 
predict label :  [0.99959224]

best 0.9744942
fascinating 0.9744942
favorite 0.9744942
great 0.9744942
subtle 0.9744942
wonderful 0.9744942
wonderfully 0.9744942


In [50]:
from IPython import display
idx = 21625
# idx = 9002
# Prediction of the base classifier for the selected test document.
bm = model.predict(X_test[idx])
        
# Prediction of the combined model; the human-term row is reshaped into a
# one-row batch.
cm = combined_model.predict([X_test[idx], 
                             np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
# Sub-model that exposes the output of the layer named 'concatenate'
# (one post-ReLU value per human term).
layer_name = 'concatenate'
concat_after_relu = Model(inputs=combined_model.input,
                          outputs=combined_model.get_layer(layer_name).output)
concat_output = concat_after_relu.predict([X_test[idx], np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
print('actual label : ', y_test_original[idx], '\npredict from base model : ', bm.flatten(), '\npredict label : ', cm.flatten())
print()
# List the active human terms together with their entry in `connotation`.
for i,output in enumerate(concat_output.flatten()):
    if output != 0:
        print(word_list[i], output, connotation[word_list[i]])


ht_weight = concat_output.flatten()
# Tokenize with the vectorizer's pattern (`token`). The class default needs two
# consecutive word characters and stops at '/', so slash terms such as "1/10"
# would never be highlighted.
display.display(ColoredWeightedDoc(X_test_original[idx], word_list, ht_weight, token_pattern=token, binary = False))

actual label :  1 
predict from base model :  [0.00657608] 
predict label :  [0.00018597]

avoid 0.9868478 0
bad 0.9868478 0
boring 0.9868478 0
dull 0.9868478 0
funny 0.9868478 0
pointless 0.9868478 0
worst 0.9868478 0


In [91]:
from IPython import display
idx = 30
# idx = 9002
# Prediction of the base classifier for the selected test document.
bm = model.predict(X_test[idx])
        
# Prediction of the combined model; the human-term row is reshaped into a
# one-row batch.
cm = combined_model.predict([X_test[idx], 
                             np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
# Sub-model that exposes the output of the layer named 'concatenate'
# (one post-ReLU value per human term).
layer_name = 'concatenate'
concat_after_relu = Model(inputs=combined_model.input,
                          outputs=combined_model.get_layer(layer_name).output)
concat_output = concat_after_relu.predict([X_test[idx], np.reshape(y_test_agreement[idx], (1,y_test_agreement.shape[1]))])
        
print('actual label : ', y_test_original[idx], '\npredict from Regular Document Classifier : ', bm.flatten(), '\nFinal Prediction : ', cm.flatten())
print()
# The per-term listing is intentionally disabled in this cell:
# for i,output in enumerate(concat_output.flatten()):
#     if output != 0:
#         print(word_list[i], output)


ht_weight = concat_output.flatten()
# Tokenize with the vectorizer's pattern (`token`). The class default needs two
# consecutive word characters and stops at '/', so slash terms such as "1/10"
# would never be highlighted.
display.display(ColoredWeightedDoc(X_test_original[idx], word_list, ht_weight, token_pattern=token, binary = False))

actual label :  0 
predict from Regular Document Classifier :  [0.08725411] 
Final Prediction :  [0.24311519]



In [52]:
y_test_agreement[14689]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [53]:
# Per test document: number of human-term entries equal to 1.
positive = np.sum(y_test_agreement==1, axis=1)

In [54]:
# Per test document: number of human-term entries equal to -1.
negative = np.sum(y_test_agreement==-1, axis=1)

In [55]:
np.argmax(positive)

21728

In [56]:
np.max(negative)

17

In [57]:
X_train_original[np.argmax(negative)]

'the movie has a distinct (albeit utish and rough) humanity for all its borderline depravity - the zippy/lyrical score point up the comic side of their misadventure, and even when they are at their most thuggish (like terrorizing the woman on the train), a semi-pitiful vulnerability lurk never far away (dewaere suck on her east like a baby). blier cut away from the scene where depardieu may be about to rape dewaere, so we are never sure how explicitly to read the manifestly homoerotic aspect of their relationship - either way, that incident is the start of their relative humanization (so the movie could certainly be read as pro-gay, although it could likely be read as pro-anything you want). the movie has many objectionable scene and point of sexual politic and is probably best taken as a general cartoon on the foible of both sex, making a mockery of the whole notion of sensitivity and honesty, and hitting numerous point of possible profundity on the basis that if you fire off enough s

In [58]:
np.argmax(negative)

4378

In [59]:
np.argmax(positive)

21728

In [60]:
# Turn the per-document counts into column vectors. -1 lets NumPy infer the
# number of rows, so the cell does not hard-code the size of the test set.
positive = positive.reshape(-1, 1)
negative = negative.reshape(-1, 1)

# Column 0 holds the count of +1 entries, column 1 the count of -1 entries.
stack = np.hstack([positive, negative])
stack.shape

(25000, 2)

In [61]:
# Absolute difference between the +1 count and the -1 count of each document.
new = np.absolute(stack[:,0] - stack[:,1])

In [62]:
# Documents whose two counts differ by at most one while at least one of the
# counts is greater than six.
ind = [row for row in range(new.shape[0])
       if new[row] <= 1 and (stack[row, 0] > 6 or stack[row, 1] > 6)]

In [63]:
ind

[2494, 7079, 11050, 12559, 20031, 21625, 21874, 23852]

In [64]:
# [+1 count, -1 count] rows of the documents selected in `ind`.
stack_balance = [stack[row] for row in ind]

In [65]:
stack_balance

[array([7, 7]),
 array([6, 7]),
 array([7, 6]),
 array([7, 7]),
 array([7, 7]),
 array([6, 7]),
 array([7, 7]),
 array([6, 7])]

In [66]:
np.argmax(stack_balance)

0

In [68]:
# 'concatenate'-layer output for every test document.
# NOTE(review): this is the same call, with the same inputs, that produces `concat_all`.
matrices = concat_after_relu.predict([X_test, y_test_agreement])

In [79]:
# Row indices of test documents whose human-term activations sum to exactly zero.
null_indices = np.where(np.sum(matrices, axis=1) == 0)[0]

In [80]:
# Show the first 20 indices of documents with no active human term.
for i in null_indices[:20]:
    print(i)

8
36
60
64
74
76
78
87
90
91
95
102
104
112
120
126
130
134
140
145


In [78]:
len(null_indices)

1

In [71]:
X_test_original[8]

'a very positive message for our youth is shown in this movie. through the sport of rugby as a mean, a high school rugby coach leads his player in their behavior and lifestyle, on and off the field. the acting and directing are good. the rugby shot are just ok if you know some rugby or really cool if do not know much about it.i recommend this movie, worth watching, especially at a point in time when this sport is gaining more and more adept every day.the plot is based on a true story taking place in utah. the coach portrayed in this movie has been at the helm of the highlander for over 30 year (and loosing only game every 3 year). enjoy!'

In [72]:
X_test_original[15]

'why do person need to follow the opinion of the herd of mass and critic? random heart, directed by the illiant sydney pollack (who has a small role in the film too) is another harrison ford vehicle. as such, it is quite good and entertaining. surely, anyone who goes to see it has this in mind, or read the book which is no better. even kristin scott thomas fan, myself included, knew it would be a variation of her again playing the love interest of her eldest uncle. even as such, the film is satisfying. what is so bad about this movie that is much better in the other (much higher rated) harrison ford vehicle? this film is no masterpiece, but it is not as bad as the mass would have the potential viewer believe.'

In [87]:
# Row indices of test documents for which the base model predicts below 0.2.
neg_indices = np.where(model.predict(X_test) < 0.2)[0]

In [89]:
neg_indices[:20]

array([ 3,  4,  5, 10, 16, 17, 22, 24, 25, 27, 28, 30, 31, 39, 42, 45, 46,
       49, 51, 54], dtype=int64)