## Python Core Demo

How to use the python core from a Jupyter notebook. It also shows how to debug the JSON application configs which are used to connect to external database(s) and redis server(s).


In [25]:
import redis
import time 
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import base64
import pickle
from collections import defaultdict
rDB = redis.Redis(host='localhost', port=6379, db=0)
def store_numpy(name,mat):
    """
      Creates a matrix from a numpy matrix
    """
    if len(mat.shape) != 2:
        raise BaseException('Shape of input matrix must be of size 2')
    rows,cols = mat.shape
    array_dtype = str(mat.dtype)
    m = mat.ravel().tostring()
    key = '{0}|{1}#{2}#{3}'.format(int(time.time()), array_dtype, rows, cols)
    rDB.set(name,m)
    return key

def get_numpy(name,key):
    d_mat = rDB.get(name)
    array_dtype, row, col = key.split('|')[1].split('#')
    return np.fromstring(d_mat, dtype=array_dtype).reshape(int(row), int(col))
def set_builder(lst):
    output = []
    for l in lst:
        output+=list(set(l))
    return list(set(output))
pipe = rDB.pipeline()
keys = ['doc_by_vocab_data']
[pipe.get(k) for k in keys]
result = pipe.execute()
data = {k:result[i] for i,k in enumerate(keys)}
key = data['doc_by_vocab_data']
doc_by_vocab = get_numpy('doc_by_vocab',key)
print(doc_by_vocab.shape)

(5210, 4676)


In [26]:
doc_by_vocab.dtype

dtype('float64')

In [27]:
vocab_to_index = pickle.loads(rDB.get('vocab_to_index'))
index_to_vocab = pickle.loads(rDB.get('index_to_vocab'))
episode_to_index = pickle.loads(rDB.get('e_t_to_i'))
index_to_episode = pickle.loads(rDB.get('e_i_to_t'))
series_to_index = pickle.loads(rDB.get('s_t_to_i'))
index_to_series = pickle.loads(rDB.get('s_i_to_t'))
series_episodes = pickle.loads(rDB.get('series_episodes'))
episode_tags = pickle.loads(rDB.get('e_tags'))

In [28]:
n_users = 1

In [29]:
index_to_tag = set_builder(episode_tags.values())
n_tags = len(index_to_tag)
tag_to_index = {t:i for i,t in enumerate(index_to_tag)}
episode_tag_matrix = np.zeros([len(index_to_episode),n_tags])
for i in xrange(n_tags):
    for tag in episode_tags[index_to_episode[i]]:
        episode_tag_matrix[i][tag_to_index[tag]] = 1

In [30]:
from keras.layers import Dense, Input, Embedding, Flatten, SimpleRNN
from keras.models import Model
from keras.layers.merge import dot
from keras.losses import mean_squared_error
user_in = Input(shape=(1,), dtype = 'int32')
true_rating_in = Input((1,), dtype = 'float32')
meta_features_in = Input(shape=(doc_by_vocab.shape[1],), dtype = 'float32')
user_embeddings = Embedding(n_users, 5)
user_encoded = Flatten()(user_embeddings(user_in))
episode_encoded = Dense(5)(meta_features_in)
prediction = dot([user_encoded, episode_encoded], axes = 1)
model = Model(inputs = [user_in, meta_features_in,true_rating_in],
                     outputs = prediction)
model.compile(loss = 'mse', optimizer = 'adam')
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_7 (InputLayer)             (None, 1)             0                                            
____________________________________________________________________________________________________
embedding_3 (Embedding)          (None, 1, 5)          5                                            
____________________________________________________________________________________________________
input_9 (InputLayer)             (None, 4676)          0                                            
____________________________________________________________________________________________________
flatten_3 (Flatten)              (None, 5)             0                                            
___________________________________________________________________________________________

In [38]:
ratings = np.zeros((doc_by_vocab.shape[0],n_users))
series_name = index_to_series[437]
series_name2 = index_to_series[137]
for i in [episode_to_index[e] for e in series_episodes[series_name]]:
    ratings[i] = 1.0
for i in [episode_to_index[e] for e in series_episodes[series_name2]]:
    ratings[i] = 1.0
# for i in [k for k,v in episode_tags.iteritems() if ('comedy' in v) or ('humor' in v)]:
#     ratings[i] = 5.0
users = np.zeros((doc_by_vocab.shape[0],1))
labels = np.zeros((doc_by_vocab.shape[0],1))

In [37]:
model.fit([users,doc_by_vocab,ratings],labels,epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11e7eed50>

In [46]:
[index_to_episode[e_i] for e_i in np.array([model.predict([np.array([0]),doc_by_vocab[v].reshape(1,4676),np.array([1.])], batch_size=32, verbose=2)[0][0] for v in range(doc_by_vocab.shape[0])]).argsort()][:10]

['BSCC 011 - Kristy and the Snobs',
 '42. The Big Reveal',
 'Texting in A Movie Makes You An Asshole w/ Anton Yelchin & Jeremy Saulnier! LMMT #12',
 'Episode 062: How Thoughts Can Heal',
 '26: Does Jenny Have What it Takes? Her Family Members Get Brutally Honest',
 "019 - Tim's Kids - Spoiled Rich Kid, Flip Phones and Hockey Talk",
 'The Dave Portnoy Show Preview',
 'DRUNK FLORIDA MAN ATTACKS SKYDOESMINECRAFT?! uh oh LMMT #21',
 '065: Why I Budget Monthly, Semi-Monthly and Weekly by Cait Flanders of Blonde On A Budget (Track Income & Expenses Free)',
 'Ep 51: The Wild West of Microblading']

In [63]:
from keras.layers import Dense, Input, Embedding, Flatten, Dropout, Bidirectional, GRU
from keras.models import Model
from keras.layers.merge import dot
from keras.layers.pooling import AveragePooling1D
from keras.losses import mean_squared_error
from keras.regularizers import l2 as l2reg

user_in = Input(shape=(1,), dtype = 'int32')
true_rating_in = Input((1,), dtype = 'float32')
meta_features_in = Input(shape=(doc_by_vocab.shape[1],), dtype = 'float32')
##########
user_embeddings = Embedding(n_users, 200)
user_encoded = Flatten()(user_embeddings(user_in))
##########
episode_embeddings = Embedding(doc_by_vocab.shape[1], 200,embeddings_regularizer=l2reg(0.01))
episode_encoded = Flatten()(episode_embeddings(meta_features_in))
episode_first_dropout = Dropout(0.1)(episode_encoded)
episode_post_first_rnn = Bidirectional(GRU(units=400))(episode_first_dropout)
episode_second_dropout = Dropout(0.5)(episode_post_first_rnn)
episode_post_second_rnn = Bidirectional(GRU(units=200))(episode_second_dropout)
episode_third_dropout = Dropout(0.3)(episode_post_second_rnn)
episode_encoded_final = AveragePooling1D()(episode_third_dropout)
prediction = dot([user_encoded, episode_encoded_final], axes = 1)
model = Model(inputs = [user_in, meta_features_in,true_rating_in],
                     outputs = prediction)
model.compile(loss = 'mse', optimizer = 'adam')
model.summary()

IndexError: tuple index out of range