In [1]:
import json

import firebase_admin
import google.cloud.exceptions
import keras
from firebase_admin import credentials
from firebase_admin import db
from firebase_admin import firestore
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from keras.layers import Dense, Activation, Embedding, Flatten, GlobalMaxPool1D, Dropout, Conv1D
from keras.losses import binary_crossentropy
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MultiLabelBinarizer


Using TensorFlow backend.


In [2]:
# Restore the two trained Keras classifiers from their HDF5 checkpoints,
# category model first, then preference model.
model_category, model_preference = (
    keras.models.load_model(checkpoint_path)
    for checkpoint_path in ('model_category.h5', 'model_preference.h5')
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [3]:
# Load the label vocabularies that the encoders are fitted on.
with open("categories.json", "r") as fp:
    categories = json.load(fp)
with open("preferences.json", "r") as fp:
    preferences = json.load(fp)

# One LabelEncoder per prediction head; each is fitted on its own label list
# so that encoded indices can be mapped back to label names.
encoder_category = LabelEncoder()
encoder_preference = LabelEncoder()
encoder_category.fit(categories)
encoder_preference.fit(preferences)

LabelEncoder()

### Read unlabeled events from Firestore

In [4]:
# Connect to Firestore using the service-account key file next to the notebook.
cred = credentials.Certificate('evently-key.json')
try:
    # initialize_app() raises ValueError if the default app already exists,
    # which happens whenever this cell is re-run in a live kernel. Reuse the
    # existing app in that case instead of failing.
    firebase_admin.get_app()
except ValueError:
    firebase_admin.initialize_app(cred)
store = firestore.client()
# Name of the Firestore collection that holds the events to label.
collection_name = "events"

In [5]:
# Fetch every document snapshot in the events collection into `data`.
# NOTE: despite its name, `doc_ref` is a reference to the whole collection.
doc_ref = store.collection(collection_name)
try:
    # list() forces the query to run inside the try block; get() may return a
    # lazy iterator, in which case errors would otherwise surface later,
    # outside the handler.
    data = list(doc_ref.get())
except google.cloud.exceptions.NotFound:
    print(u'No such collection!')
    # Keep `data` defined so downstream cells see "no events" rather than
    # failing with a NameError.
    data = []

  app.launch_new_instance()


In [25]:
def predict(X, model_category, model_preference, encoder_category, encoder_preference, top_classes=1, tokenizer=None):
    """Predict categories and preference scores for raw event texts.

    Arguments:
        X: iterable of raw event texts (strings).
        model_category: model whose ``predict`` returns one score per category
            for each input row.
        model_preference: model whose ``predict`` returns one score per
            preference tag for each input row.
        encoder_category: fitted label encoder for categories; its
            ``inverse_transform`` maps class indices back to category names.
        encoder_preference: fitted label encoder for preferences; its
            ``classes_`` supply the preference tag names.
        top_classes: number of highest-scoring categories to return per text.
        tokenizer: optional, already-fitted tokenizer providing
            ``texts_to_sequences``. When omitted, a new Tokenizer
            (num_words=5000, lower=True) is fitted on ``X`` itself, which is
            the original behaviour.
            NOTE(review): a tokenizer fitted on the prediction inputs assigns
            word indices from this batch only; presumably the models expect
            the indices of the tokenizer used during training - confirm and
            pass that tokenizer here if so.

    Returns:
        A tuple ``(categories, preferences)``:
        categories: a list with one entry per text, each holding the
            ``top_classes`` best category names, best first.
        preferences: a list with one dict per text, mapping each lower-cased
            preference tag to its predicted score as a plain Python number.
        Both lists are empty when ``X`` is empty.
    """
    texts = list(X)
    if not texts:
        # Nothing to predict; avoid handing an empty batch to the models.
        return [], []

    if tokenizer is None:
        tokenizer = Tokenizer(num_words=5000, lower=True)
        tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    # Every sequence is padded (at the end) or truncated to 180 tokens.
    padded = pad_sequences(sequences, maxlen=180, padding="post")

    # Predict category: negate the scores so argsort orders them best first,
    # then keep the first `top_classes` indices of each row.
    category_pred = model_category.predict(padded)
    top_y = (-category_pred).argsort(axis=-1)[:, :top_classes]

    # Predict preference
    preference_pred = model_preference.predict(padded)
    # Must be a real list: on Python 3 map() is a one-shot iterator that would
    # be exhausted after the first row, leaving every later dict empty.
    preference_tags = [tag.lower() for tag in encoder_preference.classes_]

    category_labels = [encoder_category.inverse_transform(y) for y in top_y]
    # .item() converts each score to a plain Python number.
    preference_scores = [
        dict(zip(preference_tags, (score.item() for score in pred)))
        for pred in preference_pred
    ]
    return category_labels, preference_scores


### Preprocess text and use the trained models to predict

In [26]:
# Build one input text per event (name, plus description when present) and
# remember each event's document id in the same order.
X = []
docId = []
for doc in data:
    # Convert the snapshot once per document rather than once per field access.
    fields = doc.to_dict()
    x = fields['eventName']
    description = fields.get('description')
    if description:
        x += ' ' + description
    X.append(x)
    docId.append(doc.id)
predictedCategory, predictedPreference = predict(X, model_category, model_preference, encoder_category, encoder_preference, top_classes=2)
# list(...) keeps `pairs` indexable and sized on Python 3 as well, where zip()
# returns an iterator; on Python 2 the result is the same list as before.
pairs = list(zip(predictedCategory, predictedPreference, docId))
print(len(pairs))

1027


In [27]:
# Sanity check: show the first (categories, preferences, document id) triple.
pairs[0]

(array([u'Art', u'Food&Drink'], dtype='<U12'),
 {u'active': 0.28732144832611084,
  u'cultural': 0.19341245293617249,
  u'lit': 0.28528550267219543,
  u'relaxing': 0.2156735360622406},
 u'Eventbrite47360736237')

### Write back to Firestore

In [29]:
# Merge the predicted labels into each event document. merge=True adds or
# overwrites only the two fields below and leaves the rest of the document
# untouched.
# The loop variables are deliberately distinct from the notebook-level
# `categories`, `preferences` and `docId`, so running this cell does not
# clobber the label lists or the id list that other cells depend on.
for event_categories, event_preferences, event_id in pairs:
    event_ref = store.collection(collection_name).document(event_id)
    event_ref.set({
        # tolist() turns the array of category names into a plain list.
        u'categories': event_categories.tolist(),
        u'preferences': event_preferences
    }, merge=True)
print("Done")

Done
