In [18]:
import pickle
import spacy
import numpy as np
from keras.models import load_model

class Embedder(object):
    def __init__(self, max_length=100):
        self.max_length = max_length
        print("Loading spacy...")
        self.nlp = spacy.load('en')
        print("...done")

    def embed(self, text):
        return self._pad(self._vectors(text))

    def shape(self):
        return (self.max_length, 300)

    def _vectors(self, text):
        doc = self.nlp(text)
        vectors = []
        for token in doc:
            vectors.append(token.vector)
        return vectors

    def _pad(self, vectors):
        vector_dim = len(vectors[0])
        sequence = np.zeros((self.max_length, vector_dim))
        for i, vector in enumerate(vectors):
            if i == self.max_length:
                break
            sequence[i] = vector
        return sequence

embedder = Embedder()

Loading spacy...
...done


In [11]:
import json
import numpy as np

from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.models import Model, load_model


import pickle

def load_data(limit=0):
    examples = []
    intent_names = {}
    with open('json.json') as data_file:
        data = json.load(data_file)
    for i, intent in enumerate(data):
        intent_names[i] = intent["prompt"]
        for message in intent["response"]:
            examples.append((message, i))
    np.random.shuffle(examples)
    if limit >= 1:
        examples = examples[:limit]
    messages, intents = zip(*examples)
    return examples, intent_names

load_data()

[('no i want three tickets for sunday night', 510), ('yes do you also have tickets for the british museum', 433), ('yes have you also got tickets for madame tussauds please', 432), ("no i'd like a red sweatshirt", 345), ('can you please give me three tickets for wednesday night', 506), ('i want a yellow t-shirt please', 337), ('can you offer me a white t-shirt', 353), ('no i will have my hamburger rare please', 309), ('yes i need tickets for tomorrow night', 530), ('thank you can i buy three tickets', 468), ('no can you offer me red boots please', 328), ('yes could i also have a shirt please', 392), ('can you please give me purple jeans', 363), ('no could you give me one ticket for thursday night please', 489), ('i want three tickets for the phantom of the opera please', 455), ('yes i should like a yellow shirt', 335), ('no can you offer me three tickets for friday night', 508), ('no i am looking for one ticket', 466), ('could you offer me four tickets for the phantom of the opera plea

([('no i want three tickets for sunday night', 510),
  ('yes do you also have tickets for the british museum', 433),
  ('yes have you also got tickets for madame tussauds please', 432),
  ("no i'd like a red sweatshirt", 345),
  ('can you please give me three tickets for wednesday night', 506),
  ('i want a yellow t-shirt please', 337),
  ('can you offer me a white t-shirt', 353),
  ('no i will have my hamburger rare please', 309),
  ('yes i need tickets for tomorrow night', 530),
  ('thank you can i buy three tickets', 468),
  ('no can you offer me red boots please', 328),
  ('yes could i also have a shirt please', 392),
  ('can you please give me purple jeans', 363),
  ('no could you give me one ticket for thursday night please', 489),
  ('i want three tickets for the phantom of the opera please', 455),
  ('yes i should like a yellow shirt', 335),
  ('no can you offer me three tickets for friday night', 508),
  ('no i am looking for one ticket', 466),
  ('could you offer me four tick

In [12]:
def dummy_encode(array, num_classes=None):
    array = np.array(array)
    if num_classes is None:
        num_classes = max(array) + 1
    result = np.zeros((len(array), num_classes))
    result[np.arange(len(array)), array] = 1
    return result

def create_dataset(type):
    if type == 'train':
        data_dir = 'data/train'
    elif type == 'dev':
        data_dir = 'data/dev'
    else:
        assert(False), "Type must be train or dev"
    examples, intent_names = load_data()
    X = []
    y = []
    for example in examples:
        message, intent = example[0], example[1]
        X.append(embedder.embed(message))
        y.append(intent)
    return np.array(X), dummy_encode(np.array(y)), intent_names

In [15]:
X_train, y_train, intent_names_train = create_dataset('train')

[('i am looking for a purple sweatshirt', 349), ('no can i have tickets for monday evening please', 522), ('thank you i am looking for one ticket for monday evening please', 486), ('can you please offer me two tickets for sunday evening', 501), ('thank you can i have some boots please', 318), ('i need a purple t-shirt', 350), ('can i have two tickets for tonight please', 502), ('i will have a hairdryer', 225), ('thank you could i also have tickets for the london eye', 434), ('can i please have a size l', 404), ('i would like red jeans', 361), ('i am looking for tickets for thursday night please', 525), ('yes can you please offer me red sandals', 327), ('yes could i buy four tickets for thursday evening please', 516), ('i should like to go monday night', 531), ('could you offer me three tickets for sunday night', 510), ('yes i would also like a t-shirt', 394), ('can i have three tickets for tomorrow evening', 512), ("yes i'd like two tickets for tuesday evening please", 496), ('thank yo

NameError: name 'embedder' is not defined

In [None]:
X_train, y_train, intent_names_train = create_dataset('train')
X, y, intent_names = X_train, y_train, intent_names_train

[('yes can i have tickets for saturday evening', 527), ('no could i buy one ticket for wednesday evening please', 488), ('no can i buy one ticket for tomorrow evening please', 494), ('yes i should like red jeans please', 361), ('yes can you offer me one ticket for thursday evening', 489), ('i would like to have a fanta please', 273), ("yes i'd like sweatshirts please", 393), ("yes i'd like three tickets for sunday night", 510), ('no could you please give me four tickets for friday night', 517), ('thank you could i have four tickets for saturday evening please', 518), ('yes could you give me white jeans please', 365), ('no i want one ticket', 466), ('i think i would like an orange juice', 276), ('yes i will have three tickets for tonight', 511), ("no i'd like four tickets for this evening please", 520), ('could you give me a tee shirt', 394), ('yes could you give me two tickets for monday evening please', 495), ('yes can you offer me red sandals', 327), ('yes i would like my steak blue'