In [1]:
import os
import json

def build_label_dict():
    """Load the boolean labels stored in ``./Frames-dataset/labels.txt``.

    Every non-blank line is expected to look like ``<key>,<value>``. A value
    that equals ``"True"`` after stripping whitespace maps to ``True``; any
    other value maps to ``False``.

    Returns:
        dict: the text before the comma mapped to a bool.

    Raises:
        ValueError: if a non-blank line does not contain exactly one comma.
    """
    label_path = r"./Frames-dataset/labels.txt"
    labels = {}
    # The context manager closes the file even if a line fails to parse.
    with open(label_path, encoding='utf-8') as label_file:
        for line in label_file:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            k, v = line.split(",")
            labels[k] = v.strip() == "True"
    return labels




In [2]:
# Returns a dictionary of chats. Each chat holds its label and its list of turns; every turn
# stores the turn text, the time elapsed since the previous turn, and the speaker id.

def gen_chat_data():
    """Read every chat file in ``./Frames-dataset/chats`` and attach its label.

    Each file is parsed as JSON. When the chat has a non-empty ``'turns'``
    list, every turn is reduced to a dict with:

    * ``"ti"``     - its timestamp minus the previous turn's timestamp
                     (0 for the first turn),
    * ``"text"``   - the turn text,
    * ``"author"`` - the turn author.

    Returns:
        dict: chat id (file name without its last five characters, presumably
        the ``.json`` suffix) mapped to ``{"turns": [...], "label": bool}``.

    Raises:
        KeyError: if a chat id has no entry in the label dictionary.
    """
    chat_path = r"./Frames-dataset/chats"
    labels = build_label_dict()
    chats = {}
    for filename in os.listdir(chat_path):
        # Close each chat file as soon as it has been parsed.
        with open(os.path.join(chat_path, filename), encoding='utf-8') as chat_file:
            chat = json.load(chat_file)

        turns = []
        # Guard against an empty 'turns' list, which has no first timestamp.
        if 'turns' in chat and chat['turns']:
            prev_ts = chat['turns'][0]['timestamp']
            for turn in chat['turns']:
                turns.append({"ti": turn['timestamp'] - prev_ts,
                              "text": turn["text"],
                              "author": turn["author"]})
                prev_ts = turn['timestamp']

        chat_id = filename[:-5]
        chats[chat_id] = {"turns": turns, "label": labels[chat_id]}
    return chats


In [3]:
import rdflib
import os, sys

# Text to analyse, taken from the first command-line argument. Fall back to an
# empty string when no argument was supplied instead of raising IndexError.
text = sys.argv[1] if len(sys.argv) > 1 else ""

def load_graph():
    """Create an rdflib graph and fill it from the N-Triples file ``onto.nt``.

    Returns:
        rdflib.Graph: the graph after parsing ``onto.nt`` with format ``"nt"``.
    """
    ontology = rdflib.Graph()
    ontology.parse(source="onto.nt", format="nt")
    return ontology

def check_in_ontology(g, a, b):
    """Query the graph for every predicate that links ``a`` to ``b``.

    Both names are placed under the ``http://o.org/`` prefix. For each result
    row the function prints a blank line, then ``a``, the row and ``b``, and
    finally calls ``get_type`` for ``a`` and for ``b``.

    Args:
        g: object exposing ``query(sparql_string)``.
        a: subject name (string).
        b: object name (string).

    Returns:
        Whatever ``g.query`` returned.
    """
    prefix = "<http://o.org/"
    sparql = "select * where {" + prefix + a + "> ?p " + prefix + b + ">}"
    matches = g.query(sparql)
    for row in matches:
        print()
        print(a, row, b)
        get_type(g, a)
        get_type(g, b)

    return matches


def get_type(g, a):
    """Print every ``http://o.org/type`` value the graph holds for ``a``.

    Args:
        g: object exposing ``query(sparql_string)``.
        a: entity name (string) placed under the ``http://o.org/`` prefix.

    Returns:
        None. Each result row is printed next to ``a``.
    """
    sparql = "".join(("select ?t where {<http://o.org/", a, "> <http://o.org/type> ?t}"))
    for row in g.query(sparql):
        print(a, row)


def normalize_word(w):
    """Remove commas and periods from ``w``, then strip surrounding whitespace."""
    # TODO: stemming
    cleaned = w
    for punctuation in (",", "."):
        cleaned = cleaned.replace(punctuation, "")
    return cleaned.strip()
    
def check_text(g, text):
    """Run ``check_in_ontology`` on every ordered pair of differing words.

    ``text`` is split on whitespace and each token is passed through
    ``normalize_word``. Pairs whose normalised forms are equal are skipped.

    Args:
        g: graph handed through to ``check_in_ontology``.
        text: string to analyse.

    Returns:
        None.
    """
    words = [normalize_word(token) for token in text.split()]
    for first in words:
        for second in words:
            if first == second:
                continue
            check_in_ontology(g, first, second)
                
def geographic_difference(g, s, t):
    """Score how far apart two place names are according to the ontology.

    The comparison is case-insensitive. Identical names score 0; otherwise the
    score is 3 minus the number of results ``check_in_ontology`` reports for
    the lower-cased pair.
    """
    # Currently - equal weight for every connection
    source, target = s.lower(), t.lower()
    if source != target:
        return 3 - len(check_in_ontology(g, source, target))
    return 0

def month_to_num(m):
    """Translate a lower-case three-letter month abbreviation to 1..12.

    Returns:
        int: the month number, or ``None`` when ``m`` matches no abbreviation.
    """
    abbreviations = ("jan", "feb", "mar", "apr", "may", "jun",
                     "jul", "aug", "sep", "oct", "nov", "dec")
    for number, abbreviation in enumerate(abbreviations, start=1):
        if m == abbreviation:
            return number
    return None
    

def date_difference(s, t):
    """Approximate the number of days between two ``"<month> <day>"`` strings.

    Each string is lower-cased and split on whitespace. The first three letters
    of the first token go through ``month_to_num`` and the second token is
    parsed as an integer day. Every month is counted as 30 days.

    Returns:
        int: absolute difference between the two approximate day counts.
    """
    s_parts = s.lower().split()
    t_parts = t.lower().split()
    s_month = month_to_num(s_parts[0][:3])
    s_day = s_parts[1]
    t_month = month_to_num(t_parts[0][:3])
    t_day = t_parts[1]

    s_total = s_month * 30 + int(s_day)
    t_total = t_month * 30 + int(t_day)
    return abs(s_total - t_total)

def prepare_frames_vector(chat,g):
    """Build per-turn features describing how the trip frame changed.

    For every turn that has a ``"text"`` key the function reads the
    ``or_city``, ``dst_city``, ``str_date`` and ``end_date`` values from
    ``turn["labels"]["frames"][*]["info"]`` and compares them with the values
    seen before that turn. A difference is only recomputed when the previous
    value is longer than one character; otherwise the last computed difference
    is carried over.

    Args:
        chat: dict with a ``"turns"`` list.
        g: graph handed to ``geographic_difference``.

    Returns:
        list: one ``[author, text, city_diff_sum, date_diff_sum]`` entry per
        processed turn.
    """
    # The result list was never initialised in the original, which raised
    # NameError on the first append (or on return for a chat with no turns).
    final_vector = []
    dst_city = or_city = str_date = end_date = ""
    or_city_diff = dst_city_diff = str_date_diff = end_date_diff = 0
    for t in chat["turns"]:
        if "text" in t:
            prev_or_city = or_city
            prev_dst_city = dst_city
            prev_str_date = str_date
            prev_end_date = end_date

            # The last frame that mentions a slot wins for this turn.
            for f in t["labels"]["frames"]:
                info = f["info"]
                if "or_city" in info:
                    or_city = info["or_city"][0]["val"]
                if "dst_city" in info:
                    dst_city = info["dst_city"][0]["val"]
                if "str_date" in info:
                    str_date = info["str_date"][0]["val"]
                if "end_date" in info:
                    end_date = info["end_date"][0]["val"]

            if len(prev_or_city) > 1:
                or_city_diff = geographic_difference(g, prev_or_city, or_city)
            if len(prev_dst_city) > 1:
                dst_city_diff = geographic_difference(g, prev_dst_city, dst_city)

            if len(prev_str_date) > 1:
                str_date_diff = date_difference(str_date, prev_str_date)
            if len(prev_end_date) > 1:
                end_date_diff = date_difference(end_date, prev_end_date)

            print()
            print(t["author"])
            print(t["text"])
            print(or_city_diff, dst_city_diff, str_date_diff, end_date_diff)

            final_vector.append([t["author"], t["text"], or_city_diff+dst_city_diff, str_date_diff+end_date_diff])

    return final_vector

In [4]:
import numpy as np
# import pandas as pd
import _pickle as cPickle
from collections import defaultdict
import re

from numpy.linalg import norm

import sys
import os

os.environ['KERAS_BACKEND']='tensorflow'

from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.layers.merge import concatenate,multiply
from keras.layers import merge
from keras.layers import Embedding
from keras.layers import Dense, Input, Flatten, Lambda
from keras.layers import RepeatVector, Embedding, Merge, Dropout, LSTM, GRU, Bidirectional, TimeDistributed,Permute,Reshape,Activation
from keras.models import Model

from keras import backend as K
from keras.engine.topology import Layer, InputSpec
from keras import initializers
import theano
import functools
import nltk
nltk.download('punkt')
from nltk import tokenize

# Size of the last axis of the padded word-id tensors (word ids kept per sentence).
MAX_SENT_LENGTH = 40
# Number of sentence slots per chat (create_data) or per turn (create_data_with_turns).
MAX_SENTS = 20
# Passed to Tokenizer(num_words=...); word ids at or above this value are not stored.
MAX_NB_WORDS = 20000
# Number of turn slots per chat in the turn-level tensors.
MAX_TURNS = 60
# Width of the embedding matrix; create_embedding_index loads glove.6B.100d.txt.
EMBEDDING_DIM = 100
# Fractions of the shuffled samples held out for validation and for testing.
VALIDATION_SPLIT = 0.1
TEST_SPLIT = 0.2
# Units of each LSTM direction in most of the networks defined in this notebook.
RNN_DIM = 50
# Directory that holds the GloVe vector file.
GLOVE_DIR = "./data/glove"

Using TensorFlow backend.


[nltk_data] Error loading punkt: <urlopen error [Errno 111] Connection
[nltk_data]     refused>


In [5]:
def create_data(chats,l,flag=False,length=0):
    """Encode each chat as a (MAX_SENTS, MAX_SENT_LENGTH) grid of word ids.

    The turn texts of a chat are joined with newlines (only the first
    ``length`` turns when ``flag`` is set), split into sentences with
    ``tokenize.sent_tokenize`` and mapped to ids by a Tokenizer fitted on all
    joined texts. Unused positions stay 0.

    Args:
        chats: dict of chat id -> ``{"turns": [{"text": ...}, ...], ...}``.
        l: number of rows to allocate; must be at least ``len(chats)``.
        flag: when true, only the first ``length`` turns of each chat are used.
        length: number of turns kept when ``flag`` is true.

    Returns:
        numpy.ndarray: int32 array of shape ``(l, MAX_SENTS, MAX_SENT_LENGTH)``.
    """
    texts = []
    chat_sentences = []
    for idx in chats.keys():
        chat_turns = chats[idx]["turns"]
        if flag:
            chat_turns = chat_turns[:length]
        text = "\n".join([x["text"] for x in chat_turns])
        texts.append(text)
        chat_sentences.append(tokenize.sent_tokenize(text))

    tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
    tokenizer.fit_on_texts(texts)
    word_index = tokenizer.word_index

    data = np.zeros((l, MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
    # `i` indexes chats here, not turns. The original stopped at i == length
    # when `flag` was set, which left every chat after the first `length`
    # chats all-zero. Turn truncation already happened when building the text.
    for i, sentences in enumerate(chat_sentences):
        for j, sent in enumerate(sentences[:MAX_SENTS]):
            k = 0
            for word in text_to_word_sequence(sent):
                if k >= MAX_SENT_LENGTH:
                    break
                if word_index[word] < MAX_NB_WORDS:
                    data[i, j, k] = word_index[word]
                    k = k + 1
    return data



In [6]:
def create_data_with_turns(chats, l,flag=False,length=0):
    """Encode each chat as a (MAX_TURNS, MAX_SENTS, MAX_SENT_LENGTH) id grid.

    Every turn is split into sentences with ``tokenize.sent_tokenize`` and
    mapped to word ids by a Tokenizer fitted on all turn texts. When ``flag``
    is set only the first ``length`` turns of a chat are encoded. Turns beyond
    ``MAX_TURNS`` are dropped. Unused positions stay 0.

    Args:
        chats: dict of chat id -> ``{"turns": [{"text", "ti", "author"}, ...]}``.
        l: number of rows to allocate; must be at least ``len(chats)``.
        flag: when true, encoding stops after ``length`` turns per chat.
        length: number of turns encoded when ``flag`` is true.

    Returns:
        tuple: ``(data, vecs, tokenizer)`` where ``data`` is the int32 id
        tensor, ``vecs`` holds for every chat one
        ``[len(text), ti, 0 if author is "wizard" else 1]`` entry per turn
        (all turns, not truncated), and ``tokenizer`` is the fitted Tokenizer.
    """
    texts = []
    chats_txt = []
    vecs = []
    for idx in chats.keys():
        turns = []
        vec = []
        for turn in chats[idx]["turns"]:
            vec.append([len(turn["text"]),turn["ti"],0 if turn["author"].lower()=="wizard" else 1])
            texts.append(turn["text"])
            turns.append(tokenize.sent_tokenize(turn["text"]))
        vecs.append(vec)
        chats_txt.append(turns)

    tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
    tokenizer.fit_on_texts(texts)
    word_index = tokenizer.word_index

    data = np.zeros((l, MAX_TURNS,MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
    for m, turns in enumerate(chats_txt):
        for i, sentences in enumerate(turns):
            if flag and i == length:
                break
            if i >= MAX_TURNS:
                # The tensor has only MAX_TURNS turn slots; writing past them
                # would raise IndexError.
                break
            for j, sent in enumerate(sentences[:MAX_SENTS]):
                k = 0
                for word in text_to_word_sequence(sent):
                    if k >= MAX_SENT_LENGTH:
                        break
                    if word_index[word] < MAX_NB_WORDS:
                        data[m, i, j, k] = word_index[word]
                        k = k + 1
    return data,vecs,tokenizer



In [7]:
def create_texts_labels(chats,flag=False,length=0):
    """Collect one newline-joined text and one label per chat.

    Args:
        chats: dict of chat id -> ``{"turns": [{"text": ...}, ...], "label": ...}``.
        flag: when true, only the first ``length`` turns of each chat are joined.
        length: number of turns kept when ``flag`` is true.

    Returns:
        tuple: ``(texts, labels)`` as two lists in the iteration order of ``chats``.
    """
    texts = []
    labels = []
    for idx in chats.keys():
        chat_turns = chats[idx]["turns"]
        if flag:
            chat_turns = chat_turns[:length]
        # The original also sentence-tokenised the text and discarded the
        # result; that needed the nltk punkt model for no benefit.
        texts.append("\n".join([x["text"] for x in chat_turns]))
        labels.append(chats[idx]["label"])
    return texts,labels


In [8]:



def prepare_datasets(chats,flag=False, length =0):
    """Build shuffled train/validation/test splits of sentence-level tensors.

    Args:
        chats: dict of chat id -> ``{"turns": [...], "label": ...}``.
        flag: when true, keep only chats with more than ``length`` turns and
            encode only their first ``length`` turns.
        length: turn cut-off used when ``flag`` is true.

    Returns:
        tuple: ``(word_index, x_train, y_train, x_val, y_val, x_test, y_test)``.
        The last ``TEST_SPLIT`` fraction of the shuffled samples is the test
        set, the ``VALIDATION_SPLIT`` fraction before it the validation set.
    """
    if flag:
        chats = {k: v for k, v in chats.items() if len(v["turns"])>length}

    texts,labels = create_texts_labels(chats,flag,length)

    # Fitted only to report the vocabulary; create_data fits its own
    # tokenizer on the same texts.
    tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
    tokenizer.fit_on_texts(texts)
    data = create_data(chats,len(texts),flag,length)
    word_index = tokenizer.word_index
    print('Total %s unique tokens.' % len(word_index))

    labels = to_categorical(np.asarray(labels))
    print('Shape of data tensor:', data.shape)
    print('Shape of label tensor:', labels.shape)

    indices = np.arange(len(data))
    np.random.shuffle(indices)
    data = data[indices]
    labels = labels[indices]

    n_samples = data.shape[0]
    nb_validation_samples = int(VALIDATION_SPLIT * n_samples)
    nb_test_samples = int(TEST_SPLIT * n_samples)
    # Use explicit end positions: negative slicing breaks when a split size is
    # 0 (data[:-0] is empty and data[-0:] is the whole array).
    train_end = n_samples - nb_validation_samples - nb_test_samples
    val_end = n_samples - nb_test_samples

    x_train = data[:train_end]
    y_train = labels[:train_end]
    x_val = data[train_end:val_end]
    y_val = labels[train_end:val_end]
    x_test = data[val_end:]
    y_test = labels[val_end:]

    return word_index,x_train, y_train,x_val, y_val,x_test,y_test


In [9]:

def prepare_datasets_turns(chats,flag=False, length =0):
    """Build shuffled train/validation/test splits of turn-level tensors.

    NOTE: the notebook defines this function again in the following cell; the
    later definition is the one in effect once both cells have run.

    Args:
        chats: dict of chat id -> ``{"turns": [...], "label": ...}``.
        flag: when true, keep only chats with more than ``length`` turns and
            encode only their first ``length`` turns.
        length: turn cut-off used when ``flag`` is true.

    Returns:
        tuple: ``(word_index, x_train, y_train, x_val, y_val, x_test, y_test)``.
    """
    if flag:
        chats = {k: v for k, v in chats.items() if len(v["turns"])>length}

    texts,labels = create_texts_labels(chats,flag,length)

    data,_,tokenizer = create_data_with_turns(chats,len(texts),flag,length)
    word_index = tokenizer.word_index
    print('Total %s unique tokens.' % len(word_index))

    labels = to_categorical(np.asarray(labels))
    print('Shape of data tensor:', data.shape)
    print('Shape of label tensor:', labels.shape)

    indices = np.arange(len(data))
    np.random.shuffle(indices)
    data = data[indices]
    labels = labels[indices]

    n_samples = data.shape[0]
    nb_validation_samples = int(VALIDATION_SPLIT * n_samples)
    nb_test_samples = int(TEST_SPLIT * n_samples)
    # Use explicit end positions: negative slicing breaks when a split size is
    # 0 (data[:-0] is empty and data[-0:] is the whole array).
    train_end = n_samples - nb_validation_samples - nb_test_samples
    val_end = n_samples - nb_test_samples

    x_train = data[:train_end]
    y_train = labels[:train_end]
    x_val = data[train_end:val_end]
    y_val = labels[train_end:val_end]
    x_test = data[val_end:]
    y_test = labels[val_end:]

    return word_index,x_train, y_train,x_val, y_val,x_test,y_test

In [10]:

def prepare_datasets_turns(chats,flag=False, length =0):
    """Build shuffled train/validation/test splits of turn-level tensors.

    NOTE: this redefines the function of the same name from the previous cell
    and replaces it at runtime.

    Args:
        chats: dict of chat id -> ``{"turns": [...], "label": ...}``.
        flag: when true, keep only chats with more than ``length`` turns and
            encode only their first ``length`` turns.
        length: turn cut-off used when ``flag`` is true.

    Returns:
        tuple: ``(word_index, x_train, y_train, x_val, y_val, x_test, y_test)``.
    """
    if flag:
        chats = {k: v for k, v in chats.items() if len(v["turns"])>length}

    texts,labels = create_texts_labels(chats,flag,length)

    data,_,tokenizer = create_data_with_turns(chats,len(texts),flag,length)
    word_index = tokenizer.word_index
    print('Total %s unique tokens.' % len(word_index))

    labels = to_categorical(np.asarray(labels))
    print('Shape of data tensor:', data.shape)
    print('Shape of label tensor:', labels.shape)

    indices = np.arange(len(data))
    np.random.shuffle(indices)
    data = data[indices]
    labels = labels[indices]

    n_samples = data.shape[0]
    nb_validation_samples = int(VALIDATION_SPLIT * n_samples)
    nb_test_samples = int(TEST_SPLIT * n_samples)
    # Use explicit end positions: negative slicing breaks when a split size is
    # 0 (data[:-0] is empty and data[-0:] is the whole array).
    train_end = n_samples - nb_validation_samples - nb_test_samples
    val_end = n_samples - nb_test_samples

    x_train = data[:train_end]
    y_train = labels[:train_end]
    x_val = data[train_end:val_end]
    y_val = labels[train_end:val_end]
    x_test = data[val_end:]
    y_test = labels[val_end:]

    return word_index,x_train, y_train,x_val, y_val,x_test,y_test

In [11]:
def prepare_datasets_turns_props(chats,flag=False, length =0):
    """Build train/validation/test splits of turn tensors plus turn properties.

    Besides the word-id tensor, each chat gets a ``(MAX_TURNS, 3)`` block with
    the per-turn properties returned by ``create_data_with_turns`` (text
    length, elapsed time, author flag). Each property is divided by its
    maximum over the whole dataset.

    Args:
        chats: dict of chat id -> ``{"turns": [...], "label": ...}``.
        flag: when true, keep only chats with more than ``length`` turns and
            use only their first ``length`` turns.
        length: turn cut-off used when ``flag`` is true.

    Returns:
        tuple: ``(word_index, x_train, y_train, x_val, y_val, x_test, y_test)``
        where every ``x_*`` is a ``(data, aux_data)`` pair.
    """
    if flag:
        chats = {k: v for k, v in chats.items() if len(v["turns"])>length}

    texts,labels = create_texts_labels(chats,flag,length)

    data,vecs,tokenizer = create_data_with_turns(chats,len(texts),flag,length)
    aux_data = np.zeros((len(texts), MAX_TURNS,3), dtype='float32')
    for i,vec in enumerate(vecs):
        selected = vec[:length] if flag else vec
        # Only MAX_TURNS slots exist per chat; extra turns are dropped instead
        # of raising IndexError.
        for j,v in enumerate(selected[:MAX_TURNS]):
            aux_data[i,j,:]= np.array(v)

    # np.float was removed from NumPy; float64 is the type it aliased.
    norm_hlp = aux_data.max(axis=0).max(axis=0).astype(np.float64)
    # A property whose maximum is 0 would cause a division by zero (NaN/inf).
    norm_hlp[norm_hlp == 0] = 1.0
    aux_data = aux_data.astype(np.float64)/norm_hlp

    labels = to_categorical(np.asarray(labels))

    word_index = tokenizer.word_index
    print('Total %s unique tokens.' % len(word_index))

    print('Shape of data tensor:', data.shape)
    print('Shape of label tensor:', labels.shape)
    print('Shape of aux tensor:', aux_data.shape)

    indices = np.arange(len(data))
    np.random.shuffle(indices)
    data = data[indices]
    aux_data = aux_data[indices]
    labels = labels[indices]

    n_samples = data.shape[0]
    nb_validation_samples = int(VALIDATION_SPLIT * n_samples)
    nb_test_samples = int(TEST_SPLIT * n_samples)
    # Use explicit end positions: negative slicing breaks when a split size is
    # 0 (data[:-0] is empty and data[-0:] is the whole array).
    train_end = n_samples - nb_validation_samples - nb_test_samples
    val_end = n_samples - nb_test_samples

    x_train = (data[:train_end],aux_data[:train_end])
    y_train = labels[:train_end]
    x_val = (data[train_end:val_end],aux_data[train_end:val_end])
    y_val = labels[train_end:val_end]
    x_test = (data[val_end:],aux_data[val_end:])
    y_test = labels[val_end:]

    return word_index,x_train, y_train,x_val, y_val,x_test,y_test

In [12]:
def create_embedding_index():
    """Load ``glove.6B.100d.txt`` from ``GLOVE_DIR`` into a dictionary.

    Each line is split on whitespace: the first token is the word and the
    remaining tokens are parsed as float32 coefficients.

    Returns:
        dict: word -> numpy float32 vector.
    """
    embeddings_index = {}
    glove_path = os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')
    # Read as UTF-8 explicitly (the platform default encoding may not be able
    # to decode the file) and close the file even if a line fails to parse.
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    print('Total %s word vectors.' % len(embeddings_index))
    return embeddings_index

In [13]:



def create_embedding_matrix(word_index):
    """Build the initial embedding weights for the vocabulary.

    The matrix has ``len(word_index) + 1`` rows and ``EMBEDDING_DIM`` columns
    and starts out filled with ``np.random.random`` values. Rows of words found
    in the GloVe index are overwritten with the GloVe vector.

    Args:
        word_index: dict mapping each word to its row number.

    Returns:
        numpy.ndarray: the embedding matrix.
    """
    glove_vectors = create_embedding_index()
    n_rows = len(word_index) + 1
    embedding_matrix = np.random.random((n_rows, EMBEDDING_DIM))

    for word, row in word_index.items():
        vector = glove_vectors.get(word)
        if vector is None:
            # Words missing from the index keep their random initial row.
            continue
        embedding_matrix[row] = vector
    return embedding_matrix



In [14]:
def hierarcical_lstm_network(word_index):
    """Build a two-level (words -> sentences) bidirectional LSTM classifier.

    A sentence encoder (embedding + bidirectional LSTM) is applied to each of
    the ``MAX_SENTS`` sentences through ``TimeDistributed``; a second
    bidirectional LSTM reads the sentence vectors and a 2-unit softmax layer
    produces the prediction.

    Args:
        word_index: vocabulary dict used to size and initialise the embedding.

    Returns:
        The compiled Keras model taking input of shape
        ``(MAX_SENTS, MAX_SENT_LENGTH)``.
    """
    vocab_rows = len(word_index) + 1
    embedder = Embedding(vocab_rows,
                         EMBEDDING_DIM,
                         weights=[create_embedding_matrix(word_index)],
                         input_length=MAX_SENT_LENGTH,
                         trainable=True)

    # Word level: one vector per sentence.
    word_ids = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
    sentence_vector = Bidirectional(LSTM(RNN_DIM))(embedder(word_ids))
    sentEncoder = Model(word_ids, sentence_vector)

    # Sentence level: one vector per chat.
    review_input = Input(shape=(MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')
    encoded_sentences = TimeDistributed(sentEncoder)(review_input)
    print("review_encoder %s"%str(encoded_sentences._keras_shape))
    chat_vector = Bidirectional(LSTM(RNN_DIM))(encoded_sentences)

    model = Model(review_input, Dense(2, activation='softmax')(chat_vector))
    model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])

    return model


In [15]:
def hierarcical_lstm_network_with_turns(word_index):
    """Build a three-level (words -> sentences -> turns) LSTM classifier.

    Each level is a bidirectional LSTM with ``RNN_DIM`` units per direction;
    the lower levels are wrapped in ``TimeDistributed``. The summaries of the
    sentence and turn encoders are printed while building.

    Args:
        word_index: vocabulary dict used to size and initialise the embedding.

    Returns:
        The compiled Keras model taking input of shape
        ``(MAX_TURNS, MAX_SENTS, MAX_SENT_LENGTH)`` and ending in a 2-unit
        softmax layer.
    """
    embedding_matrix = create_embedding_matrix(word_index)
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SENT_LENGTH,
                                trainable=True)
    sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
    embedded_sequences = embedding_layer(sentence_input)

    l_lstm = Bidirectional(LSTM(RNN_DIM))(embedded_sequences)
    sentEncoder = Model(sentence_input, l_lstm)
    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line.
    sentEncoder.summary()
    turns_input = Input(shape=(MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')
    turns_encoder = TimeDistributed(sentEncoder)(turns_input)

    l_lstm_sent = Bidirectional(LSTM(RNN_DIM))(turns_encoder)
    turnsEncoder = Model(turns_input, l_lstm_sent)

    turnsEncoder.summary()
    chats_input = Input(shape=(MAX_TURNS,MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')

    chats_encoder_layer = TimeDistributed(turnsEncoder)
    chats_encoder = chats_encoder_layer(chats_input)

    l_lstm_turns = Bidirectional(LSTM(RNN_DIM))(chats_encoder)

    preds = Dense(2, activation='softmax')(l_lstm_turns)
    model = Model(chats_input, preds)
    model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])
    return model


In [16]:
def hierarcical_lstm_network_with_turns_attention(word_index):
    """Build the three-level LSTM classifier with attention on words and sentences.

    At the word and sentence levels the bidirectional LSTM returns the full
    sequence. A ``Dense(1, tanh)`` layer scores every step, a softmax turns the
    scores into weights, and the weighted steps are summed into one vector of
    size ``RNN_DIM*2``. The turn level uses a plain bidirectional LSTM with
    ``RNN_DIM*2`` units per direction.

    Args:
        word_index: vocabulary dict used to size and initialise the embedding.

    Returns:
        The compiled Keras model taking input of shape
        ``(MAX_TURNS, MAX_SENTS, MAX_SENT_LENGTH)`` and ending in a 2-unit
        softmax layer.
    """
    embedding_matrix = create_embedding_matrix(word_index)
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SENT_LENGTH,
                                trainable=True)
    sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
    embedded_sequences = embedding_layer(sentence_input)

    # Word-level attention: one weight per word, repeated across the
    # RNN_DIM*2 features so it can be multiplied with the LSTM output.
    l_lstm = Bidirectional(LSTM(RNN_DIM,return_sequences=True))(embedded_sequences)
    attention = Dense(1, activation='tanh')(l_lstm)
    attention = Flatten()(attention)
    attention = Activation('softmax')(attention)
    attention = RepeatVector(RNN_DIM*2)(attention)

    attention = Permute([2, 1])(attention)

    print("l_lstm ndim %s attention %s"%(l_lstm._keras_shape,attention._keras_shape))
    sent_representation = multiply([l_lstm, attention])
    sent_representation = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(RNN_DIM*2,))(sent_representation)

    sentEncoder = Model(sentence_input, sent_representation)
    turns_input = Input(shape=(MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')
    turns_encoder = TimeDistributed(sentEncoder)(turns_input)

    # Sentence-level attention, built the same way as the word-level one.
    l_lstm_sent = Bidirectional(LSTM(RNN_DIM,return_sequences=True))(turns_encoder)

    attention_sent = Dense(1, activation='tanh')(l_lstm_sent)
    attention_sent = Flatten()(attention_sent)
    attention_sent = Activation('softmax')(attention_sent)
    attention_sent = RepeatVector(RNN_DIM*2)(attention_sent)
    attention_sent = Permute([2, 1])(attention_sent)

    print("embedded_sequences ndim %d"%K.ndim(embedded_sequences))
    turn_representation = multiply([l_lstm_sent, attention_sent])
    turn_representation = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(RNN_DIM*2,))(turn_representation)

    turnsEncoder = Model(turns_input, turn_representation)


    chats_input = Input(shape=(MAX_TURNS,MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')

    chats_encoder_layer = TimeDistributed(turnsEncoder)
    chats_encoder = chats_encoder_layer(chats_input)
    # Intermediate model built only to inspect the encoder output shape.
    # summary() prints the table and returns None, so it must not be
    # interpolated into a string or wrapped in print().
    model = Model(chats_input, chats_encoder)
    print("chat_input model")
    model.summary()
    l_lstm_turns = Bidirectional(LSTM(RNN_DIM*2))(chats_encoder)

    preds = Dense(2, activation='softmax')(l_lstm_turns)
    model = Model(chats_input, preds)
    model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])
    model.summary()
    return model


In [28]:
def hierarcical_lstm_network_with_turns_attention_on_turns(word_index):
    """Build the three-level LSTM classifier with attention at every level.

    At the word, sentence and turn levels the bidirectional LSTM returns the
    full sequence. A ``Dense(1, tanh)`` layer scores every step, a softmax
    turns the scores into weights, and the weighted steps are summed into one
    vector of size ``RNN_DIM*2``.

    Args:
        word_index: vocabulary dict used to size and initialise the embedding.

    Returns:
        The compiled Keras model taking input of shape
        ``(MAX_TURNS, MAX_SENTS, MAX_SENT_LENGTH)`` and ending in a 2-unit
        softmax layer.
    """
    embedding_matrix = create_embedding_matrix(word_index)
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SENT_LENGTH,
                                trainable=True)
    sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
    embedded_sequences = embedding_layer(sentence_input)

    # Word-level attention: one weight per word, repeated across the
    # RNN_DIM*2 features so it can be multiplied with the LSTM output.
    l_lstm = Bidirectional(LSTM(RNN_DIM,return_sequences=True))(embedded_sequences)
    attention = Dense(1, activation='tanh')(l_lstm)
    attention = Flatten()(attention)
    attention = Activation('softmax')(attention)
    attention = RepeatVector(RNN_DIM*2)(attention)

    attention = Permute([2, 1])(attention)

    print("l_lstm ndim %s attention %s"%(l_lstm._keras_shape,attention._keras_shape))
    sent_representation = multiply([l_lstm, attention])
    sent_representation = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(RNN_DIM*2,))(sent_representation)

    sentEncoder = Model(sentence_input, sent_representation)
    turns_input = Input(shape=(MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')
    turns_encoder = TimeDistributed(sentEncoder)(turns_input)

    # Sentence-level attention.
    l_lstm_sent = Bidirectional(LSTM(RNN_DIM,return_sequences=True))(turns_encoder)

    attention_sent = Dense(1, activation='tanh')(l_lstm_sent)
    attention_sent = Flatten()(attention_sent)
    attention_sent = Activation('softmax')(attention_sent)
    attention_sent = RepeatVector(RNN_DIM*2)(attention_sent)
    attention_sent = Permute([2, 1])(attention_sent)

    print("embedded_sequences ndim %d"%K.ndim(embedded_sequences))
    turn_representation = multiply([l_lstm_sent, attention_sent])
    turn_representation = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(RNN_DIM*2,))(turn_representation)

    turnsEncoder = Model(turns_input, turn_representation)


    chats_input = Input(shape=(MAX_TURNS,MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')

    chats_encoder = TimeDistributed(turnsEncoder)(chats_input)

    # Turn-level attention.
    l_lstm_turns = Bidirectional(LSTM(RNN_DIM,return_sequences=True))(chats_encoder)

    attention_turn = Dense(1, activation='tanh')(l_lstm_turns)
    attention_turn = Flatten()(attention_turn)
    attention_turn = Activation('softmax')(attention_turn)
    attention_turn = RepeatVector(RNN_DIM*2)(attention_turn)
    attention_turn = Permute([2, 1])(attention_turn)

    chat_representation = multiply([l_lstm_turns, attention_turn])
    chat_representation = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(RNN_DIM*2,))(chat_representation)

    preds = Dense(2, activation='softmax')(chat_representation)
    model = Model(chats_input, preds)
    model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])
    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    return model


In [18]:
def hierarcical_lstm_network_with_turns_props(word_index):
    """Build the three-level LSTM classifier with an auxiliary per-turn input.

    The encoded turns are concatenated with the ``aux_input`` tensor of shape
    ``(MAX_TURNS, 3)`` before the turn-level bidirectional LSTM.

    Args:
        word_index: vocabulary dict used to size and initialise the embedding.

    Returns:
        The compiled Keras model with two inputs, the word-id tensor of shape
        ``(MAX_TURNS, MAX_SENTS, MAX_SENT_LENGTH)`` and ``aux_input``, ending
        in a 2-unit softmax layer.
    """
    vocab_rows = len(word_index) + 1
    embedder = Embedding(vocab_rows,
                         EMBEDDING_DIM,
                         weights=[create_embedding_matrix(word_index)],
                         input_length=MAX_SENT_LENGTH,
                         trainable=True)

    # Word level: one vector per sentence.
    word_ids = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
    sentence_vector = Bidirectional(LSTM(RNN_DIM))(embedder(word_ids))
    sentEncoder = Model(word_ids, sentence_vector)

    # Sentence level: one vector per turn.
    turns_input = Input(shape=(MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')
    encoded_sentences = TimeDistributed(sentEncoder)(turns_input)

    auxiliary_input = Input(shape=(MAX_TURNS,3,), name='aux_input')
    turn_vector = Bidirectional(LSTM(RNN_DIM))(encoded_sentences)
    turnsEncoder = Model(turns_input, turn_vector)

    # Turn level: encoded turns joined with their auxiliary properties.
    chats_input = Input(shape=(MAX_TURNS,MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')
    encoded_turns = TimeDistributed(turnsEncoder)(chats_input)
    turns_with_props = concatenate([encoded_turns, auxiliary_input])
    chat_vector = Bidirectional(LSTM(RNN_DIM))(turns_with_props)

    preds = Dense(2, activation='softmax')(chat_vector)
    model = Model([chats_input,auxiliary_input], preds)

    model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['acc'])
    return model

In [19]:
# When true, attention_3d_block averages the attention weights over the input
# dimensions and repeats that single vector for every dimension.
SINGLE_ATTENTION_VECTOR = False
# Not read by any code visible in this notebook.
APPLY_ATTENTION_BEFORE_LSTM = False

def attention_3d_block(inputs):
    """Multiply a 3-D tensor by softmax attention weights over its time axis.

    Args:
        inputs: Keras tensor of shape ``(batch_size, time_steps, input_dim)``;
            both ``time_steps`` and ``input_dim`` must be statically known.

    Returns:
        Keras tensor with the same shape as ``inputs``, element-wise multiplied
        by the attention weights.
    """
    # inputs.shape = (batch_size, time_steps, input_dim)
    # Take the number of time steps from the tensor itself; the original
    # relied on a TIME_STEPS global that is not defined in the visible code.
    time_steps = int(inputs.shape[1])
    input_dim = int(inputs.shape[2])
    a = Permute((2, 1))(inputs)
    a = Reshape((input_dim, time_steps))(a)
    a = Dense(time_steps, activation='softmax')(a)
    if SINGLE_ATTENTION_VECTOR:
        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        a = RepeatVector(input_dim)(a)
    a_probs = Permute((2, 1), name='attention_vec')(a)
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')
    return output_attention_mul


In [20]:
def hierarcical_attention_network(word_index):
    """Build a two-level classifier with attention over the words of a sentence.

    The word-level bidirectional LSTM returns the full sequence. A
    ``Dense(1, tanh)`` layer scores every word, a softmax turns the scores
    into weights, and the weighted steps are summed into one sentence vector
    of size ``RNN_DIM*2``. A sentence-level bidirectional LSTM and a 2-unit
    softmax layer follow.

    Args:
        word_index: vocabulary dict used to size and initialise the embedding.

    Returns:
        The compiled Keras model taking input of shape
        ``(MAX_SENTS, MAX_SENT_LENGTH)``.
    """
    embedding_matrix = create_embedding_matrix(word_index)
    embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SENT_LENGTH,
                            trainable=True)


    sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
    embedded_sequences = embedding_layer(sentence_input)
    # RNN_DIM / RNN_DIM*2 replace the literals 50 / 100 so this network stays
    # consistent with the other networks if RNN_DIM is changed.
    l_lstm = Bidirectional(LSTM(RNN_DIM, return_sequences=True))(embedded_sequences)

    attention = Dense(1, activation='tanh')(l_lstm)
    attention = Flatten()(attention)
    attention = Activation('softmax')(attention)
    attention = RepeatVector(RNN_DIM*2)(attention)
    attention = Permute([2, 1])(attention)

    sent_representation = multiply([l_lstm, attention])
    sent_representation = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(RNN_DIM*2,))(sent_representation)

    sentEncoder = Model(sentence_input, sent_representation)

    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line.
    sentEncoder.summary()

    review_input = Input(shape=(MAX_SENTS,MAX_SENT_LENGTH), dtype='int32')

    review_encoder = TimeDistributed(sentEncoder)(review_input)

    # NOTE(review): the LSTM unit count is MAX_SENTS here, whereas the other
    # networks use RNN_DIM; kept as in the original, confirm it is intended.
    l_lstm_sent = Bidirectional(LSTM(MAX_SENTS))(review_encoder)

    preds = Dense(2, activation='softmax')(l_lstm_sent)
    model = Model(review_input, preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['acc'])
    model.summary()
    return model

In [21]:

def evaluate_model(model,x_train, y_train,x_val, y_val,x_test,y_test, epochs=10, batch_size=20):
    """Train ``model`` and return its evaluation on the test split.

    Args:
        model: compiled model exposing ``metrics_names``, ``fit`` and ``evaluate``.
        x_train, y_train: training inputs and targets.
        x_val, y_val: validation inputs and targets passed to ``fit``.
        x_test, y_test: inputs and targets used for the final evaluation.
        epochs: number of training epochs (default 10, the former fixed value).
        batch_size: batch size for training and evaluation (default 20, the
            former fixed value).

    Returns:
        Whatever ``model.evaluate`` returns; the printed ``metrics_names``
        tell how to read it.
    """
    print(model.metrics_names)
    model.fit(x_train, y_train, validation_data=(x_val, y_val),
              epochs=epochs, batch_size=batch_size)

    return model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1, sample_weight=None)



In [22]:

def evaluate_model_multi(model,x_train, y_train,x_val, y_val,x_test,y_test, epochs=10, batch_size=20):
    """Train a two-input ``model`` and return its evaluation on the test split.

    Every ``x_*`` argument is indexed with ``[0]`` and ``[1]``; the two
    elements are passed to the model as a list of two inputs.

    Args:
        model: compiled model exposing ``metrics_names``, ``fit`` and ``evaluate``.
        x_train, x_val, x_test: pairs of input arrays.
        y_train, y_val, y_test: the matching targets.
        epochs: number of training epochs (default 10, the former fixed value).
        batch_size: batch size for training and evaluation (default 20, the
            former fixed value).

    Returns:
        Whatever ``model.evaluate`` returns.
    """
    print(model.metrics_names)
    model.fit([x_train[0], x_train[1]], y_train,
              validation_data=([x_val[0], x_val[1]], y_val),
              epochs=epochs, batch_size=batch_size)

    return model.evaluate([x_test[0], x_test[1]], y_test, batch_size=batch_size, verbose=1, sample_weight=None)



In [23]:
  
def run_net(net,file,word_index,x_train, y_train,x_val, y_val,x_test,y_test):
    """Build, train and evaluate one network, then log the outcome.

    Args:
        net: function that takes ``word_index`` and returns a compiled model.
        file: writable text file; receives ``str(net)`` and the evaluation
            result, one per line.
        word_index: vocabulary dict handed to ``net``.
        x_train, y_train, x_val, y_val, x_test, y_test: the dataset splits
            handed to ``evaluate_model``.
    """
    res = evaluate_model(net(word_index), x_train, y_train, x_val, y_val, x_test, y_test)
    for entry in (net, res):
        file.write(str(entry)+"\n")

    
  

    
def run_networks(network_funcs, configurations,file):
    """Run every network on the full chats and on each truncated configuration.

    Args:
        network_funcs: iterable of model-building functions.
        configurations: iterable of integer turn cut-offs; for each one the
            dataset is rebuilt with ``prepare_datasets(chats, True, conf)``.
        file: path of the results file, opened for writing.
    """
    with open(file,"w") as results_file:
        chats = gen_chat_data()

        # First pass: complete chats.
        dataset = prepare_datasets(chats)
        for net in network_funcs:
            run_net(net, results_file, *dataset)

        # One further pass per cut-off, each introduced by a "config" line.
        for conf in configurations:
            dataset = prepare_datasets(chats,True,conf)
            results_file.write("config %d\n"%conf)
            for net in network_funcs:
                run_net(net, results_file, *dataset)
            
               
          



                

In [24]:
def run_net_multi(net,file,word_index,x_train, y_train,x_val, y_val,x_test,y_test):
    """Build, train and evaluate one two-input network, then log the outcome.

    Works like ``run_net`` but delegates to ``evaluate_model_multi``, so every
    ``x_*`` argument must be a pair of input arrays.

    Args:
        net: function that takes ``word_index`` and returns a compiled model.
        file: writable text file; receives ``str(net)`` and the evaluation
            result, one per line.
        word_index: vocabulary dict handed to ``net``.
    """
    res = evaluate_model_multi(net(word_index), x_train, y_train, x_val, y_val, x_test, y_test)
    for entry in (net, res):
        file.write(str(entry)+"\n")
    

In [25]:

def run_networks_turns(file,flag=False,length =0):
    """Train and evaluate the turn-level LSTM network and log the result.

    Args:
        file: path of the results file, opened for writing.
        flag: when true, only chats longer than ``length`` turns are used and
            only their first ``length`` turns are encoded.
        length: turn cut-off used when ``flag`` is true.
    """
    with open(file,"w") as results_file:
        chats = gen_chat_data()
        if flag:
            dataset = prepare_datasets_turns(chats,True,length)
        else:
            dataset = prepare_datasets_turns(chats)
        # prepare_datasets_turns yields plain arrays for a single-input model,
        # so the single-input runner is required. run_net_multi would index
        # x_train[0] / x_train[1] and feed two samples as two model inputs.
        run_net(hierarcical_lstm_network_with_turns, results_file, *dataset)
            
            
                        
          


In [26]:
def run_networks_turns_props(file,flag=False,length =0):
    """Train and evaluate the turn-level network with auxiliary turn properties.

    Args:
        file: path of the results file, opened for writing.
        flag: when true, the dataset is built with the ``length`` cut-off.
        length: turn cut-off used when ``flag`` is true.
    """
    with open(file,"w") as results_file:
        chats = gen_chat_data()
        prepare_args = (chats, True, length) if flag else (chats,)
        dataset = prepare_datasets_turns_props(*prepare_args)
        run_net_multi(hierarcical_lstm_network_with_turns_props, results_file, *dataset)
            
            
            
               
          


In [None]:
import sys, os

def func_menu():
    """Prompt until a string of digits is entered and return it as an int.

    The menu lists 1 (run_networks_turns) and 2 (run_networks_turns_props),
    but any non-negative integer is accepted and returned.
    """
    while True:
        print("choose function:\n")
        print("1 - run_networks_turns\n")
        print("2 - run_networks_turns_props\n")
        choice = input(" >>  ")
        if choice.isdigit():
            return int(choice)

def attention_menu():
    """Prompt until a string of digits is entered and return it as an int.

    The menu lists 0 (no attention), 1 (with attention) and 2 (with attention
    on turns); any non-negative integer is accepted and returned.
    """
    while True:
        print("choose attention:\n")
        print("0 - no attention\n")
        print("1 - with attention\n")
        print("2 - with attention on turns\n")

        choice = input(" >>  ")
        # Re-prompt with this same menu on invalid input. The original fell
        # through to flag_menu(), showing the wrong menu and returning a bool.
        if choice.isdigit():
            return int(choice)

def flag_menu():
    """Prompt until a string of digits is entered; return True only for 1.

    0 means "run all turns" and 1 means "run partial"; any other number is
    treated like 0.
    """
    while True:
        print("choose flag:\n")
        print("0 - run all turns\n")
        print("1 - run partial\n")
        choice = input(" >>  ")
        if choice.isdigit():
            return int(choice) == 1

def length_menu():
    """Prompt until a string of digits is entered and return it as an int."""
    while True:
        print("enter length:\n")
        choice = input(" >>  ")
        if choice.isdigit():
            return int(choice)

def out_menu():
    """Ask for the output file name and return it without surrounding whitespace."""
    print("enter outputfile:\n")
    return input(" >>  ").strip()

# Lookup table from a prompt name to the function that asks for it.
# main() calls the menu functions directly and does not read this table.
menu_actions = {
    'function': func_menu,
    'attention': attention_menu,
    'flag': flag_menu,
    'length': length_menu,
    'out': out_menu
}




def main():
    """Interactively choose a network and dataset options, then run it.

    Prompts, in order, for the function (1 = turns, anything else = turns with
    properties), the attention variant, whether to truncate chats, the
    truncation length (only when truncating) and the output file name. The
    evaluation result is written to the output file.
    """
    f = func_menu()
    att = attention_menu()

    flag = flag_menu()
    length = length_menu() if flag else 0

    outputfile = out_menu()

    # Attention choice -> network builder for the turns-only dataset.
    attention_networks = {
        0: hierarcical_lstm_network_with_turns,
        1: hierarcical_lstm_network_with_turns_attention,
        2: hierarcical_lstm_network_with_turns_attention_on_turns,
    }
    if f == 1 and att not in attention_networks:
        # The original prepared the whole dataset and then silently ran
        # nothing for an unknown choice; report it before any heavy work.
        print("unknown attention option %d - nothing to run" % att)
        return

    with open(outputfile,"w") as results_file:
        chats = gen_chat_data()
        if f == 1:
            dataset = prepare_datasets_turns(chats,flag,length)
            run_net(attention_networks[att], results_file, *dataset)
        else:
            # The attention choice is not used by the properties network.
            dataset = prepare_datasets_turns_props(chats,flag,length)
            run_net_multi(hierarcical_lstm_network_with_turns_props, results_file, *dataset)


# def main2():
#     chats = gen_chat_data()
#     word_index,x_train, y_train,x_val, y_val,x_test,y_test = prepare_datasets(chats)
#     model = hierarcical_attention_network(word_index)
#     evaluate_model(model,x_train, y_train,x_val, y_val,x_test,y_test)

            
# Entry point: start the interactive menu when this code is run as a script.
if __name__ == "__main__":
    # Earlier non-interactive run of the attention-on-turns network, kept
    # commented out for reference:
#     results_file = open("test","w")
#     chats = gen_chat_data()
#     word_index,x_train, y_train,x_val, y_val,x_test,y_test = prepare_datasets_turns(chats)
#     run_net(hierarcical_lstm_network_with_turns_attention_on_turns,results_file,word_index,x_train, y_train,x_val, y_val,x_test,y_test)
#     results_file.close()
    main()

Total 6953 unique tokens.
Shape of data tensor: (1368, 60, 20, 40)
Shape of label tensor: (1368, 2)
Total 400000 word vectors.
l_lstm ndim (None, 40, 100) attention (None, 40, 100)
embedded_sequences ndim 3
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_6 (InputLayer)             (None, 60, 20, 40)    0                                            
____________________________________________________________________________________________________
time_distributed_4 (TimeDistribu (None, 60, 100)       816402      input_6[0][0]                    
____________________________________________________________________________________________________
bidirectional_6 (Bidirectional)  (None, 60, 100)       60400       time_distributed_4[0][0]         
______________________________________________________________________________________