In [None]:
import collections
import os
import tensorflow as tf
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Embedding, Dropout, TimeDistributed
from keras.layers import LSTM, CuDNNLSTM
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
import numpy as np
import pandas as pd
from keras.layers import Lambda
from keras.models import Model
from keras.layers import Input
import keras

# Version of the model that uses Keras' TimeDistributed wrapper

In [None]:

def generate(model, start=1, length_generate=7, maxlen=7):
    """Greedily extend a phrase, one word id at a time, using ``model.predict_classes``.

    NOTE(review): a later cell of this notebook defines ``generate`` again; once
    that cell has run it shadows this definition.

    Args:
        model: object exposing ``predict_classes(x, verbose=...)``. It is expected
            to return, for a ``(1, maxlen)`` batch of word ids, an array that can
            be indexed as ``[0][t]`` to get the class id predicted at timestep ``t``.
        start: word id that seeds the phrase; it must be a key of the
            module-level ``reversed_dictionary`` (its word is printed).
        length_generate: number of word ids to append after ``start``.
        maxlen: length of the window fed to the model. Presumably this has to
            match the sequence length the model was built with (TODO confirm).

    Returns:
        list of int: ``start`` followed by ``length_generate`` predicted ids.
    """
    print(reversed_dictionary[start])
    built_phrase = [start]

    for _ in range(length_generate):
        # Zero-pad on the right up to `maxlen`. When the phrase is longer than
        # the window, pad_sequences' default 'pre' truncation keeps the most
        # recent ids.
        seed_text = keras.preprocessing.sequence.pad_sequences(
            [built_phrase], maxlen=maxlen, padding='post')
        predictions = model.predict_classes(seed_text, verbose=0)

        # The next word is the prediction made at the last real (non-padding)
        # position of the window; clamping keeps the index valid once the
        # phrase has outgrown the window.
        last_position = min(len(built_phrase), maxlen) - 1
        built_phrase.append(int(predictions[0][last_position]))

    return built_phrase
def read_words(filename):
    """Read a UTF-8 text file and return its whitespace-separated tokens.

    Every newline is turned into a standalone ``eos`` token. The marker is
    padded with spaces so it cannot fuse with the neighbouring words when the
    file has no whitespace around its line breaks
    (``"a\\nb"`` -> ``["a", "eos", "b"]`` rather than ``["aeosb"]``). Files that
    already surround newlines with spaces tokenize exactly as before.

    Args:
        filename: path handed to ``tf.gfile.GFile``.

    Returns:
        list of str: the tokens, in file order.
    """
    with tf.gfile.GFile(filename, "rb") as f:
        text = f.read().decode("utf-8")
    return text.replace("\n", " eos ").split()
      
def convert_to_integer(array): 
  """Return a list holding ``int(element)`` for every element of ``array``, in order."""
  return list(map(int, array))

def get_emotion_timesteps(sequence,emotion_dict,emotion_size=5):
  """Return, per timestep, the emotions seen so far in ``sequence``.

  Each word id is translated to its word through the module-level
  ``reversed_dictionary``; the word's flags are read from ``emotion_dict`` and
  OR-ed into a running vector. Row ``t`` of the result is that running vector
  after the word at position ``t``, written as 0/1 integers.

  Args:
    sequence: iterable of word ids (keys of ``reversed_dictionary``).
    emotion_dict: mapping word -> array of ``emotion_size`` boolean flags.
    emotion_size: number of emotion flags per word.

  Returns:
    numpy array with one row per element of ``sequence``.
  """
  running = np.zeros(emotion_size, dtype=bool)
  rows = []

  for word_id in sequence:
    word = reversed_dictionary[word_id]
    # Once an emotion has been switched on it stays on for later timesteps.
    running = running | emotion_dict[word]
    # Store the boolean flags as plain 0/1 integers.
    rows.append([int(flag) for flag in running])

  return np.array(rows)


def build_vocab(filename):
    """Map every distinct token of ``filename`` to an integer id.

    Tokens come from ``read_words``. Ids start at 1 and follow descending
    frequency, with ties broken alphabetically, so the most frequent token gets
    id 1. Id 0 is never assigned.

    Args:
        filename: path of the corpus file.

    Returns:
        dict mapping token -> id.
    """
    frequencies = collections.Counter(read_words(filename))

    # Most frequent first; equal counts fall back to alphabetical order.
    ranked = sorted(frequencies.items(), key=lambda pair: (-pair[1], pair[0]))

    ordered_words, _counts = zip(*ranked)
    return {word: index for index, word in enumerate(ordered_words, start=1)}
  
  
def file_to_word_ids(filename, word_to_id):
    """Encode the tokens of ``filename`` as ids, skipping tokens missing from ``word_to_id``.

    Args:
        filename: path of the file to tokenize with ``read_words``.
        word_to_id: mapping token -> id.

    Returns:
        list of ids, in file order.
    """
    encoded = []
    for token in read_words(filename):
        if token in word_to_id:
            encoded.append(word_to_id[token])
    return encoded
  
  
def load_data(file):
    """Build the vocabulary from ``file`` and encode that same file as word ids.

    ``file`` is joined onto the module-level ``data_path``. A preview is
    printed: the first five ids, the complete word -> id mapping, the
    vocabulary size and the first ten words decoded back to text.

    Args:
        file: corpus file name, relative to ``data_path``.

    Returns:
        tuple ``(train_data, vocabulary, reversed_dictionary)``: the list of
        word ids, the number of distinct words, and the id -> word mapping.
    """
    corpus_path = os.path.join(data_path, file)

    # The vocabulary and the encoded corpus are both derived from one file.
    word_to_id = build_vocab(corpus_path)
    encoded = file_to_word_ids(corpus_path, word_to_id)

    vocab_size = len(word_to_id)
    id_to_word = {index: word for word, index in word_to_id.items()}

    print(encoded[:5])
    print(word_to_id)
    print(vocab_size)
    preview = [id_to_word[token] for token in encoded[:10]]
    print(" ".join(preview))
    return encoded, vocab_size, id_to_word
  
  
class KerasBatchGenerator(object):
    """Endless stream of training batches cut sequentially from a list of word ids.

    Every sample is a window of ``num_steps`` consecutive ids. Its target is the
    same window shifted one position to the right, one-hot encoded. Alongside
    the ids, each sample carries the per-timestep emotion matrix computed by the
    module-level ``get_emotion_timesteps`` from the module-level ``emotion_dict``.
    """

    def __init__(self, data, num_steps, batch_size, vocabulary, skip_step, emotion_size=5):
        """Store the batching configuration.

        Args:
            data: sequence of integer word ids.
            num_steps: number of ids per window.
            batch_size: number of windows per yielded batch.
            vocabulary: number of distinct words. One is added for the one-hot
                width because ids are assigned from 1 upwards by ``build_vocab``.
            skip_step: distance, in ids, between the starts of two consecutive
                windows.
            emotion_size: number of emotion flags per timestep.
        """
        self.data = data
        self.num_steps = num_steps
        self.batch_size = batch_size
        self.vocabulary = vocabulary+1
        # Start of the next window; wraps back to 0 when a full window plus its
        # shifted target no longer fits before the end of the data.
        self.current_idx = 0
        self.skip_step = skip_step
        self.emotion_size = emotion_size

    def generate(self):
        """Yield ``([x, X_emotion], y)`` batches forever.

        Shapes: ``x`` is ``(batch_size, num_steps)``, ``X_emotion`` is
        ``(batch_size, num_steps, emotion_size)`` and ``y`` is
        ``(batch_size, num_steps, vocabulary + 1)``.

        Raises:
            ValueError: when ``data`` holds no more than ``num_steps`` ids, so
                no window together with its shifted target can be cut.
        """
        if len(self.data) <= self.num_steps:
            raise ValueError(
                "data must contain more than num_steps ids "
                "(got %d ids for num_steps=%d)" % (len(self.data), self.num_steps))

        while True:
            # Allocate fresh arrays for every batch. Re-yielding the same
            # buffers would overwrite batches that a consumer still holds
            # (for example batches waiting in a prefetch queue).
            x = np.zeros((self.batch_size, self.num_steps))
            X_emotion = np.zeros((self.batch_size, self.num_steps, self.emotion_size))
            y = np.zeros((self.batch_size, self.num_steps, self.vocabulary))

            for i in range(self.batch_size):
                if self.current_idx + self.num_steps >= len(self.data):
                    # Not enough ids left for a window and its target: restart.
                    self.current_idx = 0

                window_start = self.current_idx
                window_stop = window_start + self.num_steps

                data_tmp = self.data[window_start:window_stop]
                x[i, :] = data_tmp

                # Targets are the inputs shifted by one position.
                temp_y = self.data[window_start + 1:window_stop + 1]
                y[i, :, :] = to_categorical(temp_y, num_classes=self.vocabulary)

                X_emotion[i] = get_emotion_timesteps(data_tmp, emotion_dict, self.emotion_size)

                self.current_idx += self.skip_step
            yield [x, X_emotion], y


In [None]:
# Load the LIWC results and keep, for every row, one boolean flag per emotion
# category (True wherever the score is non-zero).
df = pd.read_csv('LIWC2015 Results (LICW.csv).csv')
df_emotions = df[['sad','anger','anx','negemo', 'posemo']]
emotions = df_emotions.ne(0.0)
df = pd.concat([df[['B']], emotions], axis=1)

# Value of column 'B' -> array of the five emotion flags of that row.
# get_emotion_timesteps looks entries up by word.
emotion_dict = {}
for row in df.values:
  emotion_dict[row[0]] = row[1:]

In [None]:
# Directory that load_data joins with the corpus file name.
data_path = './'
# Length of each input window handed out by KerasBatchGenerator.
num_steps = 4
batch_size = 1
# Each window starts num_steps + 1 ids after the previous one.
skip_step = num_steps + 1
# NOTE(review): hidden_size is not referenced by any other code visible in this notebook.
hidden_size = 50
num_epochs = 100
input_shape = num_steps


# Encode the corpus and wrap it in the batch generator.
train_data, vocabulary, reversed_dictionary = load_data('data.txt')
train_data_generator = KerasBatchGenerator(train_data, num_steps, batch_size, vocabulary,
                                           skip_step)



In [None]:
"""from keras.utils import plot_model
import graphviz
import pydot

beta = 4
#create the emotional part model
emotion_input = Input(shape=(5,), name='et-1')
g = Dense(100,activation ='sigmoid')(emotion_input)
V = Dense(vocabulary+1)(g)
V_x_beta = Lambda(lambda x: x * beta)(V)
model = Model(inputs=emotion_input, outputs=V_x_beta)

optional_input = Input(shape=(num_steps, 5))
et = TimeDistributed(model)(optional_input)



main_input = Input(shape=(input_shape,), dtype='int32', name='ct-1')
embedding = Embedding(input_dim=vocabulary+1, output_dim=50, input_length= input_shape)(main_input)
lstm_layer_1 = LSTM(200,return_sequences=True)(embedding)
lstm_layer_2 = LSTM(200,return_sequences=True)(lstm_layer_1)
ct = TimeDistributed(Dense(vocabulary+1))(lstm_layer_2)

ct_1_plus_et_1 = keras.layers.Add()([ct, et])
softmax = keras.layers.Activation('softmax')(ct_1_plus_et_1)

model = keras.models.Model(inputs=[main_input, optional_input], outputs=[softmax])

model.compile(optimizer='adam',loss='categorical_crossentropy',
              metrics=['accuracy'])


plot_model(model, to_file='version1.png')
model.summary()





model.fit_generator(train_data_generator.generate(),len(train_data)//(batch_size* num_steps),epochs=num_epochs)

"""

In [None]:
#text = generate(model,20)

# Emotion vector order = sad, anger, anx, negemo, posemo

def generate(model, start=1, length_generate=7, maxlen=7):
    """Greedily extend a phrase, one word id at a time, using ``model.predict``.

    This definition shadows the ``generate`` defined earlier in the notebook.

    NOTE(review): only the padded word-id window is passed to ``model``, so this
    suits a single-input model. A model that also takes an emotion tensor needs
    that second input — confirm which model this is meant for.

    Args:
        model: object exposing ``predict(x, verbose=...)``. It is assumed to
            return per-timestep class scores of shape ``(1, maxlen, n_classes)``
            for a ``(1, maxlen)`` batch of word ids (TODO confirm).
        start: word id that seeds the phrase; it must be a key of the
            module-level ``reversed_dictionary`` (its word is printed).
        length_generate: number of word ids to append after ``start``.
        maxlen: length of the window fed to the model. Presumably this has to
            match the sequence length the model was built with (TODO confirm).

    Returns:
        list of int: ``start`` followed by ``length_generate`` predicted ids.
    """
    print(reversed_dictionary[start])
    built_phrase = [start]

    for _ in range(length_generate):
        # Zero-pad on the right up to `maxlen`. When the phrase is longer than
        # the window, pad_sequences' default 'pre' truncation keeps the most
        # recent ids.
        seed_text = keras.preprocessing.sequence.pad_sequences(
            [built_phrase], maxlen=maxlen, padding='post')
        predictions = model.predict(seed_text, verbose=0)

        # Take the scores at the last real (non-padding) position and keep the
        # most likely class id, not the score vector itself.
        last_position = min(len(built_phrase), maxlen) - 1
        built_phrase.append(int(np.argmax(predictions[0][last_position])))

    return built_phrase


# Inverse of reversed_dictionary: word -> integer id.
word2id = {word: index for index, word in reversed_dictionary.items()}



def emotion_per_step(i,num_step,emotions):
    """Build a ``(1, num_step, 5)`` array that is zero everywhere except row ``i``.

    Row ``i`` holds one 0/1 flag per emotion, in the order sad, anger, anx,
    negemo, posemo. A flag is 1 when its name occurs in ``emotions``.

    Args:
        i: timestep whose row receives the flags; must be smaller than ``num_step``.
        num_step: number of timesteps (rows) in the result.
        emotions: iterable of emotion names.

    Returns:
        numpy array of shape ``(1, num_step, 5)``.

    Raises:
        AssertionError: when ``i`` is not smaller than ``num_step``.
        KeyError: when ``emotions`` contains a name other than the five above.
    """
    # Column of each emotion inside a timestep's flag vector.
    column_of = {'sad': 0, 'anger': 1, 'anx': 2, 'negemo': 3, 'posemo': 4}

    assert(i < num_step)

    flags = [0] * len(column_of)
    for name in emotions:
        flags[column_of[name]] = 1

    emotion_vector = np.zeros((1, num_step, len(column_of)))
    emotion_vector[0][i] = flags
    return emotion_vector
    
    
# Generating a sequence needs two model inputs at every step: the padded window
# of word ids and an emotion tensor that switches the requested emotion on at
# one timestep (see emotion_per_step).
# NOTE(review): `model` must already exist when this cell runs; the cell that
# builds it is currently wrapped in a triple-quoted string and does not execute.

start = "jack"
begin = start.split(" ")
start_words = [word2id[w] for w in begin]
# Copy, so that extending the phrase does not also mutate `start_words`.
built_phrase = list(start_words)

# Window length fed to the model.
# NOTE(review): presumably has to equal the `num_steps` the model was built with — confirm.
steps = 4

for i in range(steps):
    # Zero-pad on the right. When the phrase outgrows the window,
    # pad_sequences' default 'pre' truncation keeps the most recent ids.
    seed_text = keras.preprocessing.sequence.pad_sequences([built_phrase], maxlen=steps, padding='post')

    # Position of the last real word inside the window; the model output at
    # that position is used as the next-word distribution.
    position = min(len(built_phrase), steps) - 1

    emo = emotion_per_step(position, steps, ['negemo'])

    y = model.predict([seed_text, emo], batch_size=1)[0][position]

    prediction = int(np.argmax(y))

    # Feed the chosen word back in so the next step conditions on it.
    built_phrase.append(prediction)

# Last expression of the block: the generated word ids.
built_phrase




#print(2)



In [None]:
# Inspect the id -> word mapping returned by load_data (this shows the whole dict).
reversed_dictionary

In [None]:
# NOTE(review): `predict` is called without any input data. Keras' `Model.predict`
# takes the inputs as a required argument, so this presumably raises a TypeError —
# looks like a leftover scratch cell; confirm before keeping it.
model.predict()

In [None]:
# Scratch check: an all-False boolean vector converts to five integer zeros.
emotions_ = np.zeros(5, dtype=bool)

list(map(int, emotions_))