In [1]:
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import pandas as pd
import numpy as np
from random import randint,shuffle

In [2]:
d=open("dictionary.txt", 'r')
s=open("sentiment_labels.txt")
sents=[]
labels=[]
for line in d:
    sents.append(line.split('|')[0])
for line in s:
    score=line.split('|')[1]
    labels.append(score.split('\n')[0])
d.close()
s.close()

In [3]:
def hot_vector(size, sent_feat_temps):
    sent_feats = np.zeros(size)
    for element in sent_feat_temps:
        sent_feats[element] = 1    
    return sent_feats

In [4]:
def create_vocab(fts_list):#takes a list of lists of words
    rV=[]
    for el in fts_list:
        for word in el:
            rV.extend(el)
    rV=set(rV)
    return rV

In [5]:
token_sents=[]
lemmatizer = WordNetLemmatizer()
for sent in sents:
    sentence=word_tokenize(sent)
    lem_sentence=[]
    for word in sentence:
        lem_sentence.append(lemmatizer.lemmatize(word.lower()))
    token_sents.append(lem_sentence)
#    print(sentence)
#    input()
zipped=list(zip(token_sents,labels))
del zipped[0]#getting rid of the header
zipped.sort(key = lambda x: len(x[0]), reverse=True)
language_data=zipped[:100000]
#list of tuples. One per sentence. ([list of lemmatized lowercase words in the sentence], a sentiment score)

In [6]:
sentlist=[sent for sent,score in language_data]
vocab=create_vocab(sentlist)
vocabsize=len(vocab)
inttofeat = dict(zip(range(vocabsize), vocab))
feattoint = dict(zip(vocab, range(vocabsize)))

In [7]:
hot_data=[]
for sent,score in language_data:
    feat=[feattoint[word] for word in sent]
    hot_data.append((hot_vector(vocabsize,feat),score))

In [8]:
#Now, sort the sentences in 3 categories:

angry_sents=[]
happy_sents=[]
neutral_sents=[]

for sent,score in hot_data:
    if 0.0 <= float(score) <= 0.25:
        angry_sents.append((sent,score))
    elif float(score) == 0.50:
        neutral_sents.append((sent,score))
    elif 0.75 <= float(score) <= 1:
        happy_sents.append((sent,score))
        

In [9]:
df=pd.read_csv("fer2013/fer2013.csv")#image data

In [10]:
#0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral
angry_df=df[df.emotion == 0]
happy_df=df[df.emotion == 3]
neutral_df=df[df.emotion == 6]
#len(angry_sents)#  7926
#len(neutral_sents)# around 19000
#len(happy_sents) # 12094

In [11]:
all_data=[]
for sent,score in angry_sents:
    image_feat=angry_df.iloc[randint(0, len(angry_df)-1)]['pixels'].split()
    image_feat = [int(i) for i in image_feat]
    all_data.append((np.array(image_feat),sent,'NEG'))
    
for sent,score in happy_sents:
    image_feat=happy_df.iloc[randint(0, len(happy_df)-1)]['pixels'].split()
    image_feat = [int(i) for i in image_feat]
    all_data.append((np.array(image_feat),sent,'POS'))

for sent,score in neutral_sents:
    image_feat=neutral_df.iloc[randint(0, len(neutral_df)-1)]['pixels'].split()
    image_feat = [int(i) for i in image_feat]
    all_data.append((np.array(image_feat),sent,'NEU'))

In [12]:
len(all_data)

39200

In [13]:
shuffle(all_data)

In [14]:
s=int((len(all_data)*0.8))#splits with 80% training, 20% test
train, test=all_data[:s], all_data[s:]

#small_data=all_data[:5000]
#s=int((len(small_data)*0.8))#splits with 80% training, 20% test
#strain, stest=small_data[:s], small_data[s:]

In [21]:
xtrain=[(image,sent) for (image,sent,label) in train]
ytrain=[label for (image,sent,label) in train]
xtest=[(image,sent) for (image,sent,label) in test]
ytest=[label for (image,sent,label) in test]

#xtrain=[(image,sent) for (image,sent,label) in strain]
#ytrain=[label for (image,sent,label) in strain]
#xtest=[(image,sent) for (image,sent,label) in stest]
#ytest=[label for (image,sent,label) in stest]

In [22]:
image_size=len(all_data[0][0])
sent_size=len(all_data[0][1])
labels=set(list(l for (i,s,l) in all_data))

#image_size=len(small_data[0][0])
#sent_size=len(small_data[0][1])
#labels=set(list(l for (i,s,l) in small_data))

print('image_size: ',image_size)
print('sent_size: ',sent_size)
print('labels:  ', labels)

image_size:  2304
sent_size:  18049
labels:   {'POS', 'NEU', 'NEG'}


xtrain is now a list of tuples, but xtrain in Keras must be a list of two numpy arrays:

In [23]:
xtrain2 = list(zip(*xtrain))
xtrain2 = [np.array(xtrain2[1]), np.array(xtrain2[0])]

xtest2 = list(zip(*xtest))
xtest2 = [np.array(xtest2[1]), np.array(xtest2[0])]

In [24]:
#since we are using ‘sparse_categorical_crossentropy’ ,
#it means the output prediction must be categorical and each category is represented with an integer
cat_codes = {
    'POS': 0,
    'NEU': 1,
    'NEG': 2,
}
ytrain2 = np.array([cat_codes[c] for c in ytrain])
ytest2 = np.array([cat_codes[c] for c in ytest])

In [25]:
print('image feat type: ', type(all_data[0][0]))
print('sent feat type:', type(all_data[0][1]))

image feat type:  <class 'numpy.ndarray'>
sent feat type: <class 'numpy.ndarray'>


In [26]:
from keras.models import load_model
from keras.models import Sequential, Model
from keras.layers import Dense, LSTM, Embedding, Concatenate, Dropout
from keras.layers import Input
from keras.callbacks import EarlyStopping
import keras.backend as K

Adaption of Mehdi’s code for Visual QA:

input question is the sentence --- input image is image --- output answer is the sentiment label

In [27]:
#input_question = Input([question_len+1,])
#input_context = Input([visual_vec_len,])

input_question = Input([sent_size,])
input_context = Input([image_size,])

# learn embedings (size=50 as we chose just now :D)
q_embs = Embedding(len(vocab), 50)(input_question)

# encode the question
q_encoded = LSTM(50)(q_embs)

mlp_1 = Dense(image_size, activation='tanh')(q_encoded)

q_composed = Concatenate()([input_context, mlp_1])

mlp_2 = Dropout(0.2)(Dense(image_size, activation='relu')(q_composed))

final_a = Dense(len(labels), activation='softmax')(mlp_2)

model = Model([input_question, input_context], final_a)
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 18049)         0                                            
____________________________________________________________________________________________________
embedding_1 (Embedding)          (None, 18049, 50)     902450      input_1[0][0]                    
____________________________________________________________________________________________________
lstm_1 (LSTM)                    (None, 50)            20200       embedding_1[0][0]                
____________________________________________________________________________________________________
input_2 (InputLayer)             (None, 2304)          0                                            
___________________________________________________________________________________________

In [28]:
model.compile('adam', 'sparse_categorical_crossentropy')

In [None]:
model.fit(xtrain2, ytrain2, epochs=100, batch_size=32, validation_split=0.1, callbacks=[EarlyStopping(patience=5)])

In [None]:
model.save('saved_model.h5')  # creates a HDF5 file 'my_model.h5'

In [None]:
predictions = model.predict([xtest[0][:1], xtest[1][:1]])
#print('answer predictions', predictions)

Facial expressions dataset: 
"Challenges in Representation Learning: A report on three machine learning
contests." I Goodfellow, D Erhan, PL Carrier, A Courville, M Mirza, B
Hamner, W Cukierski, Y Tang, DH Lee, Y Zhou, C Ramaiah, F Feng, R Li,
X Wang, D Athanasakis, J Shawe-Taylor, M Milakov, J Park, R Ionescu,
M Popescu, C Grozea, J Bergstra, J Xie, L Romaszko, B Xu, Z Chuang, and
Y. Bengio. arXiv 2013.

(0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral)