In [1]:
from person2vec.train_embeddings import train

Using TensorFlow backend.


In [2]:
from person2vec.test_embeddings import test_tasks

In [3]:
from person2vec.generators import training_data_generator

In [4]:
# Build the project's training-data generator.
# NOTE(review): the positional arguments are opaque from here; presumably
# (word-vector size, number of compared entities) -- confirm against
# EmbeddingDataGenerator's signature.
data_gen = training_data_generator.EmbeddingDataGenerator(300,4)

In [None]:
# Short initial run: 3 epochs with the existing data generator.
# The result is unpacked as (model, data_gen) to match every later
# train_model call in this notebook; binding the whole return value to
# `model` would pass a tuple on to test_model / save_weights.
# NOTE(review): assumes train_model returns a 2-tuple -- confirm.
model, data_gen = train.train_model(epochs=3, data_gen=data_gen)

In [None]:
# Run the project's test_tasks.test_model evaluation on the current model.
test_tasks.test_model(model)

In [None]:
# Continue training the existing model for 30 more epochs.
# The result is unpacked as (model, data_gen) and the existing generator is
# passed in, matching every later train_model call in this notebook.
# NOTE(review): assumes train_model returns a 2-tuple and that reusing
# `data_gen` is equivalent to letting train_model use its default -- confirm.
model, data_gen = train.train_model(model=model, epochs=30, data_gen=data_gen)

In [None]:
# Re-run the test_tasks.test_model evaluation after the additional training.
test_tasks.test_model(model)

In [None]:
# Persist the current weights to an .h5 file under the project's data dir.
# NOTE(review): the filename advertises 60 epochs / 97% gender accuracy; the
# training calls visible in this notebook up to here total 33 epochs, so the
# name cannot be confirmed from here.
model.save_weights('../person2vec/data/weights/dense1000_60epoch_97genderacc.h5')

In [None]:
# Build a fresh model through train's underscore-prefixed (private) helper so
# saved weights can be loaded into it.
model2 = train._build_default_model()

In [None]:
# Restore the weights from the same .h5 path the save_weights cell writes.
model2.load_weights('../person2vec/data/weights/dense1000_60epoch_97genderacc.h5')

In [None]:
# Train the reloaded model (model2) for a single epoch.
# The result is unpacked as (model, data_gen) and the existing generator is
# passed in, matching every later train_model call in this notebook.
# NOTE(review): assumes train_model returns a 2-tuple and that reusing
# `data_gen` is equivalent to letting train_model use its default -- confirm.
model, data_gen = train.train_model(model=model2, epochs=1, data_gen=data_gen)

In [None]:
# Run the project's test_word2vec task on the generator's own word vectors.
# NOTE(review): presumably a baseline to compare the learned embeddings
# against -- confirm.
test_tasks.test_word2vec(data_gen.word_vectors, data_gen=data_gen)

In [None]:
import pickle

In [None]:
# Load previously pickled embedding weights from the project's data dir.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load pickles produced by this project.
# The context manager closes the file handle deterministically; the original
# bare open() call never closed it explicitly.
with open("../person2vec/data/weights/embed_weights_1.pk", "rb") as embed_file:
    embeds = pickle.load(embed_file)

In [None]:
# Run the project's test_embeddings task on the unpickled embeddings.
test_tasks.test_embeddings(embeds, data_gen=data_gen)

In [None]:
import pandas

In [None]:
# Toy table for trying out one-hot encoding: a single `occupation` column,
# indexed by person name (index is named `name`).
frame = pandas.DataFrame(
    {'occupation': ['banker', 'lawyer', 'doctor', 'lawyer']},
    index=pandas.Index(['frank', 'john', 'jimmy', 'sara'], name='name'),
)
frame

In [None]:
# One-hot encode the occupation column: one indicator column per distinct
# occupation value.
# Rebinding `frame` makes this cell non-idempotent: after it runs, `frame` no
# longer has an `occupation` column, so a second run raises.
frame = pandas.get_dummies(frame.occupation)
frame

In [None]:
# Show the frame's contents as a plain array (index and column labels dropped).
frame.values

In [None]:
from keras.layers import Input, Flatten, Dropout, Embedding, Dense
from keras.layers.merge import Concatenate
from keras.models import Model
from keras import optimizers

In [None]:
# Architecture: 2000-unit dense encoder over the flattened word vectors,
# concatenated with the flattened entity embeddings, then a 200-unit
# consolidating layer and a softmax output.
num_total_entities = 1693    # rows in the entity embedding table
word_vec_size = 300          # width of each word vector
snip_size = 32               # word vectors per input snippet
num_compare_entities = 4     # entity ids fed to the model per example
embedding_size = 300         # width of each entity embedding

input_tensor_words = Input(
    shape=(snip_size, word_vec_size,), dtype='float32', name='word_input')
input_tensor_entity = Input(
    shape=(num_compare_entities,), dtype='int32', name='entity_input')

# Every Dropout below has rate 0., so nothing is dropped; presumably kept as
# tuning knobs.
word_flatten_layer = Flatten()(input_tensor_words)
word_dropout_layer = Dropout(0.)(word_flatten_layer)

entity_embedding_layer = Embedding(
    num_total_entities,
    embedding_size,
    input_length=num_compare_entities,
    name='entity_embedding',
)(input_tensor_entity)
entity_embedding_layer = Flatten()(entity_embedding_layer)
entity_embedding_layer = Dropout(0.)(entity_embedding_layer)

word_branch = Dense(2000, activation="relu", name='dense_sentence_layer')(word_dropout_layer)

joint_embeds = Concatenate(name='joint_embeds')([word_branch, entity_embedding_layer])

nex = Dropout(0.)(joint_embeds)
nex = Dense(200, activation="relu", name='dense_consolidator')(nex)
nex = Dropout(0.)(nex)
# Output width follows num_compare_entities instead of a literal 4 so the two
# cannot drift apart (the value is unchanged here).
# NOTE(review): assumes the target is one-hot over the compared entities --
# confirm against the data generator.
full_out = Dense(num_compare_entities, activation='softmax', name='final_output')(nex)

model = Model([input_tensor_words, input_tensor_entity], full_out)

model.summary()

In [None]:
# Compile with default-parameter Adam and categorical cross-entropy,
# reporting accuracy.
# `Adam` is the optimizer class itself; the lowercase `adam` name is only an
# alias in older Keras releases and is not exposed by newer ones.
opt = optimizers.Adam()
loss = 'categorical_crossentropy'
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [None]:
from person2vec.generators import training_data_generator
# Rebuild the training-data generator with the same arguments as the first
# generator cell in this notebook.
# NOTE(review): positional arguments are presumably (word-vector size, number
# of compared entities) -- confirm against EmbeddingDataGenerator.
data_gen = training_data_generator.EmbeddingDataGenerator(300,4)

In [None]:
# Train the compiled model for 200 epochs; train_model's return value is
# unpacked into (model, data_gen), rebinding both names.
model, data_gen = train.train_model(model=model, epochs=200, data_gen=data_gen)

In [None]:
# Persist the trained weights to an .h5 file under the project's data dir.
model.save_weights('../person2vec/data/weights/dense2000_200epoch.h5')

In [None]:
# Run the project's test_tasks.test_model evaluation, passing the same data
# generator that was used for training.
test_tasks.test_model(model, data_gen=data_gen)

In [None]:
from keras.layers import BatchNormalization

In [None]:
# Architecture: 1000-unit dense encoder over the flattened word vectors with
# batch normalization, concatenated with the flattened entity embeddings,
# then a 200-unit consolidating layer and a softmax output. No dropout.
num_total_entities = 1693    # rows in the entity embedding table
word_vec_size = 300          # width of each word vector
snip_size = 32               # word vectors per input snippet
num_compare_entities = 4     # entity ids fed to the model per example
embedding_size = 300         # width of each entity embedding

input_tensor_words = Input(
    shape=(snip_size, word_vec_size,), dtype='float32', name='word_input')
input_tensor_entity = Input(
    shape=(num_compare_entities,), dtype='int32', name='entity_input')

word_flatten_layer = Flatten()(input_tensor_words)

entity_embedding_layer = Embedding(
    num_total_entities,
    embedding_size,
    input_length=num_compare_entities,
    name='entity_embedding',
)(input_tensor_entity)
entity_embedding_layer = Flatten()(entity_embedding_layer)

word_branch = Dense(1000, activation="relu", name='dense_sentence_layer')(word_flatten_layer)
word_branch = BatchNormalization()(word_branch)

joint_embeds = Concatenate(name='joint_embeds')([word_branch, entity_embedding_layer])

nex = Dense(200, activation="relu", name='dense_consolidator')(joint_embeds)
# Output width follows num_compare_entities instead of a literal 4 so the two
# cannot drift apart (the value is unchanged here).
# NOTE(review): assumes the target is one-hot over the compared entities --
# confirm against the data generator.
full_out = Dense(num_compare_entities, activation='softmax', name='final_output')(nex)

model = Model([input_tensor_words, input_tensor_entity], full_out)

model.summary()

In [None]:
# Compile with default-parameter Adam and categorical cross-entropy,
# reporting accuracy.
# `Adam` is the optimizer class itself; the lowercase `adam` name is only an
# alias in older Keras releases and is not exposed by newer ones.
opt = optimizers.Adam()
loss = 'categorical_crossentropy'
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [None]:
# Train the compiled model for 50 epochs; train_model's return value is
# unpacked into (model, data_gen), rebinding both names.
model, data_gen = train.train_model(model=model, epochs=50, data_gen=data_gen)

In [None]:
# Run the project's test_tasks.test_model evaluation, passing the same data
# generator that was used for training.
test_tasks.test_model(model, data_gen=data_gen)

In [None]:
from keras.layers import Convolution1D, MaxPooling1D

In [None]:
# Architecture: three inputs to the joint layer -- a dense word branch, a bank
# of 1-D convolutions over the word vectors, and the flattened entity
# embeddings -- followed by batch normalization, a 500-unit consolidating
# layer and a softmax output.
num_total_entities = 1693    # rows in the entity embedding table
word_vec_size = 300          # width of each word vector
snip_size = 32               # word vectors per input snippet
num_compare_entities = 4     # entity ids fed to the model per example
embedding_size = 300         # width of each entity embedding

input_tensor_words = Input(
    shape=(snip_size, word_vec_size,), dtype='float32', name='word_input')
input_tensor_entity = Input(
    shape=(num_compare_entities,), dtype='int32', name='entity_input')

# Flatten-before-Dense is disabled in this variant: the Dense layer below is
# applied to the un-flattened (snip_size, word_vec_size) input and its output
# is flattened afterwards.
# word_flatten_layer = Flatten()(input_tensor_words)

entity_embedding_layer = Embedding(
    num_total_entities,
    embedding_size,
    input_length=num_compare_entities,
    name='entity_embedding',
)(input_tensor_entity)
entity_embedding_layer = Flatten()(entity_embedding_layer)

word_branch = Dense(1000, activation="relu", name='dense_sentence_layer')(input_tensor_words)
word_branch = Flatten()(word_branch)
word_branch = BatchNormalization()(word_branch)

# One convolution per kernel size 2, 3 and 4; each has 32 filters, is
# max-pooled with the layer's default settings and then flattened.
convs = []
for num in range(2, 5):
    x = Convolution1D(32, num, padding='same', activation="relu")(input_tensor_words)
    x = MaxPooling1D()(x)
    x = Flatten()(x)
    convs.append(x)

conv_out = Concatenate(name='concatenated_convs')(convs)


joint_embeds = Concatenate(name='joint_embeds')([word_branch, conv_out, entity_embedding_layer])
joint_embeds = BatchNormalization()(joint_embeds)

nex = Dense(500, activation="relu", name='dense_consolidator')(joint_embeds)
# Output width follows num_compare_entities instead of a literal 4 so the two
# cannot drift apart (the value is unchanged here).
# NOTE(review): assumes the target is one-hot over the compared entities --
# confirm against the data generator.
full_out = Dense(num_compare_entities, activation='softmax', name='final_output')(nex)

model = Model([input_tensor_words, input_tensor_entity], full_out)

model.summary()

In [None]:
# Compile with default-parameter Adam and categorical cross-entropy,
# reporting accuracy.
# `Adam` is the optimizer class itself; the lowercase `adam` name is only an
# alias in older Keras releases and is not exposed by newer ones.
opt = optimizers.Adam()
loss = 'categorical_crossentropy'
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [None]:
# Train the compiled model for 50 epochs; train_model's return value is
# unpacked into (model, data_gen), rebinding both names.
model, data_gen = train.train_model(model=model, epochs=50, data_gen=data_gen)

In [None]:
# Run the project's test_tasks.test_model evaluation, passing the same data
# generator that was used for training.
test_tasks.test_model(model, data_gen=data_gen)

In [None]:
# Architecture: two stacked dense layers (1000 then 500 units) over the
# flattened word vectors, concatenated with the flattened entity embeddings,
# then a 100-unit consolidating layer and a softmax output.
num_total_entities = 1693    # rows in the entity embedding table
word_vec_size = 300          # width of each word vector
snip_size = 32               # word vectors per input snippet
num_compare_entities = 4     # entity ids fed to the model per example
embedding_size = 300         # width of each entity embedding

input_tensor_words = Input(
    shape=(snip_size, word_vec_size,), dtype='float32', name='word_input')
input_tensor_entity = Input(
    shape=(num_compare_entities,), dtype='int32', name='entity_input')

# Every Dropout below has rate 0., so nothing is dropped; presumably kept as
# tuning knobs.
word_flatten_layer = Flatten()(input_tensor_words)
word_dropout_layer = Dropout(0.)(word_flatten_layer)

entity_embedding_layer = Embedding(
    num_total_entities,
    embedding_size,
    input_length=num_compare_entities,
    name='entity_embedding',
)(input_tensor_entity)
entity_embedding_layer = Flatten()(entity_embedding_layer)
entity_embedding_layer = Dropout(0.)(entity_embedding_layer)

word_branch = Dense(1000, activation="relu", name='dense_sentence_layer')(word_dropout_layer)
word_branch = Dense(500, activation="relu", name='dense_sentence_layer_2')(word_branch)


joint_embeds = Concatenate(name='joint_embeds')([word_branch, entity_embedding_layer])

nex = Dropout(0.)(joint_embeds)
nex = Dense(100, activation="relu", name='dense_consolidator')(nex)
nex = Dropout(0.)(nex)
# Output width follows num_compare_entities instead of a literal 4 so the two
# cannot drift apart (the value is unchanged here).
# NOTE(review): assumes the target is one-hot over the compared entities --
# confirm against the data generator.
full_out = Dense(num_compare_entities, activation='softmax', name='final_output')(nex)

model = Model([input_tensor_words, input_tensor_entity], full_out)

model.summary()

In [None]:
# Compile with default-parameter Adam and categorical cross-entropy,
# reporting accuracy.
# `Adam` is the optimizer class itself; the lowercase `adam` name is only an
# alias in older Keras releases and is not exposed by newer ones.
opt = optimizers.Adam()
loss = 'categorical_crossentropy'
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [None]:
# Train the compiled model for 120 epochs; train_model's return value is
# unpacked into (model, data_gen), rebinding both names.
model, data_gen = train.train_model(model=model, epochs=120, data_gen=data_gen)

In [None]:
# Run the project's test_tasks.test_model evaluation, passing the same data
# generator that was used for training.
test_tasks.test_model(model, data_gen=data_gen)

In [None]:
# Architecture: 1000-unit dense encoder over the flattened word vectors,
# concatenated with the flattened entity embeddings, then a 1000-unit
# consolidating layer and a softmax output.
num_total_entities = 1693    # rows in the entity embedding table
word_vec_size = 300          # width of each word vector
snip_size = 32               # word vectors per input snippet
num_compare_entities = 4     # entity ids fed to the model per example
embedding_size = 300         # width of each entity embedding

input_tensor_words = Input(
    shape=(snip_size, word_vec_size,), dtype='float32', name='word_input')
input_tensor_entity = Input(
    shape=(num_compare_entities,), dtype='int32', name='entity_input')

# Every Dropout below has rate 0., so nothing is dropped; presumably kept as
# tuning knobs.
word_flatten_layer = Flatten()(input_tensor_words)
word_dropout_layer = Dropout(0.)(word_flatten_layer)

entity_embedding_layer = Embedding(
    num_total_entities,
    embedding_size,
    input_length=num_compare_entities,
    name='entity_embedding',
)(input_tensor_entity)
entity_embedding_layer = Flatten()(entity_embedding_layer)
entity_embedding_layer = Dropout(0.)(entity_embedding_layer)

word_branch = Dense(1000, activation="relu", name='dense_sentence_layer')(word_dropout_layer)

joint_embeds = Concatenate(name='joint_embeds')([word_branch, entity_embedding_layer])

nex = Dropout(0.)(joint_embeds)
nex = Dense(1000, activation="relu", name='dense_consolidator')(nex)
nex = Dropout(0.)(nex)
# Output width follows num_compare_entities instead of a literal 4 so the two
# cannot drift apart (the value is unchanged here).
# NOTE(review): assumes the target is one-hot over the compared entities --
# confirm against the data generator.
full_out = Dense(num_compare_entities, activation='softmax', name='final_output')(nex)

model = Model([input_tensor_words, input_tensor_entity], full_out)

model.summary()

In [None]:
# Compile with default-parameter Adam and categorical cross-entropy,
# reporting accuracy.
# `Adam` is the optimizer class itself; the lowercase `adam` name is only an
# alias in older Keras releases and is not exposed by newer ones.
opt = optimizers.Adam()
loss = 'categorical_crossentropy'
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [None]:
# Train the compiled model for 120 epochs; train_model's return value is
# unpacked into (model, data_gen), rebinding both names.
model, data_gen = train.train_model(model=model, epochs=120, data_gen=data_gen)

In [None]:
# Run the project's test_tasks.test_model evaluation, passing the same data
# generator that was used for training.
test_tasks.test_model(model, data_gen=data_gen)

In [None]:
# Continue training the current `model` for another 80 epochs (no rebuild or
# recompile in between); the return value is unpacked into (model, data_gen).
model, data_gen = train.train_model(model=model, epochs=80, data_gen=data_gen)

In [None]:
# Re-run the test_tasks.test_model evaluation after the extra epochs.
test_tasks.test_model(model, data_gen=data_gen)

In [None]:
# Architecture: 1000-unit dense encoder over the flattened word vectors,
# concatenated with the flattened entity embeddings, then a 2000-unit
# consolidating layer and a softmax output.
num_total_entities = 1693    # rows in the entity embedding table
word_vec_size = 300          # width of each word vector
snip_size = 32               # word vectors per input snippet
num_compare_entities = 4     # entity ids fed to the model per example
embedding_size = 300         # width of each entity embedding

input_tensor_words = Input(
    shape=(snip_size, word_vec_size,), dtype='float32', name='word_input')
input_tensor_entity = Input(
    shape=(num_compare_entities,), dtype='int32', name='entity_input')

# Every Dropout below has rate 0., so nothing is dropped; presumably kept as
# tuning knobs.
word_flatten_layer = Flatten()(input_tensor_words)
word_dropout_layer = Dropout(0.)(word_flatten_layer)

entity_embedding_layer = Embedding(
    num_total_entities,
    embedding_size,
    input_length=num_compare_entities,
    name='entity_embedding',
)(input_tensor_entity)
entity_embedding_layer = Flatten()(entity_embedding_layer)
entity_embedding_layer = Dropout(0.)(entity_embedding_layer)

word_branch = Dense(1000, activation="relu", name='dense_sentence_layer')(word_dropout_layer)

joint_embeds = Concatenate(name='joint_embeds')([word_branch, entity_embedding_layer])

nex = Dropout(0.)(joint_embeds)
nex = Dense(2000, activation="relu", name='dense_consolidator')(nex)
nex = Dropout(0.)(nex)
# Output width follows num_compare_entities instead of a literal 4 so the two
# cannot drift apart (the value is unchanged here).
# NOTE(review): assumes the target is one-hot over the compared entities --
# confirm against the data generator.
full_out = Dense(num_compare_entities, activation='softmax', name='final_output')(nex)

model = Model([input_tensor_words, input_tensor_entity], full_out)

model.summary()

In [None]:
# Compile with default-parameter Adam and categorical cross-entropy,
# reporting accuracy.
# `Adam` is the optimizer class itself; the lowercase `adam` name is only an
# alias in older Keras releases and is not exposed by newer ones.
opt = optimizers.Adam()
loss = 'categorical_crossentropy'
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [None]:
# Train the compiled model for 120 epochs; train_model's return value is
# unpacked into (model, data_gen), rebinding both names.
model, data_gen = train.train_model(model=model, epochs=120, data_gen=data_gen)

In [None]:
# Run the project's test_tasks.test_model evaluation, passing the same data
# generator that was used for training.
test_tasks.test_model(model, data_gen=data_gen)

In [None]:
# Replace the data generator with one built with a second argument of 10
# instead of 4.
# NOTE(review): presumably the number of compared entities per example (the
# next model cell sets num_compare_entities = 10) -- confirm against
# EmbeddingDataGenerator's signature.
data_gen = training_data_generator.EmbeddingDataGenerator(300,10)

In [None]:
# Architecture: same layout as the 1000/1000 dense variant, but taking 10
# entity ids per example instead of 4.
num_total_entities = 1693    # rows in the entity embedding table
word_vec_size = 300          # width of each word vector
snip_size = 32               # word vectors per input snippet
num_compare_entities = 10    # entity ids fed to the model per example
embedding_size = 300         # width of each entity embedding

input_tensor_words = Input(
    shape=(snip_size, word_vec_size,), dtype='float32', name='word_input')
input_tensor_entity = Input(
    shape=(num_compare_entities,), dtype='int32', name='entity_input')

# Every Dropout below has rate 0., so nothing is dropped; presumably kept as
# tuning knobs.
word_flatten_layer = Flatten()(input_tensor_words)
word_dropout_layer = Dropout(0.)(word_flatten_layer)

entity_embedding_layer = Embedding(
    num_total_entities,
    embedding_size,
    input_length=num_compare_entities,
    name='entity_embedding',
)(input_tensor_entity)
entity_embedding_layer = Flatten()(entity_embedding_layer)
entity_embedding_layer = Dropout(0.)(entity_embedding_layer)

word_branch = Dense(1000, activation="relu", name='dense_sentence_layer')(word_dropout_layer)

joint_embeds = Concatenate(name='joint_embeds')([word_branch, entity_embedding_layer])

nex = Dropout(0.)(joint_embeds)
nex = Dense(1000, activation="relu", name='dense_consolidator')(nex)
nex = Dropout(0.)(nex)
# Fix: the output layer was still a literal 4 although this variant takes
# num_compare_entities = 10 entity ids; the softmax width now follows the
# constant.
# NOTE(review): assumes the target is one-hot over the compared entities, as
# the 4-entity variants (output width 4) suggest -- confirm against the data
# generator.
full_out = Dense(num_compare_entities, activation='softmax', name='final_output')(nex)

model = Model([input_tensor_words, input_tensor_entity], full_out)

model.summary()

In [None]:
# Compile with default-parameter Adam and categorical cross-entropy,
# reporting accuracy.
# `Adam` is the optimizer class itself; the lowercase `adam` name is only an
# alias in older Keras releases and is not exposed by newer ones.
opt = optimizers.Adam()
loss = 'categorical_crossentropy'
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

In [None]:
# Train the compiled model for 120 epochs; train_model's return value is
# unpacked into (model, data_gen), rebinding both names.
model, data_gen = train.train_model(model=model, epochs=120, data_gen=data_gen)