# Gender Classifier

### Load Data

In [24]:
%store -r cnn_model

dataset = cnn_model['dataset']

abstracts_padded = cnn_model['abstracts_padded']
labels = cnn_model['ys']
num_classes = cnn_model['num_classes']

embeddings = cnn_model['embeddings']
word_dim = cnn_model['word_dim']
word2idx, idx2word = cnn_model['word2idx'], cnn_model['idx2word']
maxlen = cnn_model['maxlen']
vocab_size = cnn_model['vocab_size']
num_train = cnn_model['num_train']

### Hyperparameters

In [30]:
# Hyperparameters
batch_size = 32
nb_filter = 5
filter_length = 2
hidden_dims = 32
nb_epoch = 35

### Define Model

In [26]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.embeddings import Embedding
from keras.layers.convolutional import Convolution1D, MaxPooling1D

print('Build model...')
model = Sequential()

model.add(Embedding(input_dim=vocab_size, output_dim=word_dim, weights=[embeddings], input_length=maxlen))
model.add(Dropout(0.25))

model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length,
                        activation='relu'))
model.add(MaxPooling1D(pool_length=2))

model.add(Flatten())
model.add(Dense(hidden_dims))
model.add(Dropout(0.25))
model.add(Activation('relu'))

model.add(Dense(num_classes))
model.add(Activation('sigmoid'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

Build model...


In [31]:
ys = np.zeros([num_train, num_classes])
ys[np.arange(num_train), labels] = 1

model.fit(abstracts_padded, ys, nb_epoch=nb_epoch, show_accuracy=True, validation_data=(abstracts_padded, ys))

Train on 30 samples, validate on 30 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


<keras.callbacks.History at 0x7f412f9ddbd0>

### Examine Bigrams Which Filters Fire on

In [32]:
filters = model.layers[2].W.eval()
filters = np.squeeze(filters)
filters = [filter.T for filter in filters]

abstract = abstracts_padded[0]

def activation_generator(filter):
    for w1, w2 in zip(abstract, abstract[1:]):
        yield np.sum(embeddings[[w1, w2]] * filter), (w1, w2)
        
def activations_generator(filters):
    for filter in filters:
        yield list(activation_generator(filter))
        
activations = list(activations_generator(filters))

for activation in activations:
    for score, (w1, w2) in sorted(activation, reverse=True)[:10]:
        print score, idx2word[w1], idx2word[w2]
        
    print

0.41949547929 weeks ,
0.409910006469 weeks duration
0.378183749946 months in
0.365053895001 6 months
0.361200462159 trial ;
0.329211430784 for 6
0.328533914876 trial ,
0.319058119962 trial .
0.305586523088 at 8
0.305065498853 time .

0.38852793387 reflecting the
0.359276913601 of the
0.359233449515 . The
0.320809936559 symptoms .
0.318168784284 . Because
0.312273309689 of negative
0.312273309689 of negative
0.31188495453 with response
0.302034817561 in this
0.301182087749 response of

0.46354862094 of 50
0.438458310073 of potential
0.433554319179 the selected
0.421041743161 the magnitude
0.398178848994 of 8
0.398099703742 of therapeutic
0.398099703742 of therapeutic
0.386244117003 of schizophrenia
0.373783778498 of conventional
0.371321940341 of negative

0.279932761905 ; drop-out
0.240466662996 in a
0.240466662996 in a
0.22682281232 from placebo
0.220197336066 in this
0.21780245975 in trials
0.191382448753 exhibit therapeutic
0.183748895764 , a
0.183748895764 , a
0.183748895764 , a

0