# Gender Classifier

### Load Data

In [108]:
%store -r cnn_model

dataset = cnn_model['dataset']

abstracts_padded = cnn_model['abstracts_padded']
labels = cnn_model['ys']
num_classes = cnn_model['num_classes']

embeddings = cnn_model['embeddings']
word_dim = cnn_model['word_dim']
word2idx, idx2word = cnn_model['word2idx'], cnn_model['idx2word']
maxlen = cnn_model['maxlen']
vocab_size = cnn_model['vocab_size']
num_train = cnn_model['num_train']

### Hyperparameters

In [64]:
# Hyperparameters
batch_size = 32
nb_filter = 5
filter_length = 2
hidden_dims = 32
nb_epoch = 100

### Define Model

In [65]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.embeddings import Embedding
from keras.layers.convolutional import Convolution1D, MaxPooling1D

print('Build model...')
model = Sequential()

model.add(Embedding(input_dim=vocab_size, output_dim=word_dim, weights=[embeddings], input_length=maxlen))
model.add(Dropout(0.25))

model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length,
                        activation='relu'))
model.add(MaxPooling1D(pool_length=2))

model.add(Flatten())
model.add(Dense(hidden_dims))
model.add(Dropout(0.25))
model.add(Activation('relu'))

model.add(Dense(num_classes))
model.add(Activation('sigmoid'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

Build model...


In [119]:
ys = np.zeros([num_train, num_classes])
ys[np.arange(num_train), labels] = 1

model.fit(abstracts_padded, ys, nb_epoch=nb_epoch, show_accuracy=True, validation_data=(abstracts_padded, ys))

Train on 3 samples, validate on 3 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch

<keras.callbacks.History at 0x7f6f232c5910>

### Examine Bigrams Which Filters Fire on

In [126]:
filters = model.layers[2].W.eval()
filters = np.squeeze(filters)
filters = [filter.T for filter in filters]

abstract = abstracts_padded[0]

def activation_generator(filter):
    for w1, w2 in zip(abstract, abstract[1:]):
        yield np.sum(embeddings[[w1, w2]] * filter), (w1, w2)
        
def activations_generator(filters):
    for filter in filters:
        yield list(activation_generator(filter))
        
activations = list(activations_generator(filters))

for activation in activations:
    for score, (w1, w2) in sorted(activation, reverse=True)[:10]:
        print score, idx2word[w1], idx2word[w2]
        
    print

0.506117483901 of 8
0.493075268554 of 50
0.42429924674 for 6
0.405558152017 treatment groups
0.392227002374 from placebo
0.385270960555 6 months
0.38491422938 of efficacy
0.361659511958 treatment with
0.35731639723 24 weeks
0.346629607111 the 6-month

0.562033781484 , the
0.479014778299 is a
0.455482436091 reflecting the
0.444166919075 a cognitive
0.431555934183 , including
0.430971522425 schizophrenia .
0.416424531976 D-serine .
0.414134852524 , a
0.414134852524 , a
0.414134852524 , a

0.572803035222 for the
0.572803035222 for the
0.564125273132 . Because
0.563472526988 of the
0.563190037601 and cognitive
0.563190037601 and cognitive
0.554839769879 a cognitive
0.535594728225 site ,
0.505572663576 rate ,
0.502821985798 . The

0.620179076402 8 weeks
0.590728107142 6 months
0.586289117237 8 or
0.572058941313 50 mg/day
0.572058941313 50 mg/day
0.551833770301 or placebo
0.521503149366 , were
0.517406968923 , or
0.507157257505 , treated
0.48025551218 24 weeks

0.552906629601 patients with
0