/
trainw2v.py
40 lines (31 loc) · 1.41 KB
/
trainw2v.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""Train neural network on W2V vectors.
Usage:
trainw2v.py --w2v-vectors=<vecfile> --in-classes=<classes> --prog-classifier=<classifier> --cancer-classifier=<classifier>
Options:
--w2v-vectors=<vecfile> Vector representation of reports
--in-classes=<classes> Input file with classification of radiology reports. Should have 3 columns: Id,Cancer,Progression
--prog-classifier=<classifier> Produced torch classifier for progression
--cancer-classifier=<classifier> Produced torch classifier for cancer
"""
from docopt import docopt
from buildw2v import VECTOR_SIZE
from torchnn import DataLoader, TorchNN
if __name__ == '__main__':
    # Script entry point: build, train and save two classifiers (one per
    # label column) from the same vector file and the same classes file.
    # The command line is parsed by docopt from the module docstring.
    opts = docopt(__doc__)
    vectors_path = opts['--w2v-vectors']
    labels_path = opts['--in-classes']

    # Everything below is kept in cancer-then-progression order, so the
    # output paths line up with the label columns by position.
    label_columns = ('Cancer', 'Prog')
    output_paths = (opts['--cancer-classifier'], opts['--prog-classifier'])
    # NOTE(review): the usage text names the third column "Progression",
    # while 'Prog' is what gets passed to DataLoader here -- confirm which
    # name DataLoader actually expects.

    print('Preparing data...')
    print(labels_path)
    loaders = [
        DataLoader(vectors_path, column, labels_path)
        for column in label_columns
    ]

    print('Building networks...')
    networks = [TorchNN(loader, VECTOR_SIZE) for loader in loaders]

    print('Training networks...')
    for network in networks:
        # presumably 5000 is the number of training iterations -- confirm
        # against TorchNN.train
        network.train(5000)

    print('Saving classifiers...')
    for network, model_path in zip(networks, output_paths):
        network.saveModel(model_path)