-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
Makefile
68 lines (53 loc) · 1.88 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Handwritten words dataset collected by
# Rob Kassel at MIT Spoken Language Systems Group
# http://ai.stanford.edu/~btaskar/ocr/
# Path to the vw executable, relative to this directory.
VW := ../../vowpalwabbit/vw

# Learner options (same settings as do-mnist-train).
VW_OPTS := -b 24 -l 0.1 --nn 40

# VW_RUN is VW_OPTS with every space replaced by "_".  It is appended to the
# names of generated files so that each option set gets its own outputs.
# A literal space cannot be written directly as a $(subst) argument, hence
# the noop/space helpers; see
# http://stackoverflow.com/questions/1541844/joining-elements-of-a-list-in-gnu-make
noop :=
space := $(noop) $(noop)
VW_RUN := $(subst $(space),_,$(VW_OPTS))

RM := rm -f

# Python > 2.7 is required
PYTHON := python
# First target in the file, so a bare `make` lands here: print where the
# data comes from and the command that starts the pipeline.
help:
	@printf '%s\n' \
		'handwritten words dataset collected by' \
		'Rob Kassel at MIT Spoken Language Systems Group' \
		'http://ai.stanford.edu/~btaskar/ocr/' \
		'$$ make run'
# Download the raw dataset archive.
# The transfer goes to a temporary name and is renamed only after wget
# succeeds, so a failed download never leaves a truncated file that Make
# would treat as an up-to-date target.  A leftover .tmp file is overwritten
# on the next attempt and is removed by `make clean` (it matches letter.*).
letter.data.gz:
	wget -O $@.tmp http://ai.stanford.edu/~btaskar/ocr/letter.data.gz
	mv -f $@.tmp $@
# Download the letter.names file that accompanies the dataset.
# The transfer goes to a temporary name and is renamed only after wget
# succeeds, so a failed download never leaves a truncated file that Make
# would treat as an up-to-date target.  A leftover .tmp file is overwritten
# on the next attempt and is removed by `make clean` (it matches letter.*).
letter.names:
	wget -O $@.tmp http://ai.stanford.edu/~btaskar/ocr/letter.names
	mv -f $@.tmp $@
# Convert the raw data into VW input.  The converter script is run with the
# data archive and the names file as inputs, followed by two output paths:
# the target itself and "<target>.test" (the file later read by the
# prediction step).
# NOTE(review): presumably ocr2vw.py writes both output paths -- confirm in
# the script; only letter.vw is tracked by Make.
# The second command prints how often each distinct first field of the
# target occurs, least frequent first.
letter.vw: ocr2vw.py letter.data.gz letter.names
	$(PYTHON) $(^) $(@) $(@).test
	cut -d' ' -f1 $(@) | sort | uniq -c | sort -n
# Category count: passed to --oaa during training and used as the size of
# the confusion matrix.
CATN := 26

# Train on letter.vw and store the model in the target.  The cache file is
# named after the current option set and is removed again once the training
# command has succeeded.
letter.model_$(VW_RUN): letter.vw
	time $(VW) \
		--oaa $(CATN) \
		--final_regressor $(@) \
		--adaptive --invariant --holdout_off \
		--loss_function logistic \
		--passes 14 \
		$(VW_OPTS) \
		--data $(<) \
		-k --cache_file $(<).cache_$(VW_RUN)
	$(RM) $(<).cache_$(VW_RUN)
# Run the trained model in test-only mode over letter.vw.test and write the
# predictions to the target.
# NOTE(review): letter.vw.test is not a declared prerequisite.  It is expected
# to exist already because the model depends on letter.vw, whose recipe is
# handed letter.vw.test as an output path.
letter.predictions_$(VW_RUN): letter.model_$(VW_RUN)
	time $(VW) --testonly \
		--initial_regressor $(<) \
		--predictions $(@) \
		--data letter.vw.test
# Perl program, run as `perl -lane`, that scores a predictions file.
# Taken almost verbatim from ../mnist/Makefile.
#
# Per input line: field 0 is read as the predicted class number, and the
# first character of field 1 is mapped a->1, b->2, ... to get the true class.
# NOTE(review): assumes field 1 starts with the lowercase true letter --
# confirm against the output of ocr2vw.py.
#
# The "} {" closes the implicit per-line loop that -n puts around the code,
# so everything after it runs once at end of input: it prints the error
# summary followed by a CATN x CATN table of counts.
#
# The counters are defaulted to 0 before printing so that a run without any
# misclassification reports "0" rather than an empty string, and an empty
# predictions file reports 0.00% instead of dying on a division by zero.
#
# Keep this recursively expanded (=): the stem variable and CATN are meant to
# be resolved when the pattern rule's recipe is expanded.  Do not place
# comments or single quotes inside the definition; the whole value is one
# shell single-quoted word.
CONFUSION = '++$$n; $$p=int($$F[0]); $$l=ord($$F[1])-ord("a")+1; \
    ++$$c if $$p != $$l; \
    ++$$m{"$$l:$$p"}; } { \
    $$n ||= 0; $$c ||= 0; \
    print "$* test errors: $$c out of $$n = " . \
        sprintf("%.2f%%", $$n ? 100*$$c/$$n : 0) . \
        "\nconfusion matrix (rows = truth, columns = prediction):"; \
    foreach $$true (1 .. $(CATN)) { \
        print join "\t", map { $$m{"$$true:$$_"} || 0 } (1 .. $(CATN)); \
    }'
# Build the confusion report for a run from its predictions file, then show
# it on the terminal.
# The report is written to a temporary name and renamed only when perl
# succeeds.  Redirecting straight into the target would create the file even
# if perl failed, and a later `make` would then accept the empty or partial
# report as up to date.
%.confusion_$(VW_RUN): %.predictions_$(VW_RUN)
	@perl -lane $(CONFUSION) $< > $@.tmp
	@mv -f $@.tmp $@
	@cat $@
# Entry point: build everything up to the confusion report for the current
# option set.
run: letter.confusion_$(VW_RUN)

# Remove every letter.* file in this directory, including the downloads.
clean:
	$(RM) letter.*

# help, run and clean are commands, not files.  Declaring them phony keeps a
# stray file with one of these names from masking the target.
.PHONY: clean help run