-
Notifications
You must be signed in to change notification settings - Fork 0
/
submit.py
27 lines (24 loc) · 988 Bytes
/
submit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import logging
from models.statModel import StatModel, BigramStatModel
from util.corpus import Corpus
from util.stat import ddd, dd # For loading pickle file
from util.ensemble import votingEnrich
# Root-logger setup: emit records at INFO and above, each rendered as
# "<timestamp> <source file>[line:<n>] <level> <message>".
# Note: logging.basicConfig does nothing if the root logger already has
# handlers, so this must run before anything else configures logging.
_LOG_OPTIONS = {
    'datefmt': '%a, %d %b %Y %H:%M:%S',
    'format': '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    'level': logging.INFO,
}
logging.basicConfig(**_LOG_OPTIONS)
# Enrichment result files handed to votingEnrich as a single list.
# NOTE(review): the numeric suffix in each filename looks like a validation
# score, and votingEnrich presumably produces the enriched test file that is
# loaded afterwards -- confirm both against util.ensemble.
enrich_result_files = [
    'data/enrich_0.99890.txt',
    'data/enrich_0.99889.txt',
    'data/enrich_0.99892.txt',
    'data/enrich_0.99901.txt',
    'data/enrich_0.99893.txt',
]
votingEnrich(enrich_result_files)
# Denominator that turns the model's diagnostic counters into rates.
# NOTE(review): presumably the number of tokens in the test set -- confirm;
# it must be updated by hand if the test data changes.
TOTAL_TOKENS = 1088565.0

# Load the enriched test data and run the bigram statistics model over it.
X = Corpus.loadEnrichData('data/en_test_enrich.txt')
model = BigramStatModel('data/bigramFreqDict.pkl')
prediction = model.predict(X)

# Log each counter the model exposes after predict(), as a fraction of
# TOTAL_TOKENS.  The calls are kept separate so every record carries its own
# source line number in the log output.
not_found_token_rate = model.notFoundToken / TOTAL_TOKENS
logging.info('Not found token rate: {}'.format(not_found_token_rate))

not_found_label_rate = model.notFoundLabel / TOTAL_TOKENS
logging.info('Found token but not found label rate: {}'.format(not_found_label_rate))

potential_error_rate = model.potentialError / TOTAL_TOKENS
logging.info('Potential error rate: {}'.format(potential_error_rate))

# Write the predictions to the submission file.
Corpus.writePrediction(prediction, 'submission.csv')