forked from MITLibraries/oastats-backend
-
Notifications
You must be signed in to change notification settings - Fork 0
/
oastats.py
executable file
·66 lines (56 loc) · 2.24 KB
/
oastats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
# Directory that contains this script, with any symlinks resolved.
current_dir = os.path.dirname(os.path.realpath(__file__))
# Default OASTATS_SETTINGS to the settings.py sitting next to this script.
# setdefault() leaves the variable alone when the environment already
# provides a value, so an external override wins.
# NOTE(review): presumably this has to run before `pipeline.conf` is imported
# so that the settings module can be located -- confirm against pipeline.conf.
os.environ.setdefault("OASTATS_SETTINGS",
                      os.path.join(current_dir, "settings.py"))
import fileinput
import sys
from pipeline.conf import settings
from pipeline import process
from pipeline.load_json import get_collection, insert
import logging
import apachelog
import requests
# General-purpose application logger.
log = logging.getLogger("pipeline")
# Logger that receives the raw request lines which failed in the pipeline.
req_log = logging.getLogger("req_log")
# Collection handle used by main() for inserts; obtained once, at import
# time, from the database, collection and connection values in settings.
collection = get_collection(settings.MONGO_DB,
                            settings.MONGO_COLLECTION,
                            settings.MONGO_CONNECTION)
def main():
    """Parse a stream of requests and insert them into the collection.

    Input is read through ``fileinput``, so it comes from the files named on
    the command line or, when none are given, from stdin. Every line is
    handed to ``process``; truthy results are buffered and written to
    ``collection`` with ``insert`` in batches of 1000, and whatever remains
    in the buffer is flushed once the input is exhausted.

    Two loggers are involved:

    * ``req_log`` receives the text of each line that could not be processed
      (surrounding whitespace stripped), tagged through ``extra`` with an
      ``err_type`` of ``REQUEST_ERROR`` for
      ``apachelog.ApacheLogParserError`` or ``DSPACE_ERROR`` for
      ``requests.exceptions.RequestException``, so that the logging
      configuration can treat the two kinds of failure differently.
    * ``log`` receives any other exception, together with the input file
      name and the line number within that file, plus a closing summary.

    A failing line is logged and skipped; it never aborts the run.

    Raises:
        SystemExit: with the message "No requests to process" (which yields
            exit status 1) when no input lines were read at all.
    """
    batch_size = 1000
    req_buffer = []

    for line in fileinput.input():
        try:
            request = process(line)
        except apachelog.ApacheLogParserError:
            # The line could not be parsed as a request; keep it for replay.
            req_log.error(line.strip(), extra={'err_type': 'REQUEST_ERROR'})
            continue
        except requests.exceptions.RequestException:
            # An HTTP call made while processing the line failed.
            req_log.error(line.strip(), extra={'err_type': 'DSPACE_ERROR'})
            continue
        except Exception as e:
            # Last-resort boundary: one bad line must not stop the whole run.
            # ``as`` is valid on Python 2.6+ and 3, unlike ``except X, e``.
            log.error(e, extra={'inputfile': fileinput.filename(),
                                'inputline': fileinput.filelineno()})
            continue

        if request:
            req_buffer.append(request)
            # Flush in fixed-size batches to bound memory use.
            if len(req_buffer) >= batch_size:
                insert(collection, req_buffer)
                req_buffer = []

    # Write out the final, partially filled batch.
    if req_buffer:
        insert(collection, req_buffer)

    # lineno() is the cumulative number of lines read across all inputs, so
    # this figure also counts lines that failed or produced no request.
    lines_read = fileinput.lineno()
    if not lines_read:
        sys.exit("No requests to process")
    log.info("{0} requests processed".format(lines_read))
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()