-
Notifications
You must be signed in to change notification settings - Fork 0
/
write2arc.py
36 lines (27 loc) · 1.14 KB
/
write2arc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import datetime
import os
from warc.gzip2 import GzipFile
from arcfile2 import ARCFile2
# Module authorship metadata.
__author__ = "Daniel Bicho"
__email__ = "daniel.bicho@fccn.pt"
class Write2Arc(object):
    """Write ARC records into a sequence of size-capped ``.arc.gz`` files.

    Each output file is named ``<filename>-<UTC timestamp>-<NNNNN>.arc.gz``,
    where ``NNNNN`` is a zero-padded counter that starts at 0 and is
    incremented on every rotation. After each record is written the on-disk
    size of the current file is checked; once it has reached ``max_size``
    bytes the file is closed and a new one is opened.

    The instance may be used as a context manager so that the archive being
    written is closed when the ``with`` block exits.
    """

    def __create_arc(self):
        """Open the next output file and wrap it in an ``ARCFile2``."""
        # Take the timestamp once so the file name and the ``date`` header
        # can never end up on different seconds.
        timestamp = datetime.datetime.utcnow().strftime('%Y%m%d%H%M%S')
        self.arc_filename = '{}-{}-{}.arc.gz'.format(
            self.filename, timestamp, str(self._number_arcs).zfill(5))
        self.f = GzipFile(self.arc_filename, mode='wb')
        self.arc_file = ARCFile2(
            file_headers={'ip_address': '0.0.0.0',
                          'date': timestamp,
                          'org': 'Arquivo.pt'},
            fileobj=self.f, version=1)

    def __init__(self, filename, max_size):
        """Create the writer and open the first archive file.

        :param filename: prefix used to build every archive file name.
        :param max_size: size in bytes (as reported by
            ``os.path.getsize``) at which the current file is rotated.
        """
        self._number_arcs = 0
        self.filename = filename
        self.max_size = max_size
        self.__create_arc()

    def write_record(self, arc_record):
        """Write ``arc_record`` to the current archive, rotating if needed.

        :param arc_record: record object handed unchanged to
            ``self.arc_file.write``.
        """
        self.arc_file.write(arc_record)
        # NOTE(review): getsize only sees bytes already on disk; data still
        # buffered by the gzip layer is presumably not counted, so a file may
        # exceed ``max_size`` somewhat before rotating - confirm if the limit
        # must be strict.
        if os.path.getsize(self.arc_filename) < self.max_size:
            return
        # Rotate: finish the current archive and start the next one.
        self.arc_file.close()
        self._number_arcs += 1
        self.__create_arc()

    def close(self):
        """Close the archive currently being written.

        Performs the same ``self.arc_file.close()`` call that rotation does.
        Call this (or use the instance in a ``with`` block) once the last
        record has been written.
        """
        self.arc_file.close()

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the current archive; exceptions are not suppressed."""
        self.close()