-
Notifications
You must be signed in to change notification settings - Fork 1
/
extract.py
executable file
·68 lines (61 loc) · 2.68 KB
/
extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python3
import os
import zipfile
import gzip
import config
# Load the project configuration once, at import time, via the local
# `config` module.
# NOTE(review): `configuration` is not referenced anywhere in the visible
# part of this file -- confirm whether load_config() is still needed here
# (e.g. for its side effects) before removing it.
configuration = config.load_config()
def process_zip_file(filename, dest):
    """Extract the single XML file of a ZIP archive into *dest*.

    The archive's base name must have the form ``<message_id>-<name>.zip``.
    The archive must contain exactly one member, an ``.xml`` file whose stem
    equals ``<name>`` with one or two extensions removed.  The member is
    written directly into *dest* as ``<message_id>-<member base name>`` and
    the archive is deleted afterwards.

    Args:
        filename: Path to the ZIP archive.
        dest: Directory that receives the extracted XML file.

    Raises:
        ValueError: If the archive's base name contains no ``-`` separator.
        Exception: If the archive does not hold exactly one suitably named
            XML file, or if the destination file already exists.
    """
    # Split the base name, not the whole path: a '-' in any directory
    # component would otherwise be mistaken for the message-id separator.
    archive_name = os.path.basename(filename)
    message_id, separator, real_filename = archive_name.partition('-')
    if not separator:
        raise ValueError("Archive name '{}' has no '<message_id>-' prefix!".format(filename))

    with zipfile.ZipFile(filename, mode='r') as f:
        names = f.namelist()
        if len(names) != 1:
            raise Exception("ZIP archive '{}' has not precisely one file!".format(filename))

        # Keep the full member name for the archive lookup below; only the
        # last path component is used for validation and for the output name.
        member_name = names[0]
        xmlfile = member_name.rsplit('/', 1)[-1]
        if '\\' in xmlfile:
            raise Exception("ZIP archive '{}' contains a file '{}' in a subfolder!".format(filename, xmlfile))

        fn, ext = os.path.splitext(xmlfile)
        if ext != '.xml':
            raise Exception("ZIP archive '{}' contains a non-XML file '{}'!".format(filename, xmlfile))

        # Accept both "report.zip" -> "report" and "report.xml.zip" -> "report".
        bn = os.path.splitext(real_filename)[0]
        bns = [bn, os.path.splitext(bn)[0]]
        if fn not in bns:
            raise Exception("ZIP archive '{}' contains a XML file called '{}', but it should contain one called {}!".format(filename, fn, ' or '.join(["'{}'".format(candidate) for candidate in bns])))

        new_filename = '{}-{}'.format(message_id, xmlfile)
        if os.path.exists(os.path.join(dest, new_filename)):
            raise Exception("Destination file '{}' already exists!".format(new_filename))

        # Look the member up by its original archive name: the stripped name
        # is not a valid key when the member carried a folder prefix.
        member = f.getinfo(member_name)
        # Renaming the ZipInfo makes extract() write under the new name.
        member.filename = new_filename
        f.extract(member, dest)

    # Only reached when extraction succeeded.
    os.unlink(filename)
def process_gzip_file(filename, dest):
    """Decompress a ``.gz``/``.gzip`` file into *dest* and delete the original.

    The output is written directly into *dest* under the archive's base name
    with the compression suffix removed, e.g. ``in/42-report.xml.gz`` becomes
    ``<dest>/42-report.xml``.

    Args:
        filename: Path to the gzip-compressed file; its base name must
            contain a ``-`` (the ``<message_id>-`` prefix).
        dest: Existing directory that receives the decompressed file.

    Raises:
        ValueError: If the base name contains no ``-`` separator.
        Exception: If the name has neither a ``.gz`` nor a ``.gzip`` suffix,
            or if the destination file already exists.
        OSError: If the file cannot be read as gzip data or the destination
            cannot be written.
    """
    # Work on the base name only, so that the output always lands inside
    # `dest` regardless of the directory the archive was found in.
    archive_name = os.path.basename(filename)
    if '-' not in archive_name:
        raise ValueError("Archive name '{}' has no '<message_id>-' prefix!".format(filename))

    for suffix in ('.gz', '.gzip'):
        if archive_name.endswith(suffix):
            xmlfile = archive_name[:-len(suffix)]
            break
    else:
        raise Exception("File '{}' is neither a .gz nor a .gzip archive!".format(filename))

    # Decompress completely before touching the destination, so a corrupt
    # archive never leaves a partial output file behind.
    with gzip.open(filename, mode='rb') as f:
        content = f.read()

    try:
        # 'x' creates the file exclusively: an existing file is never
        # overwritten, with no gap between the check and the write.
        with open(os.path.join(dest, xmlfile), 'xb') as out:
            out.write(content)
    except FileExistsError:
        raise Exception("Destination file '{}' already exists!".format(xmlfile)) from None

    # Only reached when the output was written successfully.
    os.unlink(filename)
def process(source, dest):
    """Walk *source* recursively and unpack every archive found into *dest*.

    Files ending in ``.zip`` are handed to ``process_zip_file``; files ending
    in ``.gz`` or ``.gzip`` are handed to ``process_gzip_file``.  Any other
    file is skipped silently.  A failure on one file is printed and does not
    stop the walk; each success is reported on stdout.
    """
    for dirpath, _, filenames in os.walk(source):
        for name in filenames:
            # Pick the handler from the suffix; unrelated files are ignored.
            if name.endswith('.zip'):
                handler = process_zip_file
            elif name.endswith(('.gz', '.gzip')):
                handler = process_gzip_file
            else:
                continue

            path = os.path.join(dirpath, name)
            try:
                handler(path, dest)
            except Exception as e:
                # Best effort: report the problem and carry on with the rest.
                print(e)
            else:
                print('Successfully processed {}.'.format(path))
# Script entry: unpack every archive found below the current directory
# into the 'files/' folder.
source, dest = '.', 'files/'
process(source, dest)