forked from proycon/valkuil
-
Notifications
You must be signed in to change notification settings - Fork 0
/
errorcorpus2folia.py
executable file
·33 lines (25 loc) · 1.13 KB
/
errorcorpus2folia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#! /usr/bin/env python
# -*- coding: utf8 -*-
import glob
import os
import subprocess
import sys
import tempfile
# sed script that re-adds the default D-Coi namespace declaration to every
# "<DCOI " occurrence; the source files have lost that declaration.
_RESTORE_NAMESPACE_SED = (
    r's/<DCOI /<DCOI xmlns="http:\/\/lands.let.ru.nl\/projects\/d-coi\/ns\/1.0" /g'
)

_USAGE = "Usage: errorcorpus2folia.py dcoi2folia.xsl errorcorpus2folia.xsl sourcedir inputoutputdir refoutputdir"


def _warn(message):
    """Write *message* followed by a newline to standard error."""
    sys.stderr.write(message + "\n")


def _output_path(directory, inputfilename):
    """Return the path inside *directory* with the base name of *inputfilename*.

    os.path.join is used so that the directory argument works with or
    without a trailing slash.
    """
    return os.path.join(directory, os.path.basename(inputfilename))


def _run_to_file(command, outputfilename):
    """Run *command* with its standard output written to *outputfilename*.

    *command* is an argument list that is executed directly, without a shell,
    so file names containing spaces or shell metacharacters are passed through
    unchanged.

    Returns the exit status of the command, or 127 when the output file could
    not be opened or the program could not be started. Any failure is reported
    on standard error; no exception is propagated for these cases, so the
    caller can carry on with the next file.
    """
    try:
        with open(outputfilename, 'wb') as output:
            status = subprocess.call(command, stdout=output)
    except EnvironmentError as error:
        # Covers both an unwritable output path and a missing executable.
        _warn("Could not run %s > %s: %s" % (command[0], outputfilename, error))
        return 127
    if status != 0:
        _warn("%s exited with status %d while writing %s"
              % (command[0], status, outputfilename))
    return status


def main(argv=None):
    """Convert every ``*.xml`` file in the source directory with both stylesheets.

    *argv* is a full argument vector (program name first) and defaults to
    ``sys.argv``. It must provide, in order: the D-Coi stylesheet, the error
    corpus stylesheet, the source directory, the directory receiving the
    output of the first stylesheet and the directory receiving the output of
    the second stylesheet. Additional arguments are ignored.

    For each source file a scratch copy with the restored default namespace
    is produced with ``sed``; ``xsltproc`` is then run on that copy once per
    stylesheet, and the scratch copy is removed again.

    Returns 2 (after printing the usage line) when arguments are missing and
    0 otherwise. A failing external command is reported on standard error and
    does not abort the run.
    """
    if argv is None:
        argv = sys.argv
    try:
        dcoixsl, errorcorpusxsl, sourcedir, inputoutputdir, refoutputdir = argv[1:6]
    except ValueError:
        # Fewer than five positional arguments were supplied.
        _warn(_USAGE)
        return 2

    # A private scratch directory avoids predictable, collision-prone file
    # names directly under the shared temporary directory.
    tmpdir = tempfile.mkdtemp(prefix='errorcorpus2folia-')
    try:
        for inputfilename in glob.glob(sourcedir + '/*.xml'):
            _warn("Processing " + inputfilename)
            tmpfilename = os.path.join(tmpdir, os.path.basename(inputfilename))
            try:
                # The default namespace got thrashed in the source; restore it.
                restored = _run_to_file(
                    ['sed', _RESTORE_NAMESPACE_SED, inputfilename], tmpfilename)
                if restored != 0:
                    # Without a valid scratch copy there is nothing to transform.
                    continue
                _run_to_file(['xsltproc', dcoixsl, tmpfilename],
                             _output_path(inputoutputdir, inputfilename))
                _run_to_file(['xsltproc', errorcorpusxsl, tmpfilename],
                             _output_path(refoutputdir, inputfilename))
            finally:
                # Remove the scratch copy even when a step failed or raised.
                if os.path.exists(tmpfilename):
                    os.unlink(tmpfilename)
    finally:
        os.rmdir(tmpdir)
    return 0


if __name__ == '__main__':
    sys.exit(main())