Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

webapp fixed

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@283 1aa58f4a-7d42-0410-adbc-911cccaed67c
  • Loading branch information...
commit 866f2bbb75df159c0f142e9f9082a86f7848cf0b 1 parent 5d98a27
yusuke.shinyama.dummy authored
Showing with 144 additions and 30 deletions.
  1. +1 −1  tools/Makefile
  2. +30 −29 tools/pdf2html.cgi
  3. +113 −0 tools/runapp.py
View
2  tools/Makefile
@@ -5,4 +5,4 @@ RM=rm -f
all:
clean:
- -$(RM) *.pyc *.pyo
+ -$(RM) *.pyc *.pyo *.cgic *.cgio
View
59 tools/pdf2html.cgi
@@ -77,21 +77,23 @@ class WebApp(object):
TITLE = 'pdf2html demo'
MAXFILESIZE = 10000000 # set to zero if unlimited.
- MAXPAGES = 10 # set to zero if unlimited.
+ MAXPAGES = 100 # set to zero if unlimited.
def __init__(self, infp=sys.stdin, outfp=sys.stdout, environ=os.environ,
codec='utf-8', apppath='/'):
self.infp = infp
self.outfp = outfp
+ self.environ = environ
self.codec = codec
self.apppath = apppath
- self.remote_addr = environ.get('REMOTE_ADDR')
- self.path_info = environ.get('PATH_INFO')
- self.method = environ.get('REQUEST_METHOD', 'GET').upper()
- self.server = environ.get('SERVER_SOFTWARE', '')
- self.tmpdir = environ.get('TEMP', './var/')
+ self.remote_addr = self.environ.get('REMOTE_ADDR')
+ self.path_info = self.environ.get('PATH_INFO')
+ self.method = self.environ.get('REQUEST_METHOD', 'GET').upper()
+ self.server = self.environ.get('SERVER_SOFTWARE', '')
+ self.tmpdir = self.environ.get('TEMP', './var/')
self.content_type = 'text/html; charset=%s' % codec
self.logger = logging.getLogger()
+ logging.basicConfig(level=10,stream=sys.stderr)
return
def put(self, *args):
@@ -102,7 +104,7 @@ class WebApp(object):
self.outfp.write(x.encode(self.codec, 'xmlcharrefreplace'))
return
- def http_200(self):
+ def response_200(self):
if self.server.startswith('cgi-httpd'):
# required for cgi-httpd
self.outfp.write('HTTP/1.0 200 OK\r\n')
@@ -110,7 +112,7 @@ class WebApp(object):
self.outfp.write('Connection: close\r\n\r\n')
return
- def http_404(self):
+ def response_404(self):
if self.server.startswith('cgi-httpd'):
# required for cgi-httpd
self.outfp.write('HTTP/1.0 404 Not Found\r\n')
@@ -119,7 +121,7 @@ class WebApp(object):
self.outfp.write('<html><body>page does not exist</body></html>\n')
return
- def http_301(self, url):
+ def response_301(self, url):
if self.server.startswith('cgi-httpd'):
# required for cgi-httpd
self.outfp.write('HTTP/1.0 301 Moved\r\n')
@@ -146,53 +148,52 @@ class WebApp(object):
return
def setup(self):
+ self.run = self.response_404
+ status = 404
if not os.path.isdir(self.tmpdir):
self.logger.error('no tmpdir')
status = 304
- elif self.path_info != self.apppath:
- status = 404
- else:
+ elif self.path_info == self.apppath:
+ self.run = self.convert
status = 200
- self._status = status
return status
- def run(self):
- form = cgi.FieldStorage(self.infp)
- if self._status != 200:
- self.http_404()
- return
+ def convert(self):
+ self.form = cgi.FieldStorage(fp=self.infp, environ=self.environ)
if (self.method != 'POST' or
- 'c' not in form or
- 'f' not in form):
+ 'c' not in self.form or
+ 'f' not in self.form):
+ self.response_200()
self.coverpage()
return
- item = form['f']
+ item = self.form['f']
if not (item.file and item.filename):
+ self.response_200()
self.coverpage()
return
- cmd = form.getvalue('c')
+ cmd = self.form.getvalue('c')
html = (cmd == 'Convert to HTML')
pagenos = []
- if 'p' in form:
- for m in re.finditer(r'\d+', form.getvalue('p')):
+ if 'p' in self.form:
+ for m in re.finditer(r'\d+', self.form.getvalue('p')):
try:
pagenos.append(int(m.group(0)))
except ValueError:
pass
- self.logger.info('received: host=%s, name=%r, pagenos=%r' %
- (self.remote_addr, item.filename, pagenos))
h = abs(hash((random.random(), self.remote_addr, item.filename)))
tmppath = os.path.join(self.tmpdir, '%08x%08x.pdf' % (time.time(), h))
+ self.logger.info('received: host=%s, name=%r, pagenos=%r, tmppath=%r' %
+ (self.remote_addr, item.filename, pagenos, tmppath))
try:
if not html:
self.content_type = 'text/plain; charset=%s' % self.codec
- self.http_200()
+ self.response_200()
try:
- convert(item.file, sys.stdout, tmppath, pagenos=pagenos, codec=self.codec,
+ convert(item.file, self.outfp, tmppath, pagenos=pagenos, codec=self.codec,
maxpages=self.MAXPAGES, maxfilesize=self.MAXFILESIZE, html=html)
except Exception, e:
self.put('<p>Sorry, an error has occurred: %s' % q(repr(e)))
- self.logger.error('convert: %r: path=%r: %s' % (e, tmppath, traceback.format_exc()))
+ self.logger.error('convert: %r: %s' % (e, traceback.format_exc()))
finally:
try:
os.remove(tmppath)
View
113 tools/runapp.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python2
+##
+## WebApp class runner
+##
+## usage:
+## $ runapp.py pdf2html.cgi
+##
+
+import sys
+import urllib
+from httplib import responses
+from BaseHTTPServer import HTTPServer
+from SimpleHTTPServer import SimpleHTTPRequestHandler
+
+## WebAppHandler
+## Request handler that builds a CGI-style environment from each request and hands it to an APP_CLASS instance (see run_cgi).
+class WebAppHandler(SimpleHTTPRequestHandler):
+
+ APP_CLASS = None  # application class instantiated once per request; assigned by main()
+
+ def do_POST(self):  # POST requests are passed straight to the application
+ return self.run_cgi()
+
+ def send_head(self):  # overridden so that requests reaching send_head run the application instead
+ return self.run_cgi()
+
+ def run_cgi(self):  # build the environ dict from the request, then set up and run APP_CLASS
+ rest = self.path
+ i = rest.rfind('?')  # split off the query string at the last '?'
+ if i >= 0:
+ rest, query = rest[:i], rest[i+1:]
+ else:
+ query = ''
+ i = rest.find('/')  # split at the first '/'; when the path starts with '/', script is the empty string
+ if i >= 0:
+ script, rest = rest[:i], rest[i:]
+ else:
+ script, rest = rest, ''
+ scriptname = '/' + script
+ scriptfile = self.translate_path(scriptname)  # NOTE(review): scriptfile is not used again in this method
+ env = {}
+ env['SERVER_SOFTWARE'] = self.version_string()
+ env['SERVER_NAME'] = self.server.server_name
+ env['GATEWAY_INTERFACE'] = 'CGI/1.1'
+ env['SERVER_PROTOCOL'] = self.protocol_version
+ env['SERVER_PORT'] = str(self.server.server_port)
+ env['REQUEST_METHOD'] = self.command
+ uqrest = urllib.unquote(rest)  # PATH_INFO carries the URL-unquoted remainder of the path
+ env['PATH_INFO'] = uqrest
+ env['PATH_TRANSLATED'] = self.translate_path(uqrest)
+ env['SCRIPT_NAME'] = scriptname
+ if query:
+ env['QUERY_STRING'] = query
+ host = self.address_string()
+ if host != self.client_address[0]:  # REMOTE_HOST is set only when address_string() differs from the raw client address
+ env['REMOTE_HOST'] = host
+ env['REMOTE_ADDR'] = self.client_address[0]
+ if self.headers.typeheader is None:  # fall back to headers.type when typeheader is None
+ env['CONTENT_TYPE'] = self.headers.type
+ else:
+ env['CONTENT_TYPE'] = self.headers.typeheader
+ length = self.headers.getheader('content-length')
+ if length:
+ env['CONTENT_LENGTH'] = length
+ accept = []
+ for line in self.headers.getallmatchingheaders('accept'):  # gather every Accept value into one comma-joined string
+ if line[:1] in "\t\n\r ":  # a line starting with whitespace is appended whole (stripped)
+ accept.append(line.strip())
+ else:
+ accept = accept + line[7:].split(',')  # line[7:] skips the first 7 characters (the length of 'Accept:')
+ env['HTTP_ACCEPT'] = ','.join(accept)
+ ua = self.headers.getheader('user-agent')
+ if ua:
+ env['HTTP_USER_AGENT'] = ua
+ co = filter(None, self.headers.getheaders('cookie'))  # drop empty cookie header values
+ if co:
+ env['HTTP_COOKIE'] = ', '.join(co)
+ for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',  # these keys always exist, defaulting to ""
+ 'HTTP_USER_AGENT', 'HTTP_COOKIE'):
+ env.setdefault(k, "")
+ app = self.APP_CLASS(infp=self.rfile, outfp=self.wfile, environ=env)  # the app is given this handler's rfile/wfile as its input/output streams
+ status = app.setup()  # the value returned by setup() is used as the HTTP status code
+ self.send_response(status, responses[status])  # status line is sent before app.run() is called
+ app.run()
+ return
+
+# main: parse the command-line options, load the application class from a source file, and serve it with HTTPServer
+def main(argv):
+ import getopt, imp
+ def usage():  # print the usage line and return exit status 100
+ print 'usage: %s [-h host] [-p port] [-n name] module.class' % argv[0]  # NOTE(review): the positional argument is actually used as a file path (see imp.load_source below)
+ return 100
+ try:
+ (opts, args) = getopt.getopt(argv[1:], 'h:p:n:')
+ except getopt.GetoptError:
+ return usage()
+ host = ''  # defaults: empty host, port 8080, class name 'WebApp'
+ port = 8080
+ name = 'WebApp'
+ for (k, v) in opts:
+ if k == '-h': host = v
+ elif k == '-p': port = int(v)
+ elif k == '-n': name = v
+ if not args: return usage()
+ path = args.pop(0)  # first positional argument: path of the source file to load
+ module = imp.load_source('app', path)  # load that file as a module named 'app'
+ WebAppHandler.APP_CLASS = getattr(module, name)  # look up the application class by name (-n option)
+ print 'Listening %s:%d...' % (host,port)
+ httpd = HTTPServer((host,port), WebAppHandler)
+ httpd.serve_forever()
+ return
+
+if __name__ == '__main__': sys.exit(main(sys.argv))  # main()'s return value becomes the process exit status
Please sign in to comment.
Something went wrong with that request. Please try again.