From 563cc1171f8430d34109aca974da37b6b5df3da1 Mon Sep 17 00:00:00 2001 From: Chris Mattmann Date: Wed, 13 Jul 2016 15:51:14 -0400 Subject: [PATCH 1/4] Refactor TikaServerClasspath to be settable in a single place via tika.py, and then via ENV vars. --- tika/parser.py | 4 ++-- tika/tika.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tika/parser.py b/tika/parser.py index 405ccb7..d84d364 100644 --- a/tika/parser.py +++ b/tika/parser.py @@ -20,7 +20,7 @@ import os import json -def from_file(filename, serverEndpoint=ServerEndpoint, xmlContent=False, classpath = TikaServerClasspath): +def from_file(filename, serverEndpoint=ServerEndpoint, xmlContent=False): if not xmlContent: jsonOutput = parse1('all', filename, serverEndpoint, classpath = classpath) else: @@ -28,7 +28,7 @@ def from_file(filename, serverEndpoint=ServerEndpoint, xmlContent=False, classpa return _parse(jsonOutput) -def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False, classpath = TikaServerClasspath): +def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False): if not xmlContent: status, response = callServer('put', serverEndpoint, '/rmeta/text', string, {'Accept': 'application/json'}, False, classpath = classpath) diff --git a/tika/tika.py b/tika/tika.py index 6670e5c..674f953 100644 --- a/tika/tika.py +++ b/tika/tika.py @@ -206,7 +206,7 @@ def parse(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbose, ti def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, tikaServerJar=TikaServerJar, responseMimeType='application/json', - services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/text'}, classpath = TikaServerClasspath): + services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/text'}): """Parse the object and return extracted metadata and/or text in JSON format.""" path, file_type = getRemoteFile(urlOrPath, TikaFilesPath) if option not in services: From c7150330f333baac670c35b68fb38c5eb9d0aa3b Mon Sep 17 00:00:00 2001 From: Chris Mattmann Date: Wed, 13 Jul 2016 15:59:19 -0400 Subject: [PATCH 2/4] - remove classpath --- tika/parser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tika/parser.py b/tika/parser.py index d84d364..2cc2e07 100644 --- a/tika/parser.py +++ b/tika/parser.py @@ -16,25 +16,25 @@ # limitations under the License. # -from .tika import parse1, callServer, ServerEndpoint, TikaServerClasspath +from .tika import parse1, callServer, ServerEndpoint import os import json def from_file(filename, serverEndpoint=ServerEndpoint, xmlContent=False): if not xmlContent: - jsonOutput = parse1('all', filename, serverEndpoint, classpath = classpath) + jsonOutput = parse1('all', filename, serverEndpoint) else: - jsonOutput = parse1('all', filename, serverEndpoint, services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/xml'}, classpath = classpath) + jsonOutput = parse1('all', filename, serverEndpoint, services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/xml'}) return _parse(jsonOutput) def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False): if not xmlContent: status, response = callServer('put', serverEndpoint, '/rmeta/text', string, - {'Accept': 'application/json'}, False, classpath = classpath) + {'Accept': 'application/json'}, False) else: status, response = callServer('put', serverEndpoint, '/rmeta/xml', string, - {'Accept': 'application/json'}, False, classpath = classpath) + {'Accept': 'application/json'}, False) return _parse((status,response)) From bdbf6b330539a7e2742ca341dc835e9c2f817c32 Mon Sep 17 00:00:00 2001 From: Chris Mattmann Date: Wed, 13 Jul 2016 16:05:00 -0400 Subject: [PATCH 3/4] - last classpath variable, this closes #116 --- tika/tika.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tika/tika.py b/tika/tika.py index 674f953..b137a88 100644 --- a/tika/tika.py +++ b/tika/tika.py @@ -215,7 +215,7 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti if service == '/tika': responseMimeType = 'text/plain' status, response = callServer('put', serverEndpoint, service, open(path, 'rb'), {'Accept': responseMimeType, 'Content-Disposition': 'attachment; filename=%s' % os.path.basename(path)}, - verbose, tikaServerJar, classpath = classpath) + verbose, tikaServerJar) if file_type == 'remote': os.unlink(path) return (status, response) @@ -319,7 +319,7 @@ def callServer(verb, serverEndpoint, service, data, headers, verbose=Verbose, ti port = parsedUrl.port global TikaClientOnly if not TikaClientOnly: - serverEndpoint = checkTikaServer(serverHost, port, tikaServerJar, classpath ) + serverEndpoint = checkTikaServer(serverHost, port, tikaServerJar, classpath) serviceUrl = serverEndpoint + service if verb not in httpVerbs: From ec9f052388e43313367f96a29a1cd6a7df1e1cd2 Mon Sep 17 00:00:00 2001 From: Chris Mattmann Date: Wed, 13 Jul 2016 16:17:21 -0400 Subject: [PATCH 4/4] - update docs --- README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 8b5c4ad..0e9eba8 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,24 @@ import tika tika.TikaClientOnly = True ``` +Changing the Tika Classpath +--------------------------- +You can update the classpath that Tika server uses by +setting +```python +import tika +tika.TikaServerClasspath = '/tmp/keys' +``` + +Then, kill Tika server: + +```bash +ps aux | grep java | grep Tika +kill -9 PID +``` + +Then, try Tika, and you should see the new classpath. + Then you can run any of the methods and it will fully omit the check to see if the service on localhost is running and omit printing the check messages.