diff --git a/README.md b/README.md index 8b5c4ad..0e9eba8 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,24 @@ import tika tika.TikaClientOnly = True ``` +Changing the Tika Classpath +--------------------------- +You can update the classpath that the Tika server uses by +setting `TikaServerClasspath`: +```python +import tika +tika.TikaServerClasspath = '/tmp/keys' +``` + +Then, kill the running Tika server (replace `PID` with the process ID shown by the first command): + +```bash +ps aux | grep java | grep Tika +kill -9 PID +``` + +Then, use Tika again, and you should see the new classpath. + Then you can run any of the methods and it will fully omit the check to see if the service on localhost is running and omit printing the check messages. diff --git a/tika/parser.py b/tika/parser.py index 405ccb7..2cc2e07 100644 --- a/tika/parser.py +++ b/tika/parser.py @@ -16,25 +16,25 @@ # limitations under the License. # -from .tika import parse1, callServer, ServerEndpoint, TikaServerClasspath +from .tika import parse1, callServer, ServerEndpoint import os import json -def from_file(filename, serverEndpoint=ServerEndpoint, xmlContent=False, classpath = TikaServerClasspath): +def from_file(filename, serverEndpoint=ServerEndpoint, xmlContent=False): if not xmlContent: - jsonOutput = parse1('all', filename, serverEndpoint, classpath = classpath) + jsonOutput = parse1('all', filename, serverEndpoint) else: - jsonOutput = parse1('all', filename, serverEndpoint, services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/xml'}, classpath = classpath) + jsonOutput = parse1('all', filename, serverEndpoint, services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/xml'}) return _parse(jsonOutput) -def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False, classpath = TikaServerClasspath): +def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False): if not xmlContent: status, response = callServer('put', serverEndpoint, '/rmeta/text', string, - {'Accept': 'application/json'}, False, classpath = classpath) + {'Accept': 'application/json'}, False) else: 
status, response = callServer('put', serverEndpoint, '/rmeta/xml', string, - {'Accept': 'application/json'}, False, classpath = classpath) + {'Accept': 'application/json'}, False) return _parse((status,response)) diff --git a/tika/tika.py b/tika/tika.py index 6670e5c..b137a88 100644 --- a/tika/tika.py +++ b/tika/tika.py @@ -206,7 +206,7 @@ def parse(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbose, ti def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, tikaServerJar=TikaServerJar, responseMimeType='application/json', - services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/text'}, classpath = TikaServerClasspath): + services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/text'}): """Parse the object and return extracted metadata and/or text in JSON format.""" path, file_type = getRemoteFile(urlOrPath, TikaFilesPath) if option not in services: @@ -215,7 +215,7 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti if service == '/tika': responseMimeType = 'text/plain' status, response = callServer('put', serverEndpoint, service, open(path, 'rb'), {'Accept': responseMimeType, 'Content-Disposition': 'attachment; filename=%s' % os.path.basename(path)}, - verbose, tikaServerJar, classpath = classpath) + verbose, tikaServerJar) if file_type == 'remote': os.unlink(path) return (status, response) @@ -319,7 +319,7 @@ def callServer(verb, serverEndpoint, service, data, headers, verbose=Verbose, ti port = parsedUrl.port global TikaClientOnly if not TikaClientOnly: - serverEndpoint = checkTikaServer(serverHost, port, tikaServerJar, classpath ) + serverEndpoint = checkTikaServer(serverHost, port, tikaServerJar, classpath) serviceUrl = serverEndpoint + service if verb not in httpVerbs: