Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,24 @@ import tika
tika.TikaClientOnly = True
```

Changing the Tika Classpath
---------------------------
You can change the classpath that the Tika server uses by
setting `tika.TikaServerClasspath`:
```python
import tika
tika.TikaServerClasspath = '/tmp/keys'
```

Then, kill the running Tika server (find its process ID and use it in place of `PID`):

```bash
ps aux | grep java | grep Tika
kill -9 PID
```

Now run Tika again, and you should see the new classpath in use.

With `tika.TikaClientOnly = True` set, you can run any of the
methods and Tika will fully omit the check to see if the service on
localhost is running, and will not print the check messages.
Expand Down
14 changes: 7 additions & 7 deletions tika/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,25 @@
# limitations under the License.
#

from .tika import parse1, callServer, ServerEndpoint, TikaServerClasspath
from .tika import parse1, callServer, ServerEndpoint
import os
import json

def from_file(filename, serverEndpoint=ServerEndpoint, xmlContent=False, classpath = TikaServerClasspath):
def from_file(filename, serverEndpoint=ServerEndpoint, xmlContent=False):
if not xmlContent:
jsonOutput = parse1('all', filename, serverEndpoint, classpath = classpath)
jsonOutput = parse1('all', filename, serverEndpoint)
else:
jsonOutput = parse1('all', filename, serverEndpoint, services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/xml'}, classpath = classpath)
jsonOutput = parse1('all', filename, serverEndpoint, services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/xml'})
return _parse(jsonOutput)


def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False, classpath = TikaServerClasspath):
def from_buffer(string, serverEndpoint=ServerEndpoint, xmlContent=False):
if not xmlContent:
status, response = callServer('put', serverEndpoint, '/rmeta/text', string,
{'Accept': 'application/json'}, False, classpath = classpath)
{'Accept': 'application/json'}, False)
else:
status, response = callServer('put', serverEndpoint, '/rmeta/xml', string,
{'Accept': 'application/json'}, False, classpath = classpath)
{'Accept': 'application/json'}, False)

return _parse((status,response))

Expand Down
6 changes: 3 additions & 3 deletions tika/tika.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def parse(option, urlOrPaths, serverEndpoint=ServerEndpoint, verbose=Verbose, ti

def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, tikaServerJar=TikaServerJar,
responseMimeType='application/json',
services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/text'}, classpath = TikaServerClasspath):
services={'meta': '/meta', 'text': '/tika', 'all': '/rmeta/text'}):
"""Parse the object and return extracted metadata and/or text in JSON format."""
path, file_type = getRemoteFile(urlOrPath, TikaFilesPath)
if option not in services:
Expand All @@ -215,7 +215,7 @@ def parse1(option, urlOrPath, serverEndpoint=ServerEndpoint, verbose=Verbose, ti
if service == '/tika': responseMimeType = 'text/plain'
status, response = callServer('put', serverEndpoint, service, open(path, 'rb'),
{'Accept': responseMimeType, 'Content-Disposition': 'attachment; filename=%s' % os.path.basename(path)},
verbose, tikaServerJar, classpath = classpath)
verbose, tikaServerJar)

if file_type == 'remote': os.unlink(path)
return (status, response)
Expand Down Expand Up @@ -319,7 +319,7 @@ def callServer(verb, serverEndpoint, service, data, headers, verbose=Verbose, ti
port = parsedUrl.port
global TikaClientOnly
if not TikaClientOnly:
serverEndpoint = checkTikaServer(serverHost, port, tikaServerJar, classpath )
serverEndpoint = checkTikaServer(serverHost, port, tikaServerJar, classpath)

serviceUrl = serverEndpoint + service
if verb not in httpVerbs:
Expand Down