Skip to content

Commit

Permalink
added tika metadata parser process
Browse files Browse the repository at this point in the history
  • Loading branch information
cehbrecht committed Jul 14, 2015
1 parent 8362ee0 commit 5437df9
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 0 deletions.
1 change: 1 addition & 0 deletions buildout.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pkgs =
netcdf4
owslib
pywps
tika

[hummingbird]
recipe = zc.recipe.egg
Expand Down
1 change: 1 addition & 0 deletions processes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
"wps_ncmeta",
"wps_cdo",
"wps_cfchecker",
"wps_tika",
]
54 changes: 54 additions & 0 deletions processes/wps_tika.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os

from malleefowl.process import WPSProcess

from malleefowl import wpslogging as logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

class Tika(WPSProcess):
    """WPS process that extracts file metadata using the tika parser.

    Inputs:
        resource: one to 100 files, declared as ``application/x-netcdf``.

    Outputs:
        output: a JSON document (returned by reference) containing a list
            with one metadata entry per input file, in input order.
    """

    def __init__(self):
        """Register the process description, its input and its output."""
        WPSProcess.__init__(
            self,
            identifier="tika",
            title="Tika Metadata Parser",
            version="0.1",
            abstract="Extracts Metadata of Files")

        self.resource = self.addComplexInput(
            identifier="resource",
            title="File",
            minOccurs=1,
            maxOccurs=100,
            # NOTE(review): keyword spelling kept exactly as in the original
            # call; presumably this is the name the WPS framework expects --
            # confirm before "correcting" it.
            maxmegabites=5000,
            formats=[{"mimeType": "application/x-netcdf"}],
        )

        self.output = self.addComplexOutput(
            identifier="output",
            title="Tika Result",
            formats=[{"mimeType": "application/json"}],
            asReference=True,
        )

    def execute(self):
        """Parse every input file with tika and write the metadata as JSON.

        The result file holds a JSON list with one entry per input file.
        If the parser result for a file carries no ``"metadata"`` key, a
        warning is logged and an empty object is stored for that file so
        the list stays aligned with the inputs instead of the whole
        request failing with ``KeyError``.
        """
        self.show_status("starting tika", 0)

        resources = self.getInputValues(identifier='resource')

        # Imported inside the method, as in the original, so the tika
        # package is only loaded when the process is actually executed.
        from tika import parser

        metadata = []
        for resource in resources:
            parsed = parser.from_file(resource)
            if parsed and "metadata" in parsed:
                metadata.append(parsed["metadata"])
            else:
                # Presumably happens when tika produced no output for the
                # file; keep a placeholder rather than aborting the request.
                logger.warning("tika returned no metadata for %s", resource)
                metadata.append({})

        import json
        out_filename = self.mktempfile(suffix='.json')
        with open(out_filename, 'w') as fp:
            json.dump(obj=metadata, fp=fp, indent=4, sort_keys=True)
        self.output.setValue(out_filename)

        self.show_status("tika done", 100)



0 comments on commit 5437df9

Please sign in to comment.