Skip to content

Commit

Permalink
Merge pull request #2787 from jjkoehorst/dev
Browse files Browse the repository at this point in the history
HDT format added and turtle sniffer improved
  • Loading branch information
bgruening committed Aug 15, 2016
2 parents 62d69be + 6792645 commit f1f20d9
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
2 changes: 2 additions & 0 deletions config/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@
<datatype extension="sif" type="galaxy.datatypes.graph:Sif" display_in_upload="true"/>
<!-- datatypes storing triples -->
<datatype extension="triples" type="galaxy.datatypes.triples:Triples" display_in_upload="false"/>
<datatype extension="hdt" type="galaxy.datatypes.triples:HDT" display_in_upload="true"/>
<datatype extension="nt" type="galaxy.datatypes.triples:NTriples" display_in_upload="true"/>
<datatype extension="n3" type="galaxy.datatypes.triples:N3" display_in_upload="true"/>
<datatype extension="ttl" type="galaxy.datatypes.triples:Turtle" display_in_upload="true"/>
Expand Down Expand Up @@ -595,6 +596,7 @@
<sniffer type="galaxy.datatypes.proteomics:ThermoRAW"/>
<sniffer type="galaxy.datatypes.molecules:CML"/>
<sniffer type="galaxy.datatypes.xml:GenericXml"/>
<sniffer type="galaxy.datatypes.triples:HDT"/>
<sniffer type="galaxy.datatypes.triples:Turtle"/>
<sniffer type="galaxy.datatypes.triples:NTriples"/>
<sniffer type="galaxy.datatypes.triples:Jsonld"/>
Expand Down
39 changes: 34 additions & 5 deletions lib/galaxy/datatypes/triples.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
import logging
import xml
import text
import binary

log = logging.getLogger(__name__)


class Triples( data.Text ):
class Triples( data.Data ):
"""
The abstract base class for the file format that can contain triples
"""
Expand All @@ -34,7 +35,7 @@ def set_peek( self, dataset, is_multi_byte=False ):
dataset.blurb = 'file purged from disk'


class NTriples( Triples ):
class NTriples( data.Text, Triples ):
"""
The N-Triples triple data format
"""
Expand All @@ -58,7 +59,7 @@ def set_peek( self, dataset, is_multi_byte=False ):
dataset.blurb = 'file purged from disk'


class N3( Triples ):
class N3( data.Text, Triples ):
"""
The N3 triple data format
"""
Expand All @@ -81,7 +82,7 @@ def set_peek( self, dataset, is_multi_byte=False ):
dataset.blurb = 'file purged from disk'


class Turtle( Triples ):
class Turtle( data.Text, Triples ):
"""
The Turtle triple data format
"""
Expand All @@ -91,7 +92,10 @@ class Turtle( Triples ):
def sniff( self, filename ):
    """
    Return True when the first line of the file looks like a Turtle directive.

    Only the first line (at most 1024 characters) is inspected. It is accepted
    when it contains either an ``@prefix`` or an ``@base`` declaration.
    Whitespace before the terminating '.' and after the prefix colon is
    optional, so compact forms such as ``@prefix ex:<http://e/>.`` match too.
    """
    with open( filename, "r" ) as f:
        line = f.readline( 1024 )
    # @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
    if re.search( r'@prefix\s+[^:]*:\s*<[^>]*>\s*\.', line ):
        return True
    # @base <http://example.org/> .
    if re.search( r'@base\s*<[^>]*>\s*\.', line ):
        return True
    return False

Expand Down Expand Up @@ -156,3 +160,28 @@ def set_peek( self, dataset, is_multi_byte=False ):
else:
dataset.peek = 'file does not exist'
dataset.blurb = 'file purged from disk'


class HDT( binary.Binary, Triples ):
    """
    The HDT triple data format

    Unlike the text-based triple formats in this module, HDT files are
    handled as binary data and are recognised by a 4-byte magic marker at
    the start of the file.
    """
    edam_format = "format_2376"
    file_ext = "hdt"

    def sniff( self, filename ):
        """
        Return True when the file starts with the 4-byte marker ``$HDT``.

        The file is opened in binary mode, so the marker is compared as a
        bytes literal; comparing against a text string would never match on
        Python 3.
        """
        with open( filename, "rb" ) as f:
            return f.read( 4 ) == b"$HDT"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'HDT triple data'
        else:
            # The underlying file is gone, so there is nothing to preview.
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

# Register the HDT class with binary.Binary so its sniff() can be consulted.
# NOTE(review): the second argument is 'HDT' while the class declares
# file_ext = "hdt"; if that argument is the dataset extension, the case
# mismatch may be unintended -- confirm against Binary.register_sniffable_binary_format.
binary.Binary.register_sniffable_binary_format('HDT', 'HDT', HDT)

0 comments on commit f1f20d9

Please sign in to comment.