Skip to content

Commit

Permalink
Refactor Resource class according to v1 specs (#44)
Browse files Browse the repository at this point in the history
  • Loading branch information
georgiana-b committed Aug 6, 2017
1 parent 1bef8dc commit 3daa3a2
Show file tree
Hide file tree
Showing 10 changed files with 2,467 additions and 144 deletions.
6 changes: 3 additions & 3 deletions lib/datapackage/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def join_paths(base_path, resource)
if base_path.nil? || base_path.empty?
resource
elsif base_path =~ /\A#{URI::regexp}\z/
URI.join(base_path, resource)
URI.join(base_path, resource).to_s
elsif File.directory?(base_path)
File.join(base_path, resource)
File.join(base_path, resource).to_s
elsif File.file?(base_path)
base_path
else
Expand All @@ -90,7 +90,7 @@ def is_fully_qualified_url?(string)
# Decides whether a descriptor-supplied path is safe to resolve relative to
# the descriptor's base path.
#
# A path is considered unsafe when it is empty, is absolute, starts with a
# dot-only segment (`.`, `..`, ...), or contains a `..` segment anywhere.
# Bare file names such as `file.csv` are safe.
#
# @param string [String] the `path` value taken from a resource descriptor
# @return [Boolean] true when the path is safe, false otherwise
def is_safe_path?(string)
  path = Pathname.new(string)
  return false if path.absolute?

  # Split on '/' rather than using Pathname#split: the latter returns
  # [dirname, basename], and the dirname of a bare file name is ".", which
  # would wrongly flag every single-component path as unsafe.
  segments = path.to_s.split('/')
  return false if segments.empty?

  # \A and \z anchor the whole segment; ^ and $ would only anchor a line.
  return false if segments.first =~ /\A\.+\z/

  # A parent reference further down the path can still climb out of the
  # base directory (e.g. "data/../../secret").
  return false if segments.include?('..')

  true
end

Expand Down
50 changes: 41 additions & 9 deletions lib/datapackage/resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,55 @@ module DataPackage
# A data resource descriptor. The descriptor's properties are merged into
# this Hash; the profile, name and data source are exposed through readers.
#
# NOTE(review): this listing comes from a rendered commit diff without +/-
# markers, so lines from before and after the refactor appear side by side
# (the two attr_reader declarations, the `@data` branch in #initialize and
# the two `@table ||=` assignments). Confirm against the checked-in file
# before relying on the exact statement order.
class Resource < Hash
include DataPackage::Helpers

# Value assigned to @data in #initialize (inline data or the content read
# from the resolved path).
attr_reader :data
# State assigned by #initialize, #get_source! and #validate.
attr_reader :name, :profile, :source, :source_type, :valid, :errors

# Builds the resource from a descriptor.
#
# resource  - the descriptor; it is passed through dereference_descriptor
#             (defined outside this view) and then merged into self.
# base_path - base used by join_paths to resolve a `path` property
#             (defaults to '').
#
# Raises ResourceException when the descriptor has neither a truthy `data`
# nor a truthy `path` property.
def initialize(resource, base_path = '')
resource = dereference_descriptor(resource, base_path: base_path,
reference_fields: ['schema', 'dialect'])
if resource.fetch('data', nil)
@data = resource['data']
elsif resource.fetch('path', nil)
# NOTE(review): the object returned by `open` is never explicitly closed,
# and this read runs before the is_safe_path? check made in #get_source!.
@data = open(join_paths(base_path, resource['path'])).read
else
raise ResourceException.new 'A resource descriptor must have a `path` or `data` property.'
end
self.merge! resource
# Falls back to the 'data-resource' profile when the descriptor names none.
@profile = DataPackage::Profile.new(self.fetch('profile', 'data-resource'))
@name = self['name']
get_source!(base_path)
end

# Returns a memoized TableSchema::Table for this resource.
#
# Each statement only assigns when @table is still unset. The method's value
# is that of the last statement, so it is nil whenever tabular? is false,
# even if the first statement already populated @table.
def table
@table ||= TableSchema::Table.new(CSV.parse(data), self['schema']) if self['schema']
@table ||= TableSchema::Table.new(self.source, self['schema']) if tabular?
end

# True when the resource's profile is named 'tabular-data-resource', or when
# a freshly built profile of that name reports this descriptor as valid.
def tabular?
tabular_profile = 'tabular-data-resource'
return true if @profile.name == tabular_profile
return true if DataPackage::Profile.new(tabular_profile).valid?(self)
false
end

# `tabular` is an alias of `tabular?`.
alias :tabular :tabular?

# Re-runs validation and returns the resulting @valid flag.
def valid?
validate
@valid
end

# Validates this descriptor against its profile, storing the result of
# @profile.validate in @errors and of @profile.valid? in @valid.
# Returns the value assigned to @valid.
def validate
@errors = @profile.validate(self)
@valid = @profile.valid?(self)
end

private

# Sets @source and @source_type from the descriptor.
#
# - truthy `data`: @source is the inline data, @source_type is 'inline'.
# - truthy `path`: the path must pass is_safe_path?; @source is the path
#   joined with base_path and @source_type is 'remote' when
#   is_fully_qualified_url? accepts it, otherwise 'local'.
#
# Raises ResourceException for an unsafe path, or when neither property is
# present.
def get_source!(base_path)
if self.fetch('data', nil)
@source = self['data']
@source_type = 'inline'
elsif self.fetch('path', nil)
unless is_safe_path?(self['path'])
raise ResourceException.new "Path `#{self['path']}` is not safe"
end
@source = join_paths(base_path, self['path'])
@source_type = is_fully_qualified_url?(@source) ? 'remote' : 'local'
else
raise ResourceException.new 'A resource descriptor must have a `path` or `data` property.'
end
end

end
Expand Down
278 changes: 278 additions & 0 deletions lib/profiles/data-resource.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Data Resource",
"description": "Data Resource.",
"type": "object",
"oneOf": [
{
"required": [
"name",
"data"
]
},
{
"required": [
"name",
"path"
]
}
],
"properties": {
"profile": {
"propertyOrder": 10,
"title": "Profile",
"description": "The profile of this descriptor.",
"context": "Every Package and Resource descriptor has a profile. The default profile, if none is declared, is `default`. The namespace for the profile is the type of descriptor, so, `default` for a Package descriptor is not the same as `default` for a Resource descriptor.",
"type": "string",
"default": "default",
"examples": [
"{\n \"profile\": \"tabular-data-package\"\n}\n",
"{\n \"profile\": \"http://example.com/my-profiles-json-schema.json\"\n}\n"
]
},
"name": {
"propertyOrder": 20,
"title": "Name",
"description": "An identifier string. Lower case characters with `.`, `_`, `-` and `/` are allowed.",
"type": "string",
"pattern": "^([-a-z0-9._/])+$",
"context": "This is ideally a url-usable and human-readable name. Name `SHOULD` be invariant, meaning it `SHOULD NOT` change when its parent descriptor is updated.",
"examples": [
"{\n \"name\": \"my-nice-name\"\n}\n"
]
},
"path": {
"propertyOrder": 30,
"title": "Path",
"description": "A reference to the data for this resource, as either a path as a string, or an array of paths as strings of valid URIs.",
"oneOf": [
{
"title": "Path",
"description": "A fully qualified URL, or a POSIX file path.",
"type": "string",
"examples": [
"{\n \"path\": \"file.csv\"\n}\n",
"{\n \"path\": \"http://example.com/file.csv\"\n}\n"
],
"context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
},
{
"type": "array",
"minItems": 1,
"items": {
"title": "Path",
"description": "A fully qualified URL, or a POSIX file path.",
"type": "string",
"examples": [
"{\n \"path\": \"file.csv\"\n}\n",
"{\n \"path\": \"http://example.com/file.csv\"\n}\n"
],
"context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
},
"examples": [
"[ \"file.csv\" ]\n",
"[ \"http://example.com/file.csv\" ]\n"
]
}
],
"context": "The dereferenced value of each referenced data source in `path` `MUST` be commensurate with a native, dereferenced representation of the data the resource describes. For example, in a *Tabular* Data Resource, this means that the dereferenced value of `path` `MUST` be an array.",
"examples": [
"{\n \"path\": [\n \"file.csv\",\n \"file2.csv\"\n ]\n}\n",
"{\n \"path\": [\n \"http://example.com/file.csv\",\n \"http://example.com/file2.csv\"\n ]\n}\n",
"{\n \"path\": \"http://example.com/file.csv\"\n}\n"
]
},
"data": {
"propertyOrder": 230,
"title": "Data",
"description": "Inline data for this resource."
},
"schema": {
"propertyOrder": 40,
"title": "Schema",
"description": "A schema for this resource.",
"type": "object"
},
"title": {
"propertyOrder": 50,
"title": "Title",
"description": "A human-readable title.",
"type": "string",
"examples": [
"{\n \"title\": \"My Package Title\"\n}\n"
]
},
"description": {
"propertyOrder": 60,
"format": "textarea",
"title": "Description",
"description": "A text description. Markdown is encouraged.",
"type": "string",
"examples": [
"{\n \"description\": \"# My Package description\\nAll about my package.\"\n}\n"
]
},
"homepage": {
"propertyOrder": 70,
"title": "Home Page",
"description": "The home on the web that is related to this data package.",
"type": "string",
"format": "uri",
"examples": [
"{\n \"homepage\": \"http://example.com/\"\n}\n"
]
},
"sources": {
"propertyOrder": 140,
"options": {
"hidden": true
},
"title": "Sources",
"description": "The raw sources for this resource.",
"type": "array",
"minItems": 1,
"items": {
"title": "Source",
"description": "A source file.",
"type": "object",
"required": [
"title"
],
"properties": {
"title": {
"title": "Title",
"description": "A human-readable title.",
"type": "string",
"examples": [
"{\n \"title\": \"My Package Title\"\n}\n"
]
},
"path": {
"title": "Path",
"description": "A fully qualified URL, or a POSIX file path.",
"type": "string",
"examples": [
"{\n \"path\": \"file.csv\"\n}\n",
"{\n \"path\": \"http://example.com/file.csv\"\n}\n"
],
"context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
},
"email": {
"title": "Email",
"description": "An email address.",
"type": "string",
"format": "email",
"examples": [
"{\n \"email\": \"example@example.com\"\n}\n"
]
}
}
},
"examples": [
"{\n \"sources\": [\n {\n \"name\": \"World Bank and OECD\",\n \"uri\": \"http://data.worldbank.org/indicator/NY.GDP.MKTP.CD\"\n }\n ]\n}\n"
]
},
"licenses": {
"description": "The license(s) under which the resource is published.",
"propertyOrder": 150,
"options": {
"hidden": true
},
"title": "Licenses",
"type": "array",
"minItems": 1,
"items": {
"title": "License",
"description": "A license for this descriptor.",
"type": "object",
"properties": {
"name": {
"title": "Open Definition license identifier",
"description": "MUST be an Open Definition license identifier, see http://licenses.opendefinition.org/",
"type": "string",
"pattern": "^([-a-zA-Z0-9._])+$"
},
"path": {
"title": "Path",
"description": "A fully qualified URL, or a POSIX file path.",
"type": "string",
"examples": [
"{\n \"path\": \"file.csv\"\n}\n",
"{\n \"path\": \"http://example.com/file.csv\"\n}\n"
],
"context": "Implementations need to negotiate the type of path provided, and dereference the data accordingly."
},
"title": {
"title": "Title",
"description": "A human-readable title.",
"type": "string",
"examples": [
"{\n \"title\": \"My Package Title\"\n}\n"
]
}
},
"context": "Use of this property does not imply that the person was the original creator of, or a contributor to, the data in the descriptor, but refers to the composition of the descriptor itself."
},
"context": "This property is not legally binding and does not guarantee that the package is licensed under the terms defined herein.",
"examples": [
"{\n \"licenses\": [\n {\n \"name\": \"odc-pddl-1.0\",\n \"uri\": \"http://opendatacommons.org/licenses/pddl/\"\n }\n ]\n}\n"
]
},
"format": {
"propertyOrder": 80,
"title": "Format",
"description": "The file format of this resource.",
"context": "`csv`, `xls`, `json` are examples of common formats.",
"type": "string",
"examples": [
"{\n \"format\": \"xls\"\n}\n"
]
},
"mediatype": {
"propertyOrder": 90,
"title": "Media Type",
"description": "The media type of this resource. Can be any valid media type listed with [IANA](https://www.iana.org/assignments/media-types/media-types.xhtml).",
"type": "string",
"pattern": "^(.+)/(.+)$",
"examples": [
"{\n \"mediatype\": \"text/csv\"\n}\n"
]
},
"encoding": {
"propertyOrder": 100,
"title": "Encoding",
"description": "The file encoding of this resource.",
"type": "string",
"default": "utf-8",
"examples": [
"{\n \"encoding\": \"utf-8\"\n}\n"
]
},
"bytes": {
"propertyOrder": 110,
"options": {
"hidden": true
},
"title": "Bytes",
"description": "The size of this resource in bytes.",
"type": "integer",
"examples": [
"{\n \"bytes\": 2082\n}\n"
]
},
"hash": {
"propertyOrder": 120,
"options": {
"hidden": true
},
"title": "Hash",
"type": "string",
"description": "The MD5 hash of this resource. Indicate other hashing algorithms with the {algorithm}:{hash} format.",
"pattern": "^([^:]+:[a-fA-F0-9]+|[a-fA-F0-9]{32}|)$",
"examples": [
"{\n \"hash\": \"d25c9c77f588f5dc32059d2da1136c02\"\n}\n",
"{\n \"hash\": \"SHA256:5262f12512590031bbcc9a430452bfd75c2791ad6771320bb4b5728bfb78c4d0\"\n}\n"
]
}
}
}
16 changes: 15 additions & 1 deletion lib/profiles/registry.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,19 @@
"schema": "https://specs.frictionlessdata.io/schemas/table-schema.json",
"schema_path": "table-schema.json",
"specification": "https://specs.frictionlessdata.io/table-schema/"
},
{
"id": "data-resource",
"title": "Data Resource",
"schema": "https://specs.frictionlessdata.io/schemas/data-resource.json",
"schema_path": "data-resource.json",
"specification": "https://specs.frictionlessdata.io/data-resource"
},
{
"id": "tabular-data-resource",
"title": "Tabular Data Resource",
"schema": "https://specs.frictionlessdata.io/schemas/tabular-data-resource.json",
"schema_path": "tabular-data-resource.json",
"specification": "https://specs.frictionlessdata.io/tabular-data-resource/"
}
]
]
Loading

0 comments on commit 3daa3a2

Please sign in to comment.