In [1]:
from frictionless import describe, validate, Package, Resource
import pprint

pp = pprint.PrettyPrinter(depth=4)

csvname = "./capacity.csv"

In [2]:
capacity_schema = describe(csvname, type='resource')
pp.pprint(capacity_schema)

{'name': 'capacity',
 'type': 'table',
 'path': './capacity.csv',
 'scheme': 'file',
 'format': 'csv',
 'mediatype': 'text/csv',
 'encoding': 'utf-8',
 'schema': {'fields': [{'name': 'grid_id', 'type': 'integer'},
                       {'name': 'capacity_mw', 'type': 'number'},
                       {'name': 'fuel_type', 'type': 'string'},
                       {'name': 'year', 'type': 'integer'}]}}


In [3]:
capacity_schema.description = "Annual capacity for a grid in Alaska"

capacity_schema.schema.get_field("grid_id").title = "Grid ID"
capacity_schema.schema.get_field("grid_id").description = "A unique identifier for the grid for which capacity is calculated"

capacity_schema.schema.get_field("capacity_mw").title = "Generation Capacity"
capacity_schema.schema.get_field("capacity_mw").description = "The maximum amount of electricity that can be generated at any given time in megawatts (MW)"
# Note: I can try and add a new field but it doesn't stick
capacity_schema.schema.get_field("capacity_mw").units = "Megawatts"

capacity_schema.schema.get_field("fuel_type").title = "Fuel Type"
capacity_schema.schema.get_field("fuel_type").description = "Type of fuel used for generation"

capacity_schema.schema.get_field("year").title = "Year"
capacity_schema.schema.get_field("year").description = "Year for which capacity is calculated"
capacity_schema.schema.set_field_type("year", "year")
capacity_schema.schema.get_field("year").constraints["minimum"] = 2010
capacity_schema.schema.get_field("year").constraints["maximum"] = 2025

pp.pprint(capacity_schema)


{'name': 'capacity',
 'type': 'table',
 'description': 'Annual capacity for a grid in Alaska',
 'path': './capacity.csv',
 'scheme': 'file',
 'format': 'csv',
 'mediatype': 'text/csv',
 'encoding': 'utf-8',
 'schema': {'fields': [{'name': 'grid_id',
                        'type': 'integer',
                        'title': 'Grid ID',
                        'description': 'A unique identifier for the grid for '
                                       'which capacity is calculated'},
                       {'name': 'capacity_mw',
                        'type': 'number',
                        'title': 'Generation Capacity',
                        'description': 'The maximum amount of electricity that '
                                       'can be generated at any given time in '
                                       'megawatts (MW)'},
                       {'name': 'fuel_type',
                        'type': 'string',
                        'title': 'Fuel Type',
               

In [4]:
# Now that capacity is a resource, adding a foreign key to another resouce that doesn't exist invalidates the package

"""
f_keys = []
f_keys.append({
  "fields": ["grid_id"],
  "reference": {
      "resource": "grids",        
      "fields": ["grid_id"]
  }            
})

capacity_schema.schema.foreign_keys = f_keys

pp.pprint(capacity_schema)
"""

'\nf_keys = []\nf_keys.append({\n  "fields": ["grid_id"],\n  "reference": {\n      "resource": "grids",        \n      "fields": ["grid_id"]\n  }            \n})\n\ncapacity_schema.schema.foreign_keys = f_keys\n\npp.pprint(capacity_schema)\n'

In [5]:
"""report = validate('table.csv', schema='schema.json')
print(report)"""

capacity_schema.validate()

{'valid': True,
 'errors': [],
 'tasks': [{'name': 'capacity',
            'type': 'table',
            'valid': True,
            'place': './capacity.csv',
            'labels': ['grid_id', 'capacity_mw', 'fuel_type', 'year'],
            'stats': {'errors': 0,
                      'seconds': 0.004,
                      'md5': 'cd51cd1ad519075f702c958ff5bad12d',
                      'sha256': '1f151eca80760575509dfb006652d365c9a83c18276a3be5e525cb8768edba3c',
                      'bytes': 7561,
                      'fields': 4,
                      'rows': 492},
            'errors': []}]}

In [6]:
long_description = """ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed fermentum eros maximus, ullamcorper neque quis, commodo lorem. Maecenas non dui vel mauris aliquam tincidunt ac eget magna. Nullam lacinia sem ac est laoreet, ac ultricies dui bibendum. Aenean feugiat, eros nec consequat egestas, nibh leo posuere sem, non tristique nunc magna sit amet leo. Cras ultrices lorem fringilla urna commodo euismod. Ut pulvinar dictum tortor placerat convallis. Praesent eget ante a nisi ornare vestibulum mattis id velit. Cras dictum sapien enim, id placerat orci fringilla eget. Etiam in fermentum nisi. Maecenas ac tortor nunc.

Donec ex ligula, ultricies sed laoreet vel, posuere ut turpis. Nulla in leo ac leo auctor dignissim. Vestibulum eu enim at felis lacinia hendrerit. Suspendisse potenti. Nullam vel leo vestibulum nisl vehicula aliquam sit amet id ligula. Mauris nec mauris libero. Interdum et malesuada fames ac ante ipsum primis in faucibus. Sed mauris mauris, varius egestas sapien volutpat, rutrum varius ligula.
"""

package = Package(
    name='test_datapackage',
    title='Test Package for Capacity',
    description=long_description,
    resources=[capacity_schema]
)
pp.pprint(package)

{'name': 'test_datapackage',
 'title': 'Test Package for Capacity',
 'description': ' Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed '
                'fermentum eros maximus, ullamcorper neque quis, commodo '
                'lorem. Maecenas non dui vel mauris aliquam tincidunt ac eget '
                'magna. Nullam lacinia sem ac est laoreet, ac ultricies dui '
                'bibendum. Aenean feugiat, eros nec consequat egestas, nibh '
                'leo posuere sem, non tristique nunc magna sit amet leo. Cras '
                'ultrices lorem fringilla urna commodo euismod. Ut pulvinar '
                'dictum tortor placerat convallis. Praesent eget ante a nisi '
                'ornare vestibulum mattis id velit. Cras dictum sapien enim, '
                'id placerat orci fringilla eget. Etiam in fermentum nisi. '
                'Maecenas ac tortor nunc.\n'
                '\n'
                'Donec ex ligula, ultricies sed laoreet vel, posuere ut '
       

In [7]:
package.validate()

{'valid': True,
 'errors': [],
 'tasks': [{'name': 'capacity',
            'type': 'table',
            'valid': True,
            'place': './capacity.csv',
            'labels': ['grid_id', 'capacity_mw', 'fuel_type', 'year'],
            'stats': {'errors': 0,
                      'seconds': 0.004,
                      'md5': 'cd51cd1ad519075f702c958ff5bad12d',
                      'sha256': '1f151eca80760575509dfb006652d365c9a83c18276a3be5e525cb8768edba3c',
                      'bytes': 7561,
                      'fields': 4,
                      'rows': 492},
            'errors': []}]}

In [8]:
package.to_json('test_datapackage.json')

'{\n  "name": "test_datapackage",\n  "title": "Test Package for Capacity",\n  "description": " Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed fermentum eros maximus, ullamcorper neque quis, commodo lorem. Maecenas non dui vel mauris aliquam tincidunt ac eget magna. Nullam lacinia sem ac est laoreet, ac ultricies dui bibendum. Aenean feugiat, eros nec consequat egestas, nibh leo posuere sem, non tristique nunc magna sit amet leo. Cras ultrices lorem fringilla urna commodo euismod. Ut pulvinar dictum tortor placerat convallis. Praesent eget ante a nisi ornare vestibulum mattis id velit. Cras dictum sapien enim, id placerat orci fringilla eget. Etiam in fermentum nisi. Maecenas ac tortor nunc.\\n\\nDonec ex ligula, ultricies sed laoreet vel, posuere ut turpis. Nulla in leo ac leo auctor dignissim. Vestibulum eu enim at felis lacinia hendrerit. Suspendisse potenti. Nullam vel leo vestibulum nisl vehicula aliquam sit amet id ligula. Mauris nec mauris libero. Interdum et malesu