In [2]:
import os
from frictionless import Package, portals, Catalog

In [3]:
from dotenv import load_dotenv
from pathlib import Path  # NOTE(review): no longer used in this cell; kept in case other cells rely on it

# Load variables from a `.env` file into the process environment.
# A single call is enough: the three calls that used to be here were the
# alternative ("OR") forms of the same operation, and because load_dotenv does
# not override variables that are already set (override=False by default), the
# repeated calls added nothing. verbose=True reports a missing `.env` file.
load_dotenv(verbose=True)

# Zenodo access tokens (production and sandbox); None when the variable is unset.
apikey = os.environ.get('ZENODO_ACCESS_TOKEN')
apikey_sandbox = os.environ.get('ZENODO_ACCESS_TOKEN_SANDBOX')

# Never print the token itself: cell outputs are saved with the notebook and
# would leak the secret. Only report whether each token was found.
print("ZENODO_ACCESS_TOKEN set:", apikey is not None)
print("ZENODO_ACCESS_TOKEN_SANDBOX set:", apikey_sandbox is not None)

[output redacted: an access token was printed here; revoke it and issue a new one]


### Without apikey
Rate limit: 60 requests per minute, 2000 requests per hour
### With apikey
Rate limit: 100 requests per minute, 5000 requests per hour

# Reading

In [3]:
# What is a record in Zenodo?
# Zenodo is a collection of research papers and datasets; each paper or dataset is called a record.

# WITHOUT DESCRIPTOR
# Build a Package directly from the record URL and print it.
url = "https://zenodo.org/record/7078768"
package_wod = Package(url)
print(package_wod)

{'title': 'Frictionless Data Test Dataset Without Descriptor',
 'resources': [{'name': 'capitals',
                'type': 'table',
                'path': 'capitals.csv',
                'scheme': 'https',
                'format': 'csv',
                'encoding': 'utf-8',
                'mediatype': 'text/csv',
                'dialect': {'csv': {'skipInitialSpace': True}},
                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
                                      {'name': 'cid', 'type': 'integer'},
                                      {'name': 'name', 'type': 'string'}]}},
               {'name': 'table',
                'type': 'table',
                'path': 'table.xls',
                'scheme': 'https',
                'format': 'xls',
                'encoding': 'utf-8',
                'mediatype': 'application/vnd.ms-excel',
                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
                                      {'name': 'name', '

In [4]:
# WITH DESCRIPTOR
# Default supported file formats: "csv", "tsv", "xlsx", "xls", "jsonl", "ndjson"

# `url` is reassigned here, so any later cell that reads `url` sees this record.
url = "https://zenodo.org/record/7078760"
package_wd = Package(url)
print(package_wd)

{'name': 'testing',
 'title': 'Frictionless Data Test Dataset',
 'resources': [{'name': 'data',
                'path': 'data.csv',
                'schema': {'fields': [{'name': 'id',
                                       'type': 'string',
                                       'constraints': {'required': True}},
                                      {'name': 'name', 'type': 'string'},
                                      {'name': 'description', 'type': 'string'},
                                      {'name': 'amount', 'type': 'number'}],
                           'primaryKey': ['id']}},
               {'name': 'data2',
                'path': 'data2.csv',
                'schema': {'fields': [{'name': 'parent', 'type': 'string'},
                                      {'name': 'comment', 'type': 'string'}],
                           'foreignKeys': [{'fields': ['parent'],
                                            'reference': {'resource': 'data',
                                

In [5]:
# ACCESS RESOURCES OF A PACKAGE
# Print the first resource of each package as a table view.
print(package_wd.resources[0].to_view())
print(package_wod.resources[0].to_view())

# VALIDATE
# Validate the with-descriptor package and print the resulting report.
report = package_wd.validate()
print(report)

+---------+----------------+---------------------------+--------------------+
| id      | name           | description               | amount             |
| 'A3001' | 'Taxes'        | 'Taxes we collect'        | Decimal('10000.5') |
+---------+----------------+---------------------------+--------------------+
| 'A5032' | 'Parking Fees' | 'Parking fees we collect' |  Decimal('2000.5') |
+---------+----------------+---------------------------+--------------------+

+----+-----+----------+
| id | cid | name     |
|  1 |   1 | 'London' |
+----+-----+----------+
|  2 |   2 | 'Paris'  |
+----+-----+----------+
|  3 |   3 | 'Berlin' |
+----+-----+----------+
|  4 |   4 | 'Rome'   |
+----+-----+----------+
|  5 |   5 | 'Lisbon' |
+----+-----+----------+

{'valid': True,
 'errors': [],
 'tasks': [{'valid': True,
            'name': 'data',
            'type': 'table',
            'place': 'data.csv',
            'labels': ['id', 'name', 'description', 'amount'],
            'stats': {'md5': 'c

In [8]:
# WITH APIKEY
# The access token is passed explicitly through `apikey=`.
# NOTE(review): `url` is not assigned in this cell, so it relies on the value
# left by an earlier cell; the recorded output is the with-descriptor "testing"
# package even though the result is stored in `package_wod`.

package_wod = Package(url, apikey=apikey)
print(package_wod)

{'name': 'testing',
 'title': 'Frictionless Data Test Dataset',
 'resources': [{'name': 'data',
                'path': 'data.csv',
                'schema': {'fields': [{'name': 'id',
                                       'type': 'string',
                                       'constraints': {'required': True}},
                                      {'name': 'name', 'type': 'string'},
                                      {'name': 'description', 'type': 'string'},
                                      {'name': 'amount', 'type': 'number'}],
                           'primaryKey': ['id']}},
               {'name': 'data2',
                'path': 'data2.csv',
                'schema': {'fields': [{'name': 'parent', 'type': 'string'},
                                      {'name': 'comment', 'type': 'string'}],
                           'foreignKeys': [{'fields': ['parent'],
                                            'reference': {'resource': 'data',
                                

# Catalog

In [20]:
# What is a catalog?
# A catalog is a collection of packages; it can be created from several
# different records in the Zenodo repository.

# Build the catalog from the search query and report how many packages it holds.
control = portals.ZenodoControl(search='notes:"TDWD"', apikey=apikey)
catalog = Catalog(control=control)
print("Packages", len(catalog.packages))

Packages 2


In [21]:
# Display the packages held by the catalog built in an earlier cell.
catalog.packages

[{'title': 'Frictionless Data Test Dataset Without Descriptor',
  'resources': [{'name': 'capitals',
                 'type': 'table',
                 'path': 'capitals.csv',
                 'scheme': 'https',
                 'format': 'csv',
                 'encoding': 'utf-8',
                 'mediatype': 'text/csv',
                 'dialect': {'csv': {'skipInitialSpace': True}},
                 'schema': {'fields': [{'name': 'id', 'type': 'integer'},
                                       {'name': 'cid', 'type': 'integer'},
                                       {'name': 'name', 'type': 'string'}]}},
                {'name': 'table',
                 'type': 'table',
                 'path': 'table.xls',
                 'scheme': 'https',
                 'format': 'xls',
                 'encoding': 'utf-8',
                 'mediatype': 'application/vnd.ms-excel',
                 'schema': {'fields': [{'name': 'id', 'type': 'integer'},
                                    

In [13]:
# READ RESOURCE
# Print the first resource of the catalog's first package as a table view.
print(catalog.packages[0].resources[0].to_view())

# VALIDATE
# Validate the catalog's first package and print the resulting report.
report = catalog.packages[0].validate()
print(report)

+----+-----+----------+
| id | cid | name     |
|  1 |   1 | 'London' |
+----+-----+----------+
|  2 |   2 | 'Paris'  |
+----+-----+----------+
|  3 |   3 | 'Berlin' |
+----+-----+----------+
|  4 |   4 | 'Rome'   |
+----+-----+----------+
|  5 |   5 | 'Lisbon' |
+----+-----+----------+

{'valid': True,
 'errors': [],
 'tasks': [{'valid': True,
            'name': 'capitals',
            'type': 'table',
            'place': 'capitals.csv',
            'labels': ['id', 'cid', 'name'],
            'stats': {'md5': '154d822b8c2aa259867067f01c0efee5',
                      'sha256': '5ec3d8a4d137891f2f19ab9d244cbc2c30a7493f895c6b8af2506d9b229ed6a8',
                      'bytes': 76,
                      'fields': 3,
                      'rows': 5,
                      'errors': 0,
                      'seconds': 2.711},
            'errors': []},
           {'valid': True,
            'name': 'table',
            'type': 'table',
            'place': 'table.xls',
            'labels'

## Catalog - Search
#### Zenodo search guide (official): https://help.zenodo.org/guides/search/

#### Search Examples
* title:"open science"
* (+description:"frictionless" +title:"Bionomia")
* (+publication_date:[2022-10-01 TO 2022-11-01] +title:"frictionless")

In [14]:
# BY KEYWORDS
# NOTE(review): this cell was labelled "BY TITLE", but the query has no `title:`
# field qualifier, so it is presumably not restricted to the title — confirm
# against the Zenodo search guide.

control = portals.ZenodoControl(search='+frictionlessdata +science', apikey=apikey)
catalog = Catalog(control=control)
print("Packages", len(catalog.packages))

# Equivalent search on the Zenodo website:
# link: https://zenodo.org/search?page=1&size=20&q=%2Bfrictionlessdata%20%2Bscience

Packages 1


In [23]:
# BY TITLE AND DESCRIPTION
# Both terms carry a `+` prefix and a field qualifier (`description:` / `title:`).

control = portals.ZenodoControl(search='(+description:"frictionless" +title:"Bionomia")', apikey=apikey)
catalog = Catalog(control=control)
print("Packages", len(catalog.packages))

# Equivalent search on the Zenodo website:
# https://zenodo.org/search?page=1&size=20&q=(%2Bdescription:%22frictionless%22%20%2Btitle:%22Bionomia%22%20)

Packages 1


In [19]:
# BY PUBLICATION DATE AND TITLE
# The date condition is a range (`[2022-10-01 TO 2022-11-01]`) combined with a title phrase.

control = portals.ZenodoControl(search='(+publication_date:[2022-10-01 TO 2022-11-01] +title:"frictionless")', apikey=apikey)
catalog = Catalog(control=control)
print("Packages", len(catalog.packages))

Packages 1


In [28]:
# SORT
# ascending

# Search by creator name and return a single record (page 1, size 1).
# The field qualifier needs a colon before the quoted phrase
# (`creators.name:"FD Tester"`); without it the text is not a field query.
catalog = Catalog(
    control=portals.ZenodoControl(
        search='creators.name:"FD Tester"',
        sort="mostrecent",
        page=1,
        size=1,
    ),
)
print(catalog)
print(catalog.packages[0].resources[0].to_view())

{'name': 'catalog',
 'packages': ['https://zenodo.org/api/files/dad5c26f-b5bc-48bd-9c3b-51bba130860a/datapackage.json']}
+----+-----+----------+
| id | cid | name     |
|  1 | '1' | 'London' |
+----+-----+----------+
|  2 | '2' | 'Paris'  |
+----+-----+----------+
|  3 | '3' | 'Berlin' |
+----+-----+----------+
|  4 | '4' | 'Rome'   |
+----+-----+----------+
|  5 | '5' | 'Lisbon' |
+----+-----+----------+



In [11]:
# descending
# Same creator search as the ascending example, but with the sort key prefixed
# by "-"; only one record is requested (page 1, size 1).
control = portals.ZenodoControl(
        search='creators.name:"FD Tester"', sort="-mostrecent", page=1, size=1
    )
catalog = Catalog(control=control)
print(catalog.packages[0].title)
# NOTE(review): a disabled assertion here expected the title
# "Test Write File - Remote"; the recorded output shows a different title.

Frictionless Data Test Dataset


## Writing Data

In [10]:
# Publish a local data package to the Zenodo sandbox and print the value
# returned by `to_zenodo` (stored as `deposition_id`).
control = portals.ZenodoControl(
        metafn="484/package/meta.json",  # presumably the Zenodo metadata file for the deposition — TODO confirm
        apikey=apikey_sandbox,  # sandbox token, not the production one
        base_url="https://sandbox.zenodo.org/api/"  # target the sandbox API
    )
package = Package("484/package/datapackage.json")
deposition_id = package.to_zenodo(control=control)
print(deposition_id)

datapackage.json ID = 1122739 (DOI: 10.5281/zenodo.1122739)
484/package/data.csv ID = 1122739 (DOI: 10.5281/zenodo.1122739)
484/package/data2.csv ID = 1122739 (DOI: 10.5281/zenodo.1122739)
1122739


# Controls

In [5]:
# A control sets the properties of the Zenodo plugin.
# Here the record is selected by id (`record=`) instead of by URL, and
# `formats=["csv"]` is passed; the recorded output lists only the csv resource.

control = portals.ZenodoControl(formats=["csv"], record="7078725", apikey=apikey)
package = Package(control=control)
print(package)

{'title': 'Frictionless Data Test Dataset Multiple File Types Without '
          'Descriptor',
 'resources': [{'name': 'capitals',
                'type': 'table',
                'path': 'capitals.csv',
                'scheme': 'https',
                'format': 'csv',
                'encoding': 'utf-8',
                'mediatype': 'text/csv',
                'dialect': {'csv': {'skipInitialSpace': True}},
                'schema': {'fields': [{'name': 'id', 'type': 'integer'},
                                      {'name': 'cid', 'type': 'integer'},
                                      {'name': 'name', 'type': 'string'}]}}]}
