#  Frictionless Packages on IPFS

In [1]:
%pip install frictionless --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m23.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
from frictionless import Package
from frictionless import Resource

import pandas as pd

## Metadata on IPFS

I've uploaded an example of a Frictionless Data package to IPFS (`bafybeierqai7xkaxkyakdynw5uq7f2g4o5uz3kzvnh55thmazcff3bwgse`). 

The package is a simple CSV file with a schema and a data file that lives outside of IPFS.

In [4]:
ipfs_package = Package(
    "https://bafybeierqai7xkaxkyakdynw5uq7f2g4o5uz3kzvnh55thmazcff3bwgse.ipfs.w3s.link/ipfs/bafybeierqai7xkaxkyakdynw5uq7f2g4o5uz3kzvnh55thmazcff3bwgse/ipfs_datapackage.yaml"
)

In [5]:
print(ipfs_package.to_yaml())

$frictionless: package/v2
name: co2-mm-mlo
title: Trends in Atmospheric Carbon Dioxide
resources:
  - name: co2_mm_mlo
    type: table
    path: https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.csv
    scheme: https
    format: csv
    mediatype: text/csv



With the IPFS plugin this will be `Package("ipfs://bafybeierqai7xkaxkyakdynw5uq7f2g4o5uz3kzvnh55thmazcff3bwgse")`

In [6]:
r = ipfs_package.get_resource("co2_mm_mlo")

In [7]:
ipfs_package.get_resource("co2_mm_mlo").read_rows()[50:60]

[{'# of the GML data': '1960', 'namely well documented model code': '4', 'transport': '1960.2896'},
 {'# of the GML data': '1960', 'namely well documented model code': '5', 'transport': '1960.3716'},
 {'# of the GML data': '1960', 'namely well documented model code': '6', 'transport': '1960.4563'},
 {'# of the GML data': '1960', 'namely well documented model code': '7', 'transport': '1960.5383'},
 {'# of the GML data': '1960', 'namely well documented model code': '8', 'transport': '1960.6230'},
 {'# of the GML data': '1960', 'namely well documented model code': '9', 'transport': '1960.7077'},
 {'# of the GML data': '1960', 'namely well documented model code': '10', 'transport': '1960.7896'},
 {'# of the GML data': '1960', 'namely well documented model code': '11', 'transport': '1960.8743'},
 {'# of the GML data': '1960', 'namely well documented model code': '12', 'transport': '1960.9563'},
 {'# of the GML data': '1961', 'namely well documented model code': '1', 'transport': '1961.0411'

## Data on IPFS

In [8]:
ipfs_resource = Resource(
    "https://bafybeibpm3aubiynqgdxdrggvmucr6sf7gi4kshng5py3uxbydqsw5reoa.ipfs.w3s.link/ipfs/bafybeibpm3aubiynqgdxdrggvmucr6sf7gi4kshng5py3uxbydqsw5reoa/co2_mm_mlo.csv"
)

In [9]:
native_ipfs_package = Package(
    name="co2-mm-mlo",
    title="Trends in Atmospheric Carbon Dioxide",
    resources=[ipfs_resource],
)

In [10]:
print(native_ipfs_package.to_yaml())

$frictionless: package/v2
name: co2-mm-mlo
title: Trends in Atmospheric Carbon Dioxide
resources:
  - name: co2_mm_mlo
    type: table
    path: https://bafybeibpm3aubiynqgdxdrggvmucr6sf7gi4kshng5py3uxbydqsw5reoa.ipfs.w3s.link/ipfs/bafybeibpm3aubiynqgdxdrggvmucr6sf7gi4kshng5py3uxbydqsw5reoa/co2_mm_mlo.csv
    scheme: https
    format: csv
    mediatype: text/csv



In [11]:
ipfs_resource.to_pandas()[50:60]

Unnamed: 0,# of the GML data,namely well documented model code,transport
50,1960,4,1960.2896
51,1960,5,1960.3716
52,1960,6,1960.4563
53,1960,7,1960.5383
54,1960,8,1960.623
55,1960,9,1960.7077
56,1960,10,1960.7896
57,1960,11,1960.8743
58,1960,12,1960.9563
59,1961,1,1961.0411


### Native IPFS support

In [None]:
# From ChatGPT :seenoevil:

# from frictionless import File
# import fsspec
# from frictionless.plugins import Plugin

# class FsspecFile(File):
#     def __init__(self, source, **options):
#         super().__init__(source, **options)

#     def read_list_stream_create(self):
#         fs, path = fsspec.core.url_to_fs(self.source)
#         return fs.open(path)

# class FsspecPlugin(Plugin):
#     def create_file(self, source, **options):
#         return FsspecFile(source, **options)

# from frictionless import system

# system.plugin_attach(FsspecPlugin())

In [36]:
# from frictionless import Plugin, system
# from frictionless.schemes.remote import RemoteLoader

In [37]:
# class IPFSRemotePlugin(Plugin):
#     """Plugin for Remote Data"""

#     def create_loader(self, resource):
#         if resource.scheme in ["ipfs"]:
#             cid = resource.path
#             resource.path = f"https://{cid}.ipfs.w3s.link/ipfs/{cid}"
#             return RemoteLoader(resource)

# system.register('ipfs', IPFSRemotePlugin())

In [53]:
# ipfs_resource = Resource("bafybeibpm3aubiynqgdxdrggvmucr6sf7gi4kshng5py3uxbydqsw5reoa", scheme="ipfs")
# native_ipfs_package = Package(
#     name="co2-mm-mlo",
#     title="Trends in Atmospheric Carbon Dioxide",
#     resources=[ipfs_resource],
# )

In [54]:
# print(native_ipfs_package.to_yaml())

$frictionless: package/v2
name: co2-mm-mlo
title: Trends in Atmospheric Carbon Dioxide
resources:
  - name: bafybeibpm3aubiynqgdxdrggvmucr6sf7gi4kshng5py3uxbydqsw5reoa
    type: file
    path: bafybeibpm3aubiynqgdxdrggvmucr6sf7gi4kshng5py3uxbydqsw5reoa
    scheme: ipfs
    format: ''



## Bringing Data from Other Places

In [12]:
# Use Bacalhau to run frictionless pull and update the data package with the CIDs