In [1]:
import fsspec
import os
from ipfsspec.asyn import AsyncIPFSFileSystem
from fsspec import register_implementation
import asyncio
import io

# register_implementation(IPFSFileSystem.protocol, IPFSFileSystem)
register_implementation(AsyncIPFSFileSystem.protocol, AsyncIPFSFileSystem)

# with fsspec.open("ipfs://QmZ4tDuvesekSs4qM5ZBKpXiZGun7S2CYtEZRB3DYXkjGx", "r") as f:
#     print(f.read())
class fs:
    ipfs = fsspec.filesystem("ipfs")
    file = fsspec.filesystem("file")

Changed to public node


In [2]:
fs.ipfs.change_gateway_type = 'local'

Changed to local node


## Adding a csv dataset to IPFS

In [3]:
fs.ipfs.put(path='test/data/dataset/input/adult_data.csv', rpath='/test_dataset')

'QmZnxARhJWsCbTxiAzoRhnxHgMtoEkNJNS8DGLCBEMvm4V'

## Retrieving a dataset from IPFS

In [4]:
if fs.file.exists('test/data/dataset/output/adult_data.csv'):
    fs.file.rm('test/data/dataset/output/adult_data.csv', recursive=True)
    
print('Before: ', [p.lstrip(os.getcwd()) for p in fs.file.glob('test/data/dataset/output/*')])

fs.ipfs.get(rpath='QmZnxARhJWsCbTxiAzoRhnxHgMtoEkNJNS8DGLCBEMvm4V', 
            lpath='test/data/dataset/output/adult_data.csv', # a filename must be given
            recursive=True, 
            return_cid=False)

print('After: ', [p.lstrip(os.getcwd()) for p in fs.file.glob('test/data/dataset/output/*')])

Before:  []
After:  ['data/dataset/output/adult_data.csv']


## Building a tabular model

In [5]:
from fastai.tabular.all import *

In [6]:
df = pd.read_csv('test/data/dataset/output/adult_data.csv')

In [14]:
df.columns = [col.strip() for col in df.columns]

In [15]:
splits = RandomSplitter()(range_of(df))
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']
procs = [Categorify, FillMissing, Normalize]
y_names = 'salary'
y_block = CategoryBlock()

In [16]:
to = TabularPandas(df, procs=procs, cat_names=cat_names, cont_names=cont_names,
                   y_names=y_names, y_block=y_block, splits=splits)

In [19]:
dls = to.dataloaders(bs=64)

In [20]:
learn = tabular_learner(dls, [200,100], metrics=accuracy)

In [21]:
learn.fit_one_cycle(3, 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,0.384998,0.358777,0.835995,00:02
1,0.36184,0.360575,0.830774,00:02
2,0.346744,0.349264,0.838606,00:02


In [25]:
learn.export('test/data/model/input/testmodel.pkl')

## Adding model+config_files to IPFS

In [26]:
fs.ipfs.put(path='test/data/model/input/testmodel.pkl', rpath='/test_model')

'QmdNQoVqNvRg6kW4tvdoQcMjxCd8p79AmDfi2fjbQcpoDz'

## Retrieving model+config_files from IPFS

In [28]:
if fs.file.exists('test/data/model/output/testmodel.pkl'):
    fs.file.rm('test/data/model/output/testmodel.pkl', recursive=True)
    
print('Before: ', [p.lstrip(os.getcwd()) for p in fs.file.glob('test/data/model/output/*')])

fs.ipfs.get(rpath='QmdNQoVqNvRg6kW4tvdoQcMjxCd8p79AmDfi2fjbQcpoDz', 
            lpath='test/data/model/output/testmodel.pkl', # a filename must be given
            recursive=True, 
            return_cid=False)

print('After: ', [p.lstrip(os.getcwd()) for p in fs.file.glob('test/data/model/output/*')])

Before:  []
After:  ['data/model/output/testmodel.pkl']


## Doing inference with retrieved model

In [34]:
learn = load_learner('test/data/model/output/testmodel.pkl')

In [37]:
dl = learn.dls.test_dl(df.iloc[:10])

In [38]:
learn.get_preds(dl=dl)

(tensor([[0.8989, 0.1011],
         [0.3198, 0.6802],
         [0.9739, 0.0261],
         [0.8690, 0.1310],
         [0.3574, 0.6426],
         [0.1215, 0.8785],
         [0.9760, 0.0240],
         [0.5097, 0.4903],
         [0.8273, 0.1727],
         [0.1295, 0.8705]]),
 tensor([[0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [1]], dtype=torch.int8))