In [1]:
import fsspec
import os
from ipfsspec.asyn import AsyncIPFSFileSystem
from fsspec import register_implementation
import asyncio
import io

# register_implementation(IPFSFileSystem.protocol, IPFSFileSystem)
register_implementation(AsyncIPFSFileSystem.protocol, AsyncIPFSFileSystem)

# with fsspec.open("ipfs://QmZ4tDuvesekSs4qM5ZBKpXiZGun7S2CYtEZRB3DYXkjGx", "r") as f:
#     print(f.read())
class fs:
    """Namespace holding the two filesystem handles used by the rest of the notebook."""

    # Local-disk filesystem, used to inspect and clean the output folders.
    file = fsspec.filesystem("file")
    # IPFS filesystem; presumably resolved to AsyncIPFSFileSystem through the
    # register_implementation call above (confirm its `protocol` value).
    ipfs = fsspec.filesystem("ipfs")

Changed to local node


In [2]:
# Point the IPFS filesystem at the local node; the cell output below reports the switch.
# NOTE(review): presumably a property setter defined by ipfsspec - confirm.
fs.ipfs.change_gateway_type = 'local'

Changed to local node


## Adding a CSV dataset to IPFS

In [3]:
# Upload the CSV to IPFS and keep the returned CID in a variable (mirroring how the
# model cell keeps `model_cid`) so it can be reused instead of copy-pasted by hand.
# The bare name on the last line still displays the CID as the cell output.
dataset_cid = fs.ipfs.put(path='test/data/dataset/input/adult_data.csv', rpath='/test_dataset')
dataset_cid

'QmZnxARhJWsCbTxiAzoRhnxHgMtoEkNJNS8DGLCBEMvm4V'

## Retrieving a dataset from IPFS

In [4]:
# Named paths so the cleanup, download and listing steps cannot drift apart.
dataset_out = 'test/data/dataset/output/adult_data.csv'
dataset_out_glob = 'test/data/dataset/output/*'

# Remove any earlier download so the "After" listing shows the file came from IPFS.
if fs.file.exists(dataset_out):
    fs.file.rm(dataset_out, recursive=True)

# os.path.relpath drops the working-directory prefix. str.lstrip() must not be used
# for this: it strips every leading character contained in its argument (a character
# set), not a prefix, and can therefore eat the first letters of the relative path.
print('Before: ', [os.path.relpath(p) for p in fs.file.glob(dataset_out_glob)])

fs.ipfs.get(rpath='QmZnxARhJWsCbTxiAzoRhnxHgMtoEkNJNS8DGLCBEMvm4V',
            lpath=dataset_out,  # a filename must be given
            recursive=True,
            return_cid=False)

print('After: ', [os.path.relpath(p) for p in fs.file.glob(dataset_out_glob)])

Before:  []
After:  ['test/data/dataset/output/adult_data.csv']


## Building a tabular model

In [5]:
from fastai.tabular.all import *

In [6]:
# Load the CSV that was just retrieved from IPFS into a DataFrame.
# NOTE(review): `pd` is never imported explicitly in this notebook; presumably it is
# provided by the `from fastai.tabular.all import *` star import - confirm.
df = pd.read_csv('test/data/dataset/output/adult_data.csv')

In [7]:
# Trim surrounding whitespace from every column header so the plain names
# used in the cells below match the frame's columns.
df.columns = df.columns.str.strip()

In [8]:
# Fixed seed so the random train/validation split is identical on every
# "Restart Kernel -> Run All"; without it the split changes on each run.
SPLIT_SEED = 42
splits = RandomSplitter(seed=SPLIT_SEED)(range_of(df))

# Columns treated as categorical and as continuous inputs.
cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race']
cont_names = ['age', 'fnlwgt', 'education-num']

# Preprocessing steps handed to TabularPandas in the next cell.
procs = [Categorify, FillMissing, Normalize]

# Target column and the block type describing it (a category, i.e. classification).
y_names = 'salary'
y_block = CategoryBlock()

In [9]:
# Bundle the frame with its preprocessing steps, feature lists, target and split.
to = TabularPandas(
    df,
    procs=procs,
    cat_names=cat_names,
    cont_names=cont_names,
    y_names=y_names,
    y_block=y_block,
    splits=splits,
)

In [10]:
# Build the DataLoaders from the TabularPandas object, using a batch size of 64.
dls = to.dataloaders(bs=64)

In [11]:
# Tabular learner with hidden layers of 200 and 100 units, reporting accuracy.
learn = tabular_learner(dls, layers=[200, 100], metrics=accuracy)

In [12]:
# Train for three epochs with a maximum learning rate of 1e-3.
learn.fit_one_cycle(n_epoch=3, lr_max=1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,0.380294,0.361464,0.832924,00:12
1,0.35858,0.353894,0.836609,00:10
2,0.344878,0.350269,0.839681,00:15


In [13]:
# Serialize the trained learner to disk; the next section uploads this file to IPFS.
# NOTE(review): presumably the target directory must already exist - confirm.
learn.export('test/data/model/input/testmodel.pkl')

## Adding model+config_files to IPFS

In [14]:
# Upload the exported learner to IPFS and remember its CID for the retrieval step.
model_cid = fs.ipfs.put(
    path='test/data/model/input/testmodel.pkl',
    rpath='/test_model',
)
# Bare name as last expression: shows the CID as the cell output.
model_cid

'QmdX8AKzfpSo48cDbvaRthrs6BATDQbEDvUarxLskyoTP7'

## Retrieving model+config_files from IPFS

In [15]:
# Named paths so the cleanup, download and listing steps cannot drift apart.
model_out = 'test/data/model/output/testmodel.pkl'
model_out_glob = 'test/data/model/output/*'

# Remove any earlier download so the "After" listing shows the file came from IPFS.
if fs.file.exists(model_out):
    fs.file.rm(model_out, recursive=True)

# os.path.relpath drops the working-directory prefix. str.lstrip() must not be used
# for this: it strips every leading character contained in its argument (a character
# set), not a prefix, and can therefore eat the first letters of the relative path.
print('Before: ', [os.path.relpath(p) for p in fs.file.glob(model_out_glob)])

fs.ipfs.get(rpath=model_cid,
            lpath=model_out,  # a filename must be given
            recursive=True,
            return_cid=False)

print('After: ', [os.path.relpath(p) for p in fs.file.glob(model_out_glob)])

Before:  []
After:  ['test/data/model/output/testmodel.pkl']


## Doing inference with retrieved model

In [16]:
# Rebuild a learner from the file fetched from IPFS. This rebinds `learn`, replacing
# the learner trained earlier in the notebook.
# SECURITY NOTE(review): the file is a pickle (.pkl) and, per the fastai docs,
# load_learner unpickles it; unpickling can execute arbitrary code, so only load
# CIDs whose content you trust.
learn = load_learner('test/data/model/output/testmodel.pkl')

In [17]:
# Build a test DataLoader from the first ten rows of the dataset.
dl = learn.dls.test_dl(df.head(10))

In [18]:
# Run inference on the test DataLoader. The displayed result is a tuple of two
# tensors - presumably per-class probabilities followed by targets; confirm
# against the fastai docs.
learn.get_preds(dl=dl)

(tensor([[0.9597, 0.0403],
         [0.3615, 0.6385],
         [0.9680, 0.0320],
         [0.8451, 0.1549],
         [0.3720, 0.6280],
         [0.1519, 0.8481],
         [0.9848, 0.0152],
         [0.4713, 0.5287],
         [0.8843, 0.1157],
         [0.1715, 0.8285]]),
 tensor([[0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [0],
         [1],
         [1],
         [1]], dtype=torch.int8))