In [1]:
# Print the flit build configuration (pyproject.toml) of the local dafnepy checkout.
!cat /opt/dafnepy/pyproject.toml

[build-system]
requires = ["flit_core >=2,<3"]
build-backend = "flit_core.buildapi"

[tool.flit.metadata]
module = "dafnedset"
author = "Javier J. Clavijo"
author-email = "jclavijo@gmail.com"
home-page = "https://jjclavijo.github.io/pydafne/"
classifiers = ["License :: OSI Approved :: MIT License"]
requires = [
    "numpy",
    "pandas",
    "pyarrow",
    "psycopg2-binary",
    "shapely"
]
requires-python=">=3.3"

[tool.flit.scripts]
dafne_dataserver = "dafnedset.serve:main"
dafne_writeparquet = "dafnedset.parquet:save"


In [3]:
# Install flit plus the packages listed under `requires` in pyproject.toml, from conda-forge.
# NOTE(review): no versions are pinned, so a later re-run may resolve different builds.
!conda install -y -c conda-forge flit numpy pandas pyarrow psycopg2-binary shapely

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.10.1
  latest version: 4.10.3

Please update conda by running

    $ conda update -n base conda



## Package Plan ##

  environment location: /opt/conda

  added / updated specs:
    - flit
    - numpy
    - pandas
    - psycopg2-binary
    - pyarrow
    - shapely


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    abseil-cpp-20210324.2      |       h9c3ff4c_0        1010 KB  conda-forge
    arrow-cpp-4.0.1            |py39h881cbb8_2_cpu        22.2 MB  conda-forge
    aws-c-cal-0.5.11           |       h95a6274_0          37 KB  conda-forge
    aws-c-common-0.6.2         |       h7f98852_0         168 KB  conda-forge
    aws-c-event-stream-0.2.7   |      h3541f99_13          47 KB  conda-forge
    aws-c-io-0.10.5            |       hfb6a706_0         121 KB  conda-forge
    aws-chec

In [4]:
!cd /opt//dafnepy; flit install -s

[?1l>Fetching list of valid trove classifiers                         [32mI-flit.validate[m
Extras to install for deps 'all': {'.none'}                       [32mI-flit.install[m
Installing requirements                                           [32mI-flit.install[m
Symlinking dafnedset -> /opt/conda/lib/python3.9/site-packages/dafnedset  [32mI-flit.install[m
Writing script to /opt/conda/bin/dafne_dataserver                 [32mI-flit.install[m
Writing script to /opt/conda/bin/dafne_writeparquet               [32mI-flit.install[m


In [1]:
import numpy as np
from dafnedset import base_simple as bs

In [2]:
# First toy buffer: a single provider made of 14 constant arrays (values 0..13),
# each 13 elements long, consumed in batches of 30 with niter=2.
opts = bs.FunBufferOptions(batch_size=30, niter=2)
buf = bs.FunBuffer(
    options=opts,
    providers=[[np.full(13, float(value)) for value in range(14)]],
)

In [3]:
# Second toy buffer, built exactly like the first one (14 constant arrays of
# length 13, batch_size=30, niter=2) so the two can be combined.
opts = bs.FunBufferOptions(batch_size=30, niter=2)
buf2 = bs.FunBuffer(
    options=opts,
    providers=[[np.full(13, float(value)) for value in range(14)]],
)

In [4]:
# Check that `+` is defined between two FunBuffers; the recorded result is a new FunBuffer.
buf + buf2

<dafnedset.base_simple.FunBuffer at 0x7faefba38760>

In [5]:
# Replace `buf` with the combination of both buffers.
# NOTE(review): self-referential reassignment — re-running this cell adds buf2 again.
buf = buf + buf2

In [6]:
# Call the module-level fill_streams helper on the combined buffer and keep its
# return value for inspection in the next cell.
a = bs.fill_streams(buf)

In [7]:
# Recorded run: a list of two `__next__` method-wrappers of list iterators.
a

[<method-wrapper '__next__' of list_iterator object at 0x7faefba38d30>,
 <method-wrapper '__next__' of list_iterator object at 0x7faefba38a90>]

In [8]:
# Inspect the buffer before anything was pulled; the recorded run shows no output here.
buf.buffer

In [9]:
# Call the module-level advance helper on the buffer; the recorded result is (None, []).
bs.advance(buf)

(None, [])

In [10]:
# fill_buffer() is expected to raise StopIteration at this point
# (presumably because buf.streams has not been filled yet — confirm in base_simple).
# Start from False so that a missing exception fails the assert, instead of raising
# NameError or silently passing on a stale `test` left over in the kernel.
test = False
try:
    buf.fill_buffer()
except StopIteration:
    test = True
assert test is True

In [11]:
# Method form of fill_streams; the next cell shows buf.streams populated afterwards.
buf.fill_streams()

In [12]:
# Recorded run: two `__next__` method-wrappers of list iterators.
buf.streams

[<method-wrapper '__next__' of list_iterator object at 0x7faefba38eb0>,
 <method-wrapper '__next__' of list_iterator object at 0x7faefba38e50>]

In [13]:
# With the streams filled, fill_buffer() completes without raising in the recorded run.
buf.fill_buffer()

In [14]:
# Recorded run: two arrays of 13 zeros (the first array of each provider).
buf.buffer

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])]

In [15]:
# Iteration counters held by the buffer; recorded value is [1, 1].
buf.iteration

[1, 1]

In [16]:
# Call fill_streams again now that streams exist; the recorded result (next cell) is [None, None].
bbb = bs.fill_streams(buf)

In [17]:
# Recorded run: [None, None].
bbb

[None, None]

In [18]:
# Recorded run: the same two stream objects (same addresses) as before the second fill_streams call.
buf.streams

[<method-wrapper '__next__' of list_iterator object at 0x7faefba38eb0>,
 <method-wrapper '__next__' of list_iterator object at 0x7faefba38e50>]

In [19]:
# Combine the current streams with the [None, None] result from fill_streams.
# Recorded result: the original streams, paired with [0, 0].
bs.combine_streams(buf.streams,bbb)

([<method-wrapper '__next__' of list_iterator object at 0x7faefba38eb0>,
  <method-wrapper '__next__' of list_iterator object at 0x7faefba38e50>],
 [0, 0])

In [20]:
# Running count of elements pulled from `buf`; incremented by the next cell each time it is run.
tot=0

In [21]:
# Pull one batch from the buffer, then show the batch, its length and what is
# left in the buffer. `tot` keeps growing each time this cell is re-run.
sasa = next(buf)
print(sasa, len(sasa), buf.buffer, sep='\n')
tot += len(sasa)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1.]
30
[array([1., 1., 1., 1., 1., 1., 1., 1., 1.]), array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])]


In [22]:
# Normalise the consumed element count by 13 (array length) and by 2 twice.
# NOTE(review): the two 2s are presumably the two providers and niter=2 — confirm.
tot/13/2/2

0.5769230769230769

In [23]:
# Leftover data after one batch; recorded run shows 9 and 13 ones remaining.
buf.buffer

[array([1., 1., 1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])]

In [24]:
# Recorded run: the cache holds the 30-element batch that next(buf) returned.
buf.cache

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1.])]

In [25]:
from dafnedset import datasets as ds

In [29]:
from dafnedset import fun_ops as fop

In [26]:
from importlib import reload

In [107]:
# Re-import fun_ops after editing its source on disk.
# NOTE(review): `%autoreload 2` would make manual reload cells unnecessary.
fop = reload(fop)

In [31]:
# Display the CASES dataset definition: a dict whose visible entries include the
# 'size' and 'query' SQL statements.
ds.CASES

{'size': 'SELECT max(iid) FROM indice_c;',
 'query': "\n                      SELECT a.estacion, a.sid, s.time, a.tiempo, a.norte, a.este, a.altura\n                      FROM\n                        (SELECT max(estacion) estacion, max(sid) sid,\n                                array_agg(to_date(i.yymmmdd,'YYMONDD')\n                                    ORDER BY to_date(i.yymmmdd,'YYMONDD') ) tiempo,\n                                array_agg(north::real ORDER BY to_date(i.yymmmdd,'YYMONDD') ) norte,\n                                array_agg(east::real ORDER BY to_date(i.yymmmdd,'YYMONDD') ) este,\n                                array_agg(up::real ORDER BY to_date(i.yymmmdd,'YYMONDD') ) altura\n                         FROM indice_c i\n                         LEFT JOIN http_tseries t USING (estacion, yymmmdd)\n                         GROUP BY iid HAVING iid <@ int8range(%s,%s) ) AS a\n                      JOIN usgs_sismos s ON a.sid = s.ogc_fid;\n                      ",
 'columns

In [32]:
# Build a batched reader over the CASES definition with batch_size=123.
cfb = fop.read_db(ds.CASES,batch_size=123)

In [33]:
# Walk the reader and print the size of every batch it yields.
for batch in cfb:
    print(str(len(batch)), end='...')

123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...36...

In [34]:
# Dump a fresh CASES reader (default batch size) to 'casos.pq', relative to the
# notebook's working directory.
fop.write_parquet(fop.read_db(ds.CASES),'casos.pq')

In [35]:
l=0
# Read the parquet file back and report each batch size together with the
# running total of rows seen so far (`l`, initialised on the line above).
for batch in fop.read_parquet('casos.pq'):
    l = l + len(batch)
    print(f'{len(batch)}, accum {l}', end='...')

100, accum 100...100, accum 200...100, accum 300...100, accum 400...100, accum 500...100, accum 600...100, accum 700...100, accum 800...100, accum 900...100, accum 1000...100, accum 1100...100, accum 1200...100, accum 1300...100, accum 1400...100, accum 1500...100, accum 1600...100, accum 1700...100, accum 1800...100, accum 1900...100, accum 2000...100, accum 2100...100, accum 2200...100, accum 2300...100, accum 2400...100, accum 2500...100, accum 2600...100, accum 2700...100, accum 2800...100, accum 2900...100, accum 3000...100, accum 3100...100, accum 3200...100, accum 3300...100, accum 3400...100, accum 3500...100, accum 3600...100, accum 3700...100, accum 3800...100, accum 3900...100, accum 4000...100, accum 4100...100, accum 4200...100, accum 4300...100, accum 4400...100, accum 4500...100, accum 4600...100, accum 4700...100, accum 4800...100, accum 4900...100, accum 5000...100, accum 5100...100, accum 5200...100, accum 5300...100, accum 5400...100, accum 5500...100, accum 5600...1

In [36]:
from dafnedset import fun_transformations as ftr

In [37]:
# Re-batch the database reader through a FunBuffer: batches of 123 come in,
# each one is passed through label_batch([1., 0.]), and the buffer emits batches of 30.
# NOTE(review): `map` yields a one-shot iterator; with niter=2 confirm that FunBuffer
# can actually restart this provider for the second pass.
opts = bs.FunBufferOptions(batch_size=30,niter=2)
cfb = fop.read_db(ds.CASES,batch_size=123)
provider = map(ftr.label_batch([1.,0.]),cfb)

buf = bs.FunBuffer(options=opts, providers=[provider])

In [38]:
# Print the size of every batch emitted by the re-batching buffer.
for batch in buf:
    print(str(len(batch)), end='...')

30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...30...

In [39]:
# Apply the labelling transformation through the reader's own .map() instead of
# wrapping it in a FunBuffer. No FunBufferOptions are needed here, so the unused
# `opts` assignment was dropped.
cfb = fop.read_db(ds.CASES,batch_size=123)
label_tr = ftr.label_batch([1.,0.])

buf = cfb.map(label_tr)

In [40]:
# Print the size of every mapped batch; `batch` stays bound to the last one,
# which the next cell inspects.
for batch in buf:
    print(str(len(batch)), end='...')

123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...36...

In [44]:
# The last batch exposes an 'etiqueta' column with one entry per row (36 in the recorded run).
# NOTE(review): presumably this column is added by label_batch — confirm.
len(batch.column('etiqueta').tolist())

36

In [80]:
from dafnedset import split_simple as fss

In [91]:
# Re-import split_simple after editing its source on disk.
# NOTE(review): `%autoreload 2` would make manual reload cells unnecessary.
fss = reload(fss)

In [92]:
# Split every batch of the reader into three parts using weights 1:2:1
# (the recorded run shows 31/62/30 rows out of each 123-row batch).
cfb = fop.read_db(ds.CASES,batch_size=123)
spliter = fss.FunSplitter(cfb,[1.,2.,1.])

In [93]:
# For each split batch print: number of parts -> size of each part -> total rows.
for parts in spliter:
    sizes = [len(part) for part in parts]
    print(len(parts), sizes, sum(sizes), sep='->', end='...')

3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62, 30]->123...3->[31, 62,

In [94]:
# View onto part index 1 of the splitter (the weight-2 part in the [1., 2., 1.] list).
pt1 = fss.FunPart(spliter,1)

In [95]:
# Print the size of every batch yielded by the selected part.
for part_batch in pt1:
    print(len(part_batch), end='...')

62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...18...

In [96]:
# Rebuild the reader and the 1:2:1 splitter from scratch, so the part below is
# consumed without iterating the splitter itself first.
cfb = fop.read_db(ds.CASES,batch_size=123)
spliter = fss.FunSplitter(cfb,[1.,2.,1.])

In [97]:
# Take part index 1 straight from the fresh splitter and print its batch sizes.
pt1 = fss.FunPart(spliter, 1)

for part_batch in pt1:
    print(len(part_batch), end='...')

62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...62...18...

In [99]:
# Size of the first entry of the splitter's cache after consuming only part 1 (80 in the recorded run).
# NOTE(review): presumably the parts not consumed are kept in this cache — confirm in split_simple.
len(spliter.cache[0])

80

In [100]:
# Fresh reader and 1:2:1 splitter, used below to test indexing the splitter directly.
cfb = fop.read_db(ds.CASES,batch_size=123)
spliter = fss.FunSplitter(cfb,[1.,2.,1.])

In [101]:
# Indexing the splitter yields part 1 re-batched: the recorded run shows batches
# of 123 rows instead of the 62-row pieces produced by FunPart.
for part_batch in spliter[1]:
    print(len(part_batch), end='...')

123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...123...119...

In [1]:
from dafnedset.presets_simple.pos10 import data

In [2]:
# Iterate the pos10 preset and print the size of every batch it yields.
for preset_batch in data:
    print(len(preset_batch), end='...')

100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...28...

In [5]:
from dafnedset import base_simple as bs

In [11]:
# Build a new FunBuffer that reuses the preset's options and providers.
# NOTE(review): execution counts show this cell ran after the display cell below it.
buf = bs.FunBuffer(options=data.options,providers=data.providers)

In [9]:
# Show the repr of the buffer object.
buf

<dafnedset.base_simple.FunBuffer at 0x7ff9a26aae50>

In [12]:
# NOTE: the splitters reset themselves, so the rebuilt buffer yields the same
# batch sizes as the preset did on its first pass.
for preset_batch in buf:
    print(len(preset_batch), end='...')

100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...100...28...