In [1]:
import pandas
import os
import glob
import datetime
from collections import OrderedDict

import magic

In [3]:
import apes
from apes.core.basedata import BaseData, SourceData, CSVSourceData


In [4]:
# Location of the sample CSV that the data objects in this notebook wrap.
file_name = 'sourcedata/sample.csv'

# Tags passed as `tag=` when the data objects are constructed.
tagl = [
    'gdp',
    '2018',
    '2017',
    '2013',
    'poverty',
]


### BaseData

In [5]:
# Build a BaseData record for the sample file, attaching a name,
# a short description and the tag list.
based = BaseData(
    file_name,
    name='sample',
    info='Sample of CAGDP9',
    tag=tagl,
)


In [10]:
# Format a numeric size as a string while forcing the unit to MB.
# The recorded output is '95367.43 MB', i.e. the input divided by 1024 ** 2.
from apes.core.basedata import file_size_format
file_size_format(99999999999, force_unit='MB')

'95367.43 MB'

In [13]:
# size check: keep the size string of the sample file (recorded elsewhere in
# this notebook as '18.68 KB') so it can be passed to file_size_normalize.
bs = based.attr.size

# Number of bytes in one of each size unit understood by file_size_normalize.
unit_dict = OrderedDict([
    ('Bytes', 1),
    ('KB', 1024),
    ('MB', 1024 ** 2),
    ('GB', 1024 ** 3),
])

def file_size_normalize(size, unit='MB'):
    """
    Convert a human-readable file size string to a number in a given unit.

    Parameters
    ----------
    size: str
        File size and unit separated by whitespace, e.g. "28.96 MB".
        Leading/trailing whitespace and repeated separators are tolerated.
    unit: str, optional
        Target unit; must be a key of ``unit_dict``. Defaults to "MB".

    Returns
    -------
    float
        The size expressed in ``unit``.

    Raises
    ------
    ValueError
        If ``size`` is not exactly a "<number> <unit>" pair, or the number
        cannot be parsed as a float.
    KeyError
        If the parsed unit or ``unit`` is not a key of ``unit_dict``.
    """
    # split() without an argument collapses any run of whitespace, so inputs
    # such as " 18.68  KB" no longer break the two-value unpacking the way
    # split(' ') did.
    size_val, size_unit = size.split()
    return float(size_val) * unit_dict[size_unit] / unit_dict[unit]

# Express the size string in MB as a float (recorded output: 0.0182421875).
file_size_normalize(bs)

0.0182421875

In [14]:
# Size string stored on the attribute container (recorded output: '18.68 KB').
based.attr.size

'18.68 KB'

In [None]:
# Inspect the `type` field of the attribute container
# (presumably the detected file type; confirm against BaseData).
based.attr.type

In [None]:
# Inspect the instance's `tag` attribute; `tagl` was passed as `tag=` when it was built.
based.tag

In [None]:
# Display the whole attribute container, of which `size` and `type` are fields.
based.attr

### SourceData

In [None]:
# Same sample file as a SourceData record, which additionally takes
# provenance fields: a URL, a DOI and a licence.
sourced = SourceData(
    file_name,
    name='sample',
    info='Sample of CAGDP9',
    tag=tagl,
    url='sample_url',
    doi='sample_doi',
    lic='sample_lic',
)


In [None]:
# Display the SourceData instance through its repr.
sourced

In [None]:
# Inspect the instance's `tag` attribute; `tagl` was passed as `tag=` when it was built.
sourced.tag

### Split data

In [None]:
# Point BaseData at the split sample path instead of the single CSV file.
# NOTE(review): the path has no extension, so it is presumably a directory
# of parts; confirm against BaseData.
sfile_name = 'sourcedata/sample_split'
baseds = BaseData(
    sfile_name,
    name='sample',
    info='Sample of CAGDP9',
    tag=tagl,
)


In [None]:
# Display the BaseData instance built from the split sample path.
baseds

### CSV Data

In [None]:
# Redeclare the split sample path and the tag list inside this cell,
# then wrap the path in a CSVSourceData record.
sfile_name = 'sourcedata/sample_split'
tagl = [
    'gdp',
    '2018',
    '2017',
    '2013',
    'poverty',
]
csvsdata = CSVSourceData(
    sfile_name,
    name='sample',
    info='Sample of CAGDP9',
    tag=tagl,
)


In [None]:
# Presumably loads the split sample as a pandas DataFrame (inferred from the
# method name; confirm against CSVSourceData). The full result is displayed.
csvsdata.as_df()

In [None]:
# Presumably previews the first rows, mirroring DataFrame.head; confirm against CSVSourceData.
csvsdata.head()

In [None]:
# Redeclare the single-file sample path and the tag list inside this cell.
file_name = 'sourcedata/sample.csv'
tagl = [
    'gdp',
    '2018',
    '2017',
    '2013',
    'poverty',
]

# CSVSourceData record for the single CSV, with URL, DOI and licence supplied.
csvdata = CSVSourceData(
    file_name,
    name='sample',
    info='Sample of CAGDP9',
    tag=tagl,
    url='sample_url',
    doi='sample_doi',
    lic='sample_lic',
)


In [None]:
# Display the CSVSourceData instance through its repr.
csvdata

In [None]:
# Presumably loads the single CSV as a pandas DataFrame (inferred from the
# method name; confirm against CSVSourceData). The full result is displayed.
csvdata.as_df()