In [30]:
%matplotlib inline
from pico import Pico
import re
import xarray as xr
import pylab as pl
import numpy as np
import bokeh as bk
import holoviews as hv
import pandas as pd
import xarray as xr
from pprint import pprint


In [16]:
# Path of the CSV file analysed in this notebook.
csv_file_name = './data/070917/Air_Sweep_20170712-0002.csv'

# Parse the CSV; the resulting DataFrame is exposed as `p.df`.
p = Pico(csv_file_name)

# Independent copy, so in-place edits made to `df` never touch `p.df`.
df = p.df.copy()

In [17]:
# Preview the first rows of the frame held by the Pico object.
p.df.head()

Unnamed: 0,t,a,b,c
0,-1e-06,-0.017396,-0.15183,-0.005799
1,-0.0,-0.033509,-0.148381,-0.002564
2,1e-06,-0.046159,-0.145253,0.005493
3,2e-06,-0.028993,-0.148149,-0.020875
4,3e-06,-0.020371,-0.143254,-0.020447


In [47]:
class Data:
    """Store a time-series DataFrame as scaled integers in netCDF, and load it back.

    Every non-time column is divided by a per-column scale so that its largest
    absolute value maps to ``2 ** bits`` and is then cast to ``dtype``.  The
    time column ``t`` is not stored: only the median sample period and the
    index of the first sample are kept, and ``load`` rebuilds it from those.
    All information needed for reconstruction lives in ``self.meta``, which is
    written to the netCDF file as dataset attributes.
    """

    CURRENT_VERSION = '1.0.0'

    def __init__(self, bits=14, dtype=None):
        """Create a bundler.

        Parameters
        ----------
        bits : int
            The largest absolute value of each column maps to ``2 ** bits``.
        dtype : numpy dtype, type or str, optional
            Storage type of the compressed columns; defaults to ``np.int16``.

        Raises
        ------
        ValueError
            If ``dtype`` is an integer type that cannot hold ``2 ** bits``
            (the cast would otherwise wrap around silently).
        """
        self.version = self.CURRENT_VERSION
        self.bits = bits
        self.dtype = np.int16 if dtype is None else dtype

        # np.dtype() accepts scalar types, dtype instances and strings alike,
        # whereas ``.__name__`` only exists on the scalar types.
        np_dtype = np.dtype(self.dtype)
        if np.issubdtype(np_dtype, np.integer) and 2 ** bits > np.iinfo(np_dtype).max:
            raise ValueError(
                'bits={} does not fit in dtype {}'.format(bits, np_dtype.name)
            )

        self.meta = {
            '__version__': self.version,
            '__dtype__': np_dtype.name,
            '__bits__': self.bits,
        }

    def compress_data_frame(self, df):
        """Return a compressed copy of ``df`` without its ``t`` column.

        The caller's frame is left untouched.  ``self.meta`` receives
        ``__delta_t__``, ``__start_index__`` and one ``__scale_<col>__`` entry
        per remaining column.

        Parameters
        ----------
        df : pandas.DataFrame
            Must contain a column ``t``; all other columns are compressed.

        Returns
        -------
        pandas.DataFrame
            New frame whose columns have dtype ``self.dtype``.

        Raises
        ------
        ValueError
            If no finite, non-zero sample period can be derived from ``t``.
        """
        # save meta info needed to reconstitute time
        delta_t = df.t.diff().median()
        if not np.isfinite(delta_t) or delta_t == 0:
            raise ValueError(
                'Cannot derive a non-zero sample period from column "t"'
            )
        self.meta['__delta_t__'] = delta_t
        self.meta['__start_index__'] = int(np.round(df.t.iloc[0] / delta_t))

        # time is no longer needed; drop() without inplace returns a new frame
        compressed = df.drop('t', axis=1)

        # loop over all other columns saving scale values and transforming to dtype
        for col in compressed.columns:
            scale = compressed[col].abs().max() / 2 ** self.bits
            if not scale > 0:
                # all-zero column: any scale reproduces it, and 1.0 avoids 0/0
                scale = 1.0
            self.meta['__scale_{}__'.format(col)] = scale
            # Replace the whole column: ``.loc[:, col] = ...`` writes into the
            # existing float block in recent pandas and would keep float64.
            compressed[col] = (compressed[col] / scale).round().astype(self.dtype)
        return compressed

    def csv_to_netcdf(self, csv_file_name, netcdf_file_name, **attrs):
        """Read a CSV through ``Pico``, compress it and write it to netCDF.

        Parameters
        ----------
        csv_file_name : str
            Path handed to ``Pico``; its ``.df`` attribute is compressed.
        netcdf_file_name : str
            Destination path of the netCDF file.
        **attrs
            Extra attributes merged into ``self.meta`` and stored in the file.
        """
        # store any additional attributes in the meta dict
        self.meta.update(**attrs)

        # load a pico dataframe from the csv file
        df = Pico(csv_file_name).df

        # compress the dataframe and store meta information for reconstitution
        df = self.compress_data_frame(df)

        # create an xarray dataset from the dataframe
        ds = xr.Dataset.from_dataframe(df)

        # set the meta information on the array
        ds.attrs = dict(self.meta)

        # write to netcdf
        ds.to_netcdf(netcdf_file_name)

    def load(self, netcdf_file_name, channel_mappings=None):
        """Load a file written by ``csv_to_netcdf`` back into a float DataFrame.

        Parameters
        ----------
        netcdf_file_name : str
            Path of the netCDF file to read.
        channel_mappings : dict, optional
            Maps stored channel names (a subset of ``a``, ``b``, ``c``, ``d``)
            to the column names wanted in the result.

        Returns
        -------
        pandas.DataFrame
            Column ``t`` first, followed by the rescaled (and renamed) channels.

        Raises
        ------
        ValueError
            If ``channel_mappings`` has a key outside the allowed channels.
        """
        if channel_mappings is None:
            channel_mappings = {}

        specified_channels = set(channel_mappings.keys())
        allowed_channels = set('abcd')
        bad_channels = specified_channels - allowed_channels
        if bad_channels:
            raise ValueError('Channel names must be taken from {}'.format(allowed_channels))

        # load the data and extract the meta; the context manager releases the
        # file handle so the same file can be rewritten later
        with xr.open_dataset(netcdf_file_name) as ds:
            self.meta = dict(ds.attrs)
            df = ds.to_dataframe()

        # extract the scale mapping for all columns (names of any length)
        rex_scale = re.compile(r'__scale_(.+)__')
        scales = {}
        for key, val in self.meta.items():
            m = rex_scale.fullmatch(key)
            if m:
                scales[m.group(1)] = val

        # reconstitute time from the stored start index and sample period
        sample_index = self.meta['__start_index__'] + np.arange(len(df))
        df.insert(0, 't', sample_index * self.meta['__delta_t__'])

        # scale columns; assigning the whole column lets the dtype become float
        for col, scale in scales.items():
            if col in df.columns:
                df[col] = scale * df[col]

        # rename channels and return the dataframe
        return df.rename(columns=channel_mappings)
        
# NOTE: 'rob.nc' is written by the `Data().csv_to_netcdf(...)` cell further
# down the notebook; that cell has to be run before this one.
data = Data()
df = data.load(
    'rob.nc',
    channel_mappings={'a': 'rob', 'b': 'rich'},
)
df.head()
        
        
        
    
        

Unnamed: 0_level_0,t,rob,rich,c
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,-1e-06,-0.01734,-0.151814,-0.006099
1,0.0,-0.033442,-0.148372,-0.002439
2,1e-06,-0.046138,-0.145229,0.006099
3,2e-06,-0.029107,-0.148147,-0.020735
4,3e-06,-0.020437,-0.143284,-0.020735


In [48]:
# Show the metadata held by `data` (taken from the file's attributes by load()).
data.meta

OrderedDict([('__version__', '1.0.0'),
             ('__dtype__', 'int16'),
             ('__bits__', 14),
             ('first', 'rob'),
             ('last', 'decarvalho'),
             ('age', 45),
             ('height', 6.0099999999999998),
             ('__delta_t__', 1.0000000000010001e-06),
             ('__start_index__', -1),
             ('__scale_a__', 0.00030965092468261722),
             ('__scale_b__', 7.482196044921874e-05),
             ('__scale_c__', 0.0012197344970703125)])

In [46]:
# Compress the CSV into 'rob.nc'; the extra keyword arguments are stored as file attributes.
# NOTE: the earlier cell that calls data.load('rob.nc') needs this file to exist.
Data().csv_to_netcdf(csv_file_name, 'rob.nc', first='rob', last='decarvalho', age=45, height=6.01)

In [14]:
# Preview the first rows of the current `df`.
df.head()

Unnamed: 0,t,a,b,c
0,-1e-06,-0.017396,-0.15183,-0.005799
1,-0.0,-0.033509,-0.148381,-0.002564
2,1e-06,-0.046159,-0.145253,0.005493
3,2e-06,-0.028993,-0.148149,-0.020875
4,3e-06,-0.020371,-0.143254,-0.020447


In [13]:
# Median spacing between consecutive values of the time column.
dt = df.t.diff().median()
# First timestamp expressed as a whole number of sample periods.
np.round(df.t.iloc[0] / dt)


-1.0

In [6]:
# Name of the scalar type as a plain string (the value Data records under '__dtype__').
np.int16.__name__

'int16'

In [None]:
"""
So here's what I want to do.  Change this into a bundler.  It takes a pico dataframe
and some kwarg attributes.  It transforms the dataframe into int16 values and stores scales.
It then saves the scaled dataframe and attributes to a netcdf file.
I then want a loader that scales the values back to floats and repopulates the time column.

"""

class Intifier:
    """Prototype that converts a frame's value columns to scaled integers in place.

    Attributes
    ----------
    scales : dict
        Per-column scale factors, keyed by column name.  It starts with a
        placeholder ``'stuff'`` entry holding a multi-line string — presumably
        to check that such strings survive as netCDF attributes; TODO confirm.
    bits : int
        The largest absolute value of each column maps to ``2 ** bits``.
    dtype : type
        Integer type the columns are cast to.
    """

    def __init__(self):
        self.scales = {'stuff': 'this is a really\n long string\nwith newlines and everythin'}
        self.bits = 14
        self.dtype = np.int16

    def process(self, df):
        """Drop ``t`` from ``df`` and replace every other column by scaled integers.

        ``df`` is modified in place and nothing is returned.  The scale of each
        column is recorded in ``self.scales``.
        """
        df.drop('t', axis=1, inplace=True)
        for col in df.columns:
            scale = df[col].abs().max() / 2 ** self.bits
            if not scale > 0:
                # all-zero column: any scale reproduces it, and 1.0 avoids 0/0
                scale = 1.0
            self.scales[col] = scale
            # Replace the whole column: ``.loc[:, col] = ...`` writes into the
            # existing float block in recent pandas and would keep float64.
            df[col] = (df[col] / scale).round().astype(self.dtype)

# Work on a copy: Intifier.process() edits the frame it is given in place.
df = p.df.copy()
ii = Intifier()
ii.process(df)
df.dtypes

# Build one dataset from the integer frame and one from the original frame.
da1 = xr.Dataset.from_dataframe(df)
da2 = xr.Dataset.from_dataframe(p.df)

# Only the integer dataset carries the scale factors as attributes.
da1.attrs = ii.scales

# Write both variants to disk.
da1.to_netcdf('my_file_int.nc')
da2.to_netcdf('my_file_float.nc')

            

In [None]:
# Use p.df here: the preceding cell ran Intifier.process(df), which drops the
# 't' column from `df` in place.
# Index label of the sample closest to t == 0.  An exact `== 0` comparison
# raises IndexError whenever no timestamp is exactly zero.
zero_ind = p.df.t.abs().idxmin()
# Median spacing between consecutive time samples.
delta_t = p.df.t.diff().median()
print(zero_ind, delta_t)

In [None]:
# Re-open the integer file and convert it to a DataFrame.  The context manager
# closes the file handle afterwards, so 'my_file_int.nc' can be rewritten later.
with xr.open_dataset('my_file_int.nc') as xxx:
    yyy = xxx.to_dataframe()
# The attributes are already in memory and stay readable after closing.
xxx.attrs

In [None]:
# Preview the frame reloaded from the netCDF file.
yyy.head()

In [None]:
# Quantise channel `a` so that its largest absolute value maps to 2**14.
scale = p.df.a.abs().max() / 2**14
p.df['a_int'] = (p.df.a / scale).round().astype(np.int16)

# Reconstruct the floats from the integers and keep the round-trip error.
p.df['a_new'] = scale * p.df.a_int
p.df['res'] = p.df.a_new - p.df.a

p.df.head()

In [None]:
# Histogram (300 bins) of the `res` column.
p.df.res.hist(bins=300)

In [None]:
# Mean spacing between consecutive values of the time column.
p.df.t.diff().mean()

In [None]:
# Largest value in the time column.
p.df.t.max()

In [None]:
# Base-10 logarithm of 2**64.  The integer 2**64 does not fit in any numpy
# integer type, so it is converted to float before being passed to the ufunc.
np.log10(float(2**64))

In [None]:
# Bare reference to the numpy module; the cell only displays its repr (scratch cell).
np

In [None]:
# Base-10 logarithm of 2**32, converted to float first.
np.log10(float(2**32))

In [None]:
# Scratch arithmetic: two ratios displayed as a tuple.
# NOTE(review): the meaning of 177, 8.3 and 5.8 is not recorded here — confirm or delete.
177. / 8.3, 177. / 5.8


In [None]:
# Scratch arithmetic: ratio of two hard-coded numbers.
# NOTE(review): the meaning of these numbers is not recorded here — confirm or delete.
30993752./5655943
