In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
import os
import time
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cellpy
from cellpy import cellreader

%matplotlib inline

In [4]:
from cellpy import log

# Configure cellpy logging at INFO level, passing the current working
# directory as the custom log directory.
log.setup_logging(
    default_level="INFO",
    custom_log_dir=os.getcwd(),
)

In [5]:
# cellpy HDF5 file used as the starting point of this notebook.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
filename = Path("/Users/jepe/Arbeid/Data/celldata/20171120_nb034_11_cc.h5")
# File size in bytes divided by 1_048_576, printed with its unit so it reads
# the same way as the size reported for the re-saved file further down.
print(f"size: {filename.stat().st_size/1_048_576} MB")

my_data = cellreader.CellpyData()
my_data.load(filename)
dataset = my_data.dataset
# A bare `dataset.step_table.head()` used to sit here; as a mid-cell expression
# its value was never displayed, so it has been dropped.
print(dataset.step_table.columns)

size: 17.35070037841797
(cellpy) - created CellpyData instance
Index(['cycle', 'step', 'I_avr', 'I_std', 'I_max', 'I_min', 'I_start', 'I_end',
       'I_delta', 'I_rate', 'V_avr', 'V_std', 'V_max', 'V_min', 'V_start',
       'V_end', 'V_delta', 'V_rate', 'Charge_avr', 'Charge_std', 'Charge_max',
       'Charge_min', 'Charge_start', 'Charge_end', 'Charge_delta',
       'Charge_rate', 'Discharge_avr', 'Discharge_std', 'Discharge_max',
       'Discharge_min', 'Discharge_start', 'Discharge_end', 'Discharge_delta',
       'Discharge_rate', 'IR', 'IR_pct_change', 'type', 'info'],
      dtype='object')




In [6]:
# Show the column labels of the table held in `dataset.dfdata`.
dataset.dfdata.columns

Index(['Test_ID', 'Data_Point', 'Test_Time', 'Step_Time', 'DateTime',
       'Step_Index', 'Cycle_Index', 'Is_FC_Data', 'Current', 'Voltage',
       'Charge_Capacity', 'Discharge_Capacity', 'Charge_Energy',
       'Discharge_Energy', 'dV/dt', 'Internal_Resistance', 'AC_Impedance',
       'ACI_Phase_Angle', 'IR_pct_change'],
      dtype='object')

In [7]:
# Preview the first 10 rows of `dataset.dfdata`.
dataset.dfdata.head(10)

Unnamed: 0,Test_ID,Data_Point,Test_Time,Step_Time,DateTime,Step_Index,Cycle_Index,Is_FC_Data,Current,Voltage,Charge_Capacity,Discharge_Capacity,Charge_Energy,Discharge_Energy,dV/dt,Internal_Resistance,AC_Impedance,ACI_Phase_Angle,IR_pct_change
0,1,1,300.01986,299.96663,43059.534479,1,1,0,0.0,3.211527,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,1,2,600.034078,599.980848,43059.537951,1,1,0,0.0,3.209682,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2,1,3,900.04836,899.99513,43059.541424,1,1,0,0.0,3.207529,0.0,0.0,0.0,0.0,-6.1e-05,0.0,0.0,0.0,
3,1,4,1200.062845,1200.009615,43059.544896,1,1,0,0.0,3.205992,0.0,0.0,0.0,0.0,6.1e-05,0.0,0.0,0.0,
4,1,5,1500.076886,1500.023655,43059.548368,1,1,0,0.0,3.204147,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
5,1,6,1800.091137,1800.037906,43059.55184,1,1,0,0.0,3.202302,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
6,1,7,2100.105355,2100.052125,43059.555313,1,1,0,0.0,3.200456,0.0,0.0,0.0,0.0,-6.1e-05,0.0,0.0,0.0,
7,1,8,2400.135231,2400.082001,43059.558785,1,1,0,0.0,3.198919,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
8,1,9,2700.14967,2700.09644,43059.562257,1,1,0,0.0,3.197381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
9,1,10,3000.163718,3000.110488,43059.565729,1,1,0,0.0,3.195536,0.0,0.0,0.0,0.0,-6.1e-05,0.0,0.0,0.0,


## Some notes
- should rename the tables consistently
  - *e.g.* dfsummary, dfdata, dfinfo, dfsteps, dffid
  - have to take care so that it also can read "old" cellpy-files
- should make (or check if it is already made) an option for giving a "custom" config-file in starting the session

In [8]:
# Build the step table on the loaded object before it is saved below.
# NOTE(review): presumably this is what yields the different step-table columns
# printed after the saved file is reloaded - confirm.
my_data.make_step_table()

In [9]:
# Write the current state of `my_data` to a second file next to the original
# (same stem, ".nh5" extension).
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
filename2 = Path("/Users/jepe/Arbeid/Data/celldata/20171120_nb034_11_cc.nh5")
my_data.save(filename2)

In [10]:
# Size of the re-saved file: bytes divided by 1_048_576.
print(f"size: {filename2.stat().st_size/1_048_576} MB")

size: 5.137908935546875 MB


In [11]:
# Round trip: read the re-saved file into a fresh CellpyData object and list
# the step-table columns it carries.
my_data2 = cellreader.CellpyData()
my_data2.load(filename2)

dataset2 = my_data2.dataset
print(dataset2.step_table.columns)

# Drop both temporary names again; only the column listing was needed.
del my_data2, dataset2

(cellpy) - created CellpyData instance
Index(['cycle', 'step', 'sub_step', 'point_avr', 'point_std', 'point_min',
       'point_max', 'point_first', 'point_last', 'point_delta',
       'step_time_avr', 'step_time_std', 'step_time_min', 'step_time_max',
       'step_time_first', 'step_time_last', 'step_time_delta', 'current_avr',
       'current_std', 'current_min', 'current_max', 'current_first',
       'current_last', 'current_delta', 'voltage_avr', 'voltage_std',
       'voltage_min', 'voltage_max', 'voltage_first', 'voltage_last',
       'voltage_delta', 'charge_avr', 'charge_std', 'charge_min', 'charge_max',
       'charge_first', 'charge_last', 'charge_delta', 'discharge_avr',
       'discharge_std', 'discharge_min', 'discharge_max', 'discharge_first',
       'discharge_last', 'discharge_delta', 'ir_avr', 'ir_std', 'ir_min',
       'ir_max', 'ir_first', 'ir_last', 'ir_delta', 'type', 'sub_type',
       'info'],
      dtype='object')


In [12]:
# next: don't load the full hdf5-file, only get datapoints for a cycle from step_table
# then: query the hdf5-file for the data (and time it)
# ex: store.select('/CellpyData/dfdata', "data_point>20130104 & data_point<20130104 & columns=['A', 'B']")

In [13]:
# Keys used to address the individual tables inside the cellpy HDF5 file.
infoname = '/CellpyData/info'
dataname = '/CellpyData/dfdata'
summaryname = '/CellpyData/dfsummary'
fidname = '/CellpyData/fidtable'
stepname = '/CellpyData/step_table'

# The store is only read from in this notebook, so open it read-only instead
# of the default append mode.
store = pd.HDFStore(filename2, mode="r")

# Pull a row range and a column subset straight from disk. The key comes from
# `dataname` rather than a repeated literal, and the former duplicate query
# whose result was silently discarded has been removed.
store.select(dataname, "index>21 & index<32 & columns=['Test_Time', 'Step_Index']")

Unnamed: 0_level_0,Test_Time,Step_Index
Data_Point,Unnamed: 1_level_1,Unnamed: 2_level_1
22,6600.350393,1
23,6900.365423,1
24,7200.378906,1
25,7500.393105,1
26,7800.407402,1
27,8100.421654,1
28,8400.435929,1
29,8700.450196,1
30,9000.464412,1
31,9300.478659,1


## Querying cellpy file (hdf5)
1. load steptable
2. get the stepnumbers for given cycle
3. create query and run it
4. scale the charge (1_000_000/mass)

In [14]:
# Read the complete step table from the open HDF5 store.
steptable = store.select(stepname)

In [15]:
# Look up the 'charge' steps in the step table read from the HDF5 store.
# No cycle filter is applied here (cycle_number=None); cycle 2 is picked out
# with a boolean mask instead. Passing cycle_number to get_step_numbers would
# be the alternative.
s = my_data.get_step_numbers(
    steptable=steptable,
    steptype='charge',
    cycle_number=None,
    allctypes=True,
    pdtype=True,
)
cycle_mask = s["cycle"] == 2

In [None]:
# Preview the first rows of the step selection.
s.head()

In [None]:
# `point_first` / `point_last` of the first row selected by `cycle_mask`;
# used below as the index bounds of the HDF5 query.
# NOTE(review): `.values[0]` keeps only the first matching row - if the cycle
# has more than one matching step the others are ignored; confirm this is intended.
a = s.loc[cycle_mask, ["point_first", "point_last"]].values[0]

In [None]:
# Column labels as they appear in the `dfdata` column listing earlier in the notebook.
v_hdr, c_hdr = "Voltage", "Charge_Capacity"
d_hdr, i_hdr = "Discharge_Capacity", "Current"

In [None]:
# Row filter for the HDF5 query: index between a[0] and a[1], both inclusive.
q = f"index>={ a[0] } & index<={ a[1] }"

In [None]:
# Build the complete query (row range plus column subset) in one assignment.
# The previous `q += ...` appended the column filter again on every re-run of
# this cell, which corrupted the query; rebuilding it keeps the cell idempotent.
q = f"index>={ a[0] } & index<={ a[1] } & columns = ['{c_hdr}', '{v_hdr}']"

In [None]:
# Mass stored on the dataset; used below to scale the charge column.
mass = dataset.mass
print(f"mass from dataset.mass = {mass:5.4} mg")

In [None]:
%%timeit
# Baseline timing: cellpy's own lookup for cycle 2 on the in-memory object.
# (presumably returns the charge capacity curve for that cycle - confirm)
my_data.get_ccap(2)

In [None]:
%%timeit
# Timing of the alternative route: run query `q` against the on-disk table,
# then multiply the charge column by 1000000 / mass.
# NOTE: names assigned inside a `%%timeit` cell (here `c2`) are not kept in the
# notebook namespace afterwards.
c2 = store.select('/CellpyData/dfdata', q)
c2[c_hdr] = c2[c_hdr] * 1000000 / mass


In [None]:
# Ratio of the two timings quoted in the Result notes below (5.03 ms vs 3.05 ms).
5.03/3.05

### Result
- 65% penalty for using "hdf5" query lookup
- 5.03 ms (hdf5 query) vs 3.05 ms (`get_ccap`)

In [None]:
# `%%timeit` does not leave its variables in the notebook namespace, so the
# `c2` assigned in the timing cell does not exist here. Rebuild it with the
# same query and scaling before plotting.
c2 = store.select(dataname, q)
c2[c_hdr] = c2[c_hdr] * 1000000 / mass

# Scaled charge column on x, voltage on y; the axes are labelled with the column names.
plt.plot(c2[c_hdr], c2[v_hdr])
plt.xlabel(c_hdr)
plt.ylabel(v_hdr)

In [None]:
# Close the HDF5 store opened with pd.HDFStore earlier in the notebook.
store.close()