In [15]:
import numpy as np
import pickle
from pathlib import Path

<b>Data format</b><br>
1 Row = 1 Cycle<br>
Each cycle so far has:
 - Qdlin (1000,1)
 - Tdlin (1000,1)
 - Cdlin (1000,1)
 - discharge_time (1,)
 - IR (1,)
 - remaining_cycle_life (1,)

In [16]:
# only taking batch1 for testing

path1 = Path("Data/batch1.pkl")
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling,
# so only load files that come from a trusted source.
# The context manager guarantees the file handle is closed again, even if
# unpickling fails (the bare open() used before leaked the handle).
with path1.open('rb') as pickle_file:
    batch1 = pickle.load(pickle_file)

# remove batteries that do not reach 80% capacity
del batch1['b1c8']
del batch1['b1c10']
del batch1['b1c12']
del batch1['b1c13']
del batch1['b1c22']

# Writing to TFRecord

In [221]:
import tensorflow as tf
from tensorflow.train import FloatList, Int64List
from tensorflow.train import Feature, Features, Example
import os

In [275]:
# see also Hands-On Machine Learning pp.403

def get_cycle_features(cell, idx):
    """Pack one cycle of one cell into an ``Example`` protobuf.

    The data is read from the notebook-level ``batch1`` dict.

    Args:
        cell: key of the cell inside ``batch1`` (e.g. "b1c0").
        idx: integer cycle index. It indexes the "IR" summary entry directly
            and, converted with ``str(idx)``, selects the per-cycle record.

    Returns:
        An ``Example`` holding the float features "IR" (one value), "Qdlin"
        and "Tdlin", plus the int64 feature "Remaining_cycles", computed as
        ``cycle_life - idx``.
    """
    cell_data = batch1[cell]

    # Scalar summary value for this cycle, wrapped in a one-element list.
    ir_feature = Feature(
        float_list=FloatList(value=[cell_data["summary"]["IR"][idx]])
    )

    # Per-cycle curves; the cycle records are keyed by the index as a string.
    cycle_data = cell_data["cycles"][str(idx)]
    qdlin_feature = Feature(float_list=FloatList(value=cycle_data["Qdlin"]))
    tdlin_feature = Feature(float_list=FloatList(value=cycle_data["Tdlin"]))

    # Training target: how many cycles are left after this one.
    remaining = int(cell_data["cycle_life"]-idx)
    remaining_feature = Feature(int64_list=Int64List(value=[remaining]))

    return Example(
        features=Features(
            feature={
                "IR": ir_feature,
                "Qdlin": qdlin_feature,
                "Tdlin": tdlin_feature,
                "Remaining_cycles": remaining_feature,
            }
        )
    )


# Write one TFRecord file per cell, with one serialized Example per cycle.
tfrecord_dir = os.path.join("Data", "tfrecords")
# Make sure the output directory exists before any writer is opened.
os.makedirs(tfrecord_dir, exist_ok=True)

for cell in batch1:
    # Join directory and file name as separate components
    # (os.path.join with a single pre-concatenated string does nothing).
    filename = os.path.join(tfrecord_dir, cell + ".tfrecord")
    with tf.io.TFRecordWriter(filename) as f:
        # NOTE: the range starts at 0, so the first cycle (noted as being all
        # Nan/0) IS written to the file; the reading cell drops it again with
        # `.skip(1)`. The last index written is cycle_life - 2.
        for cycle in range(int(batch1[cell]["cycle_life"])-1):
            cycle_to_write = get_cycle_features(cell, cycle)
            f.write(cycle_to_write.SerializeToString())
    break # only writes first cell, remove this to write one file for each battery in batch1

# Reading from TFRecord

In [266]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, DenseFeatures
from tensorflow.feature_column import numeric_column, make_parse_example_spec

In [276]:
# Open the record file of a single cell; skip(1) discards the first stored record.
cell = "b1c0"
tfrecord_path = "Data/tfrecords/" + cell + ".tfrecord"
raw_dataset = tf.data.TFRecordDataset([tfrecord_path]).skip(1)

In [277]:
# VERSION 1
# Manual reading: the schema of every serialized Example is spelled out by hand.

# One entry per feature written by get_cycle_features: two scalars and two
# fixed-length vectors of 1000 floats.
feature_description = {
    "IR": tf.io.FixedLenFeature([], tf.float32),
    "Qdlin": tf.io.FixedLenFeature([1000], tf.float32),
    "Tdlin": tf.io.FixedLenFeature([1000], tf.float32),
    "Remaining_cycles": tf.io.FixedLenFeature([], tf.int64),
}


def _parse_features(example_proto):
    """Decode one serialized Example and separate the label from the inputs.

    Args:
        example_proto: a single serialized Example, as yielded by the raw
            TFRecord dataset.

    Returns:
        A ``(features, target)`` pair: the parsed feature dict without the
        "Remaining_cycles" entry, and that entry on its own.
    """
    parsed = tf.io.parse_single_example(example_proto, feature_description)
    label = parsed.pop("Remaining_cycles")
    return parsed, label


# Parse record by record, then group the parsed records into batches of 5.
dataset = raw_dataset.map(_parse_features)
dataset = dataset.batch(5)

In [278]:
# VERSION 2
# Reading with feature columns: the parsing schema is derived from the columns.
# NOTE: this cell redefines `_parse_features` and `dataset` from VERSION 1.

ir = numeric_column("IR", shape=[])
qdlin = numeric_column("Qdlin", shape=[1000])
tdlin = numeric_column("Tdlin", shape=[1000])
rem_cycles = numeric_column("Remaining_cycles", shape=[], dtype=tf.int64)
# The label column goes last; the model cell relies on this via columns[:-1].
columns = [ir, qdlin, tdlin, rem_cycles]


def _parse_features(example_proto):
    """Decode one serialized Example using the spec built from ``columns``.

    Args:
        example_proto: a single serialized Example, as yielded by the raw
            TFRecord dataset.

    Returns:
        A ``(features, target)`` pair: the parsed feature dict without the
        "Remaining_cycles" entry, and that entry on its own.
    """
    parse_spec = make_parse_example_spec(columns)
    parsed = tf.io.parse_single_example(example_proto, parse_spec)
    label = parsed.pop("Remaining_cycles")
    return parsed, label


# Parse record by record, then group the parsed records into batches of 5.
dataset = raw_dataset.map(_parse_features)
dataset = dataset.batch(5)

In [None]:
# Sanity check: look at a single batch only. Iterating over the whole dataset
# would print every batch and flood the cell output.
for examples, target in dataset.take(1):
    print(examples, target)

# Feed dataset into model

In [280]:
# Create the model: a small fully connected regressor for the remaining cycle count.
model = Sequential()
# columns[:-1] leaves out the last column (the "Remaining_cycles" label),
# so only IR, Qdlin and Tdlin are fed into the network.
model.add(DenseFeatures(feature_columns=columns[:-1]))
model.add(Dense(36))
model.add(Activation("relu"))
model.add(Dense(18))
model.add(Activation("relu"))
# Linear output: the target is an unbounded integer count, not a probability,
# so the sigmoid (which squashes the output into (0, 1)) has been removed.
model.add(Dense(1))

# Regression loss and metric instead of binary_crossentropy / accuracy,
# which only make sense for binary class labels.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

In [None]:
# Train for 10 passes over the batched (features, target) dataset.
model.fit(x=dataset, epochs=10)

If we store each cycle in a row and select cycle batches randomly, how do we prevent one batch from taking the last few cycles of one cell and the first few of the next? -> Maybe we need to create a separate TFRecord file for each cell.

If we set a batch size for training, do we also need as many cycles for any prediction?

Do we need to keep the order of the detail-level data (e.g. Qdlin, Tdlin)? Each of them is a time series.

# MORE STUFF
# Example with FeatureList

In [None]:
import tensorflow as tf
from tensorflow.train import FloatList
from tensorflow.train import Feature, Features, FeatureList, FeatureLists, SequenceExample

In [None]:
# Write
# NOTE: `cell`, `ir`, `qdlin` and `tdlin` rebind names used by earlier cells
# (the cell id string and the VERSION 2 feature columns).
cell = batch1["b1c0"]
# Select the cycle explicitly instead of relying on a `cycle` variable left
# over from another cell (the write loop leaves it bound to an integer index,
# which cannot be subscripted). Cycle "1" matches the IR summary index below.
cycle = cell["cycles"]["1"]

# Context feature: one scalar per cycle.
ir = Feature(float_list=FloatList(value=[cell["summary"]["IR"][1]]))
# Detail features: the per-cycle curves, stored as consecutive list entries.
qdlin = Feature(float_list=FloatList(value=cycle["Qdlin"]))
tdlin = Feature(float_list=FloatList(value=cycle["Tdlin"]))

detail_features = FeatureList(feature=[qdlin, tdlin])

cycle_example = SequenceExample(
    context = Features(feature={"IR":ir}),
    feature_lists = FeatureLists(feature_list={"Details":detail_features})
)

with tf.io.TFRecordWriter("my_aligned_cycle.tfrecord") as f:
    f.write(cycle_example.SerializeToString())

In [None]:
# Read
# Schema of the context part: a single float, falling back to 0 when absent.
context_feature_description = {
    "IR": tf.io.FixedLenFeature(shape=[], dtype=tf.float32, default_value=0),
}

# Schema of the sequence part: every step of "Details" is a vector of 1000 floats.
sequence_feature_description = {
    "Details": tf.io.FixedLenSequenceFeature(shape=[1000], dtype=tf.float32),
}

sequence_dataset = tf.data.TFRecordDataset(["my_aligned_cycle.tfrecord"])
for serialized_example in sequence_dataset:
    parsed_example = tf.io.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_feature_description,
        sequence_features=sequence_feature_description,
    )
    # Element 1 of the result holds the parsed feature lists; print the first
    # step of "Details" (the Qdlin entry for the file written in the previous cell).
    sequence_part = parsed_example[1]
    print(sequence_part["Details"][0])