## Setup

In [1]:
import os
import sys
import csv
import time
import numpy as np
import itertools

In [2]:
# Base directory that every other path in this notebook is derived from
PATH_TO_DATA = '/var/lib/cream/'
PATH_TO_CREAM = f'{PATH_TO_DATA}CREAM/'
# 50 carriage returns, printed in front of each status message
LINE_DEL = '\r' * 50

In [3]:
# The CREAM data utility is not an installed package: put its folder on the
# module search path first, then import the day loader from it.
sys.path.append(f"{PATH_TO_CREAM}data_utility/")
from data_utility import CREAM_Day

cream = CREAM_Day(f"{PATH_TO_DATA}2018-08-23/")

# Read the electrical component events from the CSV in the data root
# (filter_day=False is passed — presumably keeps events of all days; confirm
# against the data utility).
all_component_events = cream.load_component_events(
    f"{PATH_TO_DATA}component_events.csv",
    filter_day=False,
)

In [4]:
# Create the folder for the csv files.
# exist_ok=True keeps the cell safe to re-run and removes the gap between
# checking for the directory and creating it; missing parent directories are
# created as well.
os.makedirs(PATH_TO_DATA + 'component_events', exist_ok=True)

## Converting the dataset

In [5]:
# Export every labeled component event to its own csv file
# (first row: voltage samples, second row: current samples).
start = time.time()
labeled_events = all_component_events[all_component_events.Component != 'unlabeled']
total_events = len(labeled_events)

for done, event in enumerate(labeled_events.itertuples()):
    # Progress is measured against the events actually being exported, so it
    # does not depend on a hard-coded total or on how event IDs are numbered.
    percent_done = round(100 * done / total_events, 2)
    if done:
        # Remaining time = average time per finished event * events still left.
        elapsed = time.time() - start
        remaining = int(elapsed / done * (total_events - done) / 60)
    else:
        # Nothing finished yet: no basis for an estimate, and dividing by the
        # completed fraction would raise ZeroDivisionError.
        remaining = '?'
    print(f'{LINE_DEL}{percent_done}% finished   {remaining} minutes remaining',
          f'reading event ID-{event.ID}',
          end='', flush=True)
    cream_day = CREAM_Day(PATH_TO_DATA + str(event.Date))
    # 10 is passed as the second argument — presumably the length of the
    # window around the event timestamp; confirm against the data utility.
    voltage, current = \
        cream_day.load_time_frame(event.Timestamp, 10, return_noise=False)
    # The csv module expects files it writes to be opened with newline='';
    # otherwise platforms that translate '\n' produce '\r\r\n' line endings.
    with open(f"{PATH_TO_DATA}component_events/{event.ID}.csv", 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(voltage)
        writer.writerow(current)

print(f"{LINE_DEL}FINISHED!")

FINISHED!


## Analyzing performance

In [6]:
# Pick the first labeled event as the sample for the timing comparison
event = all_component_events.loc[all_component_events.Component != 'unlabeled'].iloc[0]

# Time loading the event through the CREAM utility
start = time.time()
cream_day = CREAM_Day(PATH_TO_DATA + str(event.Date))
voltage, current = cream_day.load_time_frame(
    event.Timestamp, 10, return_noise=False
)
print(f"Reading hdf5:\t{round(time.time() - start, 2)}s")

# Time loading the same event from its exported csv file
# (first row holds the voltage samples, second row the current samples)
start = time.time()
with open(f"{PATH_TO_DATA}component_events/{event.ID}.csv", 'r') as csv_file:
    rows = csv.reader(csv_file)
    voltage_csv, current_csv = (
        np.array(next(rows)).astype(float) for _ in range(2)
    )
print(f"Reading csv:\t{round(time.time() - start, 2)}s")

Reading hdf5:	6.65s
Reading csv:	0.11s


In [7]:
# Check for differences in information read from csv compared to original.
# The absolute difference is required here: a signed max ignores every sample
# where the csv value is larger than the original, so deviations in that
# direction would go unreported.
print("Maximum difference in voltage:\t", np.max(np.abs(voltage - voltage_csv)))
print("Maximum difference in current:\t", np.max(np.abs(current - current_csv)))

Maximum difference in voltage:	 0.0
Maximum difference in current:	 0.0
