In [4]:
from os import listdir
from os.path import isfile, join
import pandas as pd
import numpy as np
import joblib



class DataHandler:
    """Load raw CSV logs and derive the per-sample feature columns.

    Every method returns a new DataFrame and leaves the frame it was given
    untouched, so intermediate results can be reused safely.

    The derived columns are computed by moving index *labels* by one and
    letting pandas align on them. Rows whose neighbour label is missing
    (the first/last row, or a row next to a gap in the index) therefore
    end up with NaN and are removed by the trailing ``dropna()``.
    """

    # Columns copied from the raw CSV by ``import_raw``.
    _RAW_COLUMNS = ['Epoch Time', 'Z1 BlkTemp', 'Z1 HeatSink', 'Z1 Iset', 'Z1 Imea']

    # Raw column name -> name used by the rest of the pipeline.
    _COLUMN_RENAMES = {
        'Z1 BlkTemp': 'Block Temp',
        'Z1 HeatSink': 'Heat Sink Temp',
        'Z1 Iset': 'Iset',
        'Z1 Imea': 'Imeasure',
    }

    def __init__(self):
        """Create a handler; it holds no state."""
        pass

    @staticmethod
    def _shift_labels(series, offset):
        """Return a copy of *series* with every index label moved by *offset*."""
        shifted = series.copy()
        shifted.index = shifted.index + offset
        return shifted

    def import_raw(self, volume, path):
        """Read one raw CSV and return the columns used downstream.

        Args:
            volume: Value stored in the added ``Volume`` column for every row.
            path: Anything ``pandas.read_csv`` accepts (file path or buffer).

        Returns:
            A new DataFrame with the columns ``Epoch Time``, ``Z1 BlkTemp``,
            ``Z1 HeatSink``, ``Z1 Iset``, ``Z1 Imea`` and ``Volume``.

        Raises:
            KeyError: If ``Z1 Vset`` or any of the columns above is missing.
        """
        full_data = pd.read_csv(path)

        # On rows where 'Z1 Vset' is negative, flip the sign of both current
        # columns.
        negative_vset = full_data['Z1 Vset'] < 0
        for column in ('Z1 Iset', 'Z1 Imea'):
            full_data.loc[negative_vset, column] *= -1

        # .copy() so adding 'Volume' never writes into a slice of full_data.
        raw_data = full_data[self._RAW_COLUMNS].copy()
        raw_data['Volume'] = volume
        return raw_data

    def rename_labels(self, raw_data: pd.DataFrame) -> pd.DataFrame:
        """Return *raw_data* with the ``Z1 ...`` columns renamed.

        ``Z1 BlkTemp`` -> ``Block Temp``, ``Z1 HeatSink`` -> ``Heat Sink Temp``,
        ``Z1 Iset`` -> ``Iset``, ``Z1 Imea`` -> ``Imeasure``.
        """
        return raw_data.rename(columns=self._COLUMN_RENAMES)

    def replace_with_period(self, raw_data: pd.DataFrame) -> pd.DataFrame:
        """Replace ``Epoch Time`` with ``Period`` (next epoch minus this epoch).

        The row without a following sample has no period and is dropped,
        together with any other row containing NaN.
        """
        epoch = raw_data['Epoch Time']
        # Relabel sample i+1 as i so the subtraction pairs each row with its
        # successor.
        next_epoch = self._shift_labels(epoch.iloc[1:], -1)
        period = next_epoch - epoch

        result = raw_data.assign(**{'Epoch Time': period})
        result = result.rename(columns={'Epoch Time': 'Period'})
        return result.dropna()

    def add_current_block_rate(self, raw_data: pd.DataFrame) -> pd.DataFrame:
        """Add ``Block Rate``: change in ``Block Temp`` since the previous row,
        divided by this row's ``Period``.

        The row without a previous sample is dropped, together with any other
        row containing NaN.
        """
        block_temp = raw_data['Block Temp']
        # Relabel sample i-1 as i so each row is paired with its predecessor.
        prev_temp = self._shift_labels(block_temp.iloc[:-1], 1)

        # NOTE(review): 'Period' as produced by replace_with_period is the time
        # to the *next* sample, while the numerator is the change since the
        # *previous* one; the two only coincide for uniform sampling - confirm
        # this is intended.
        block_rate = (block_temp - prev_temp) / raw_data['Period']
        return raw_data.assign(**{'Block Rate': block_rate}).dropna()

    def add_new_block_rate(self, raw_data: pd.DataFrame) -> pd.DataFrame:
        """Add ``New Block Rate``: (``New Block Temp`` - ``Block Temp``) / ``Period``.

        Requires the ``New Block Temp`` column (see ``add_new_block_temp``).
        Rows containing NaN are dropped.
        """
        new_rate = (raw_data['New Block Temp'] - raw_data['Block Temp']) / raw_data['Period']
        return raw_data.assign(**{'New Block Rate': new_rate}).dropna()

    def add_new_block_temp(self, raw_data: pd.DataFrame) -> pd.DataFrame:
        """Add ``New Block Temp``: the ``Block Temp`` of the following row.

        The row without a following sample is dropped, together with any other
        row containing NaN.
        """
        new_block_temp = self._shift_labels(raw_data['Block Temp'].iloc[1:], -1)
        return raw_data.assign(**{'New Block Temp': new_block_temp}).dropna()

    def select_columns(self, raw_data: pd.DataFrame, columns):
        """Return only *columns* of *raw_data*, in the given order.

        Raises:
            KeyError: If a requested column is not present.
        """
        return raw_data[columns]

    def process_data(self, raw_data: pd.DataFrame) -> pd.DataFrame:
        """Run the full feature pipeline on a frame produced by ``import_raw``.

        Order matters: ``Period`` must exist before either rate is computed,
        and ``New Block Temp`` must exist before ``New Block Rate``.
        """
        processed = self.rename_labels(raw_data)
        processed = self.replace_with_period(processed)
        processed = self.add_current_block_rate(processed)
        processed = self.add_new_block_temp(processed)
        processed = self.add_new_block_rate(processed)
        return processed

    
############################################################################################
# CREATE RAW DATASET                                                                       # 
############################################################################################

pcr_train_path = 'train/pcr_training_set.csv'
pcr_test_path = 'test/pcr_testing_set.csv'
peltier_train_path = 'train/peltier_training_set.csv'
peltier_test_path = 'test/peltier_testing_set.csv'

# Columns written to each dataset file, in output order.
PCR_COLUMNS = [
    'Volume',
    'Block Temp',
    'Block Rate',
    'Iset',
    'Imeasure',
    'New Block Rate',
]
PELTIER_COLUMNS = [
    'Heat Sink Temp',
    'Block Temp',
    'Iset',
    'Imeasure',
]


def _build_datasets(handler, raw_root, volumes):
    """Process every raw file under ``{raw_root}/{volume}ul/`` for each volume.

    Args:
        handler: Object providing ``import_raw``, ``process_data`` and
            ``select_columns`` (a ``DataHandler``).
        raw_root: Directory that contains one ``<volume>ul`` sub-directory per
            volume.
        volumes: Volumes to load; each is also stored in the ``Volume`` column.

    Returns:
        ``(pcr_dataset, peltier_dataset)``: the per-file frames stacked in
        processing order. Both are empty DataFrames when no files are found.
    """
    pcr_frames = []
    peltier_frames = []
    for volume in volumes:
        dir_path = f"{raw_root}/{volume}ul/"
        # os.listdir returns entries in arbitrary order; sort so that the row
        # order of the generated files is reproducible between runs.
        file_list = sorted(
            join(dir_path, f) for f in listdir(dir_path) if isfile(join(dir_path, f))
        )
        for path in file_list:
            new_data = handler.import_raw(volume=volume, path=path)
            new_data = handler.process_data(new_data)
            pcr_frames.append(handler.select_columns(new_data, columns=PCR_COLUMNS))
            peltier_frames.append(handler.select_columns(new_data, columns=PELTIER_COLUMNS))

    # Concatenate once at the end: DataFrame.append was removed in pandas 2.0
    # and re-copied the whole accumulated frame on every call.
    # pd.concat raises on an empty list, so fall back to an empty frame.
    pcr = pd.concat(pcr_frames) if pcr_frames else pd.DataFrame()
    peltier = pd.concat(peltier_frames) if peltier_frames else pd.DataFrame()
    return pcr, peltier


handler = DataHandler()

# Training set.
pcr_dataset, peltier_dataset = _build_datasets(handler, 'train/raw', [5, 10, 30, 50])
pcr_dataset.to_csv(pcr_train_path, index=False)
peltier_dataset.to_csv(peltier_train_path, index=False)

# Test set (no 5 ul runs). After this point pcr_dataset / peltier_dataset hold
# the test data.
pcr_dataset, peltier_dataset = _build_datasets(handler, 'test/raw', [10, 30, 50])
pcr_dataset.to_csv(pcr_test_path, index=False)
peltier_dataset.to_csv(peltier_test_path, index=False)

KeyError: "['Heat Sink Temp'] not in index"