# Data wrangling

AC voltage signals are extracted from each file in each folder. The signals are then saved as NumPy arrays, in `.npy` format, in the `Data/wrangled_data` folder.

In [2]:
import pandas as pd
import numpy as np
import os

In [3]:
def get_voltage(
    parent_directory: str, n_points: int = 10001, skiprows: int = 57
) -> np.ndarray:
    """Load the baseline-shifted ``Vf`` signal of every file in a folder.

    Each file is parsed as a whitespace-delimited table with ten columns
    (``Pt, T, Vf, Im, Vu, Sig, Ach, IERange, Over, Temp``) after skipping
    ``skiprows`` leading lines. The ``Vf`` column is shifted so that its
    first sample is zero and is stored as one row of the result.

    Parameters
    ----------
    parent_directory : str
        Folder containing the data files. A trailing path separator is
        optional. Entries that are not regular files are ignored.
    n_points : int, default 10001
        Number of ``Vf`` samples expected in every file.
    skiprows : int, default 57
        Number of leading lines to skip in every file.

    Returns
    -------
    np.ndarray
        Array of shape ``(number of files, n_points)``. Rows follow the
        sorted order of the file names.

    Raises
    ------
    ValueError
        If a file does not have exactly ten columns, or does not yield
        exactly ``n_points`` samples.
    """
    header_row = ["Pt", "T", "Vf", "Im", "Vu", "Sig", "Ach", "IERange", "Over", "Temp"]

    # List the folder once and sort it: os.listdir returns entries in
    # arbitrary order, which would make the row order irreproducible.
    filenames = sorted(
        name
        for name in os.listdir(parent_directory)
        if os.path.isfile(os.path.join(parent_directory, name))
    )

    matrix = np.empty(shape=(len(filenames), n_points))
    for i, filename in enumerate(filenames):
        path = os.path.join(parent_directory, filename)
        # sep=r"\s+" is the non-deprecated spelling of delim_whitespace=True.
        df = pd.read_table(path, sep=r"\s+", header=None, skiprows=skiprows)
        df.columns = header_row

        vf = df["Vf"]
        # If the last line of the file is text, the column is parsed as
        # strings: drop that last entry and convert the rest to floats.
        if isinstance(vf.iloc[-1], str):
            vf = vf.iloc[:-1].astype(float)

        row = np.asarray(vf - vf.iloc[0])
        if row.shape != (n_points,):
            raise ValueError(
                f"{path}: expected {n_points} samples, found {row.shape[0]}"
            )
        matrix[i, :] = row

    return matrix

In [13]:
# Read the AS, CS and Normal "60" folders, one signal matrix per condition.
as_600, cs_600, normal_600 = (
    get_voltage(folder)
    for folder in (
        "Data/Data/Voltage response/AS/PWRCAPACITY_AS_60/OTHER/",
        "Data/Data/Voltage response/CS/PWRCAPACITY_CS_60/OTHER/",
        "Data/Data/Voltage response/Normal/PWRCAPACITY_Normal_60/OTHER/",
    )
)

In [12]:
# Sanity check: get_voltage returns one row per file and 10001 columns per row.
normal_600.shape

(250, 10001)

In [16]:
# Signal matrices and the file names they are stored under (same order).
cd_600_conds = [as_600, cs_600, normal_600]
cd_600_names = ["as_600", "cs_600", "normal_600"]

# np.save does not create missing folders, so make sure the target exists.
os.makedirs("Data/wrangled_data", exist_ok=True)
for name, signals in zip(cd_600_names, cd_600_conds):
    np.save(f"Data/wrangled_data/{name}.npy", signals)

In [17]:
# Load one of the arrays saved above to inspect the contents of the .npy file.
test = np.load("Data/wrangled_data/as_600.npy")
test

array([[ 0.0000e+00, -1.8020e-03, -1.6020e-03, ..., -1.9020e-03,
        -2.1020e-03, -1.6020e-03],
       [ 0.0000e+00, -1.3020e-03, -1.4020e-03, ..., -5.9020e-03,
        -5.7020e-03, -5.6020e-03],
       [ 0.0000e+00, -2.0200e-04, -2.0000e-06, ...,  5.4980e-03,
         5.8980e-03,  6.2980e-03],
       ...,
       [ 0.0000e+00, -5.0200e-04,  1.9800e-04, ..., -3.2020e-03,
        -2.9020e-03, -2.7020e-03],
       [ 0.0000e+00, -1.8020e-03, -1.7020e-03, ..., -6.9020e-03,
        -6.6020e-03, -6.5020e-03],
       [ 0.0000e+00, -1.8020e-03, -1.4020e-03, ...,  1.1598e-02,
         1.1598e-02,  1.1998e-02]])

In [18]:
# Read the AS, CS and Normal "70" folders, one signal matrix per condition.
as_700, cs_700, normal_700 = (
    get_voltage(folder)
    for folder in (
        "Data/Data/Voltage response/AS/PWRCAPACITY_AS_70/OTHER/",
        "Data/Data/Voltage response/CS/PWRCAPACITY_CS_70/OTHER/",
        "Data/Data/Voltage response/Normal/PWRCAPACITY_Normal_70/OTHER/",
    )
)

In [19]:
# Signal matrices and the file names they are stored under (same order).
cd_700_conds = [as_700, cs_700, normal_700]
cd_700_names = ["as_700", "cs_700", "normal_700"]

# np.save does not create missing folders, so make sure the target exists.
os.makedirs("Data/wrangled_data", exist_ok=True)
for name, signals in zip(cd_700_names, cd_700_conds):
    np.save(f"Data/wrangled_data/{name}.npy", signals)

In [20]:
# Read the AS, CS and Normal "80" folders, one signal matrix per condition.
as_800, cs_800, normal_800 = (
    get_voltage(folder)
    for folder in (
        "Data/Data/Voltage response/AS/PWRCAPACITY_AS_80/OTHER/",
        "Data/Data/Voltage response/CS/PWRCAPACITY_CS_80/OTHER/",
        "Data/Data/Voltage response/Normal/PWRCAPACITY_Normal_80/OTHER/",
    )
)

In [21]:
# Signal matrices and the file names they are stored under (same order).
cd_800_conds = [as_800, cs_800, normal_800]
cd_800_names = ["as_800", "cs_800", "normal_800"]

# np.save does not create missing folders, so make sure the target exists.
os.makedirs("Data/wrangled_data", exist_ok=True)
for name, signals in zip(cd_800_names, cd_800_conds):
    np.save(f"Data/wrangled_data/{name}.npy", signals)