In [1]:
import os
import numpy as np
from sklearn.preprocessing import StandardScaler
from joblib import dump

# Directory prefixes. Paths in the cells below are built by plain string
# concatenation (e.g. base + UV + file name), so the trailing "/" is required.
base = "data_max_integration_time/"
save_base = "preprocessed_data/"

# Sub-directories of `base`; each of the three loading cells below reads one of them.
UV = "UV/"
VIS = "VIS/"
IR = "IR/"

# File names skipped by every loading cell. The commented-out list looks like a
# previously used exclusion set -- confirm before re-enabling it.
exclude_files = [] # ["19.csv", "13.csv"]

# Single scaler object shared by the cells below: each cell refits it with
# fit_transform() on its own matrix and dumps it to a separate .joblib file
# immediately afterwards.
scaler = StandardScaler()

In [2]:
# UV files
#
# Every file in base + UV is read as text: all lines except the last hold
# "|"-separated numeric readings, the last line holds the label in the form
# "glucose_concentration:<value>". One feature row and one label are produced
# per file; the raw matrix, the labels, the standardised matrix and the fitted
# scaler are written to disk.

filepath = base + UV
# os.listdir() returns names in arbitrary order. Sorting makes the row order
# deterministic (by file name), which is what lets the rows of y.csv be matched
# against matrices built from other directories holding the same file names.
files = sorted(os.listdir(filepath))

# Collect per-file results in lists and stack once after the loop; this avoids
# re-copying the whole array on every iteration and always yields a 2-D matrix,
# even when only one file is present.
UV_rows = []
targets = []

for file in files:

    if file in exclude_files:
        continue

    with open(filepath + file, "r") as f:
        content = f.readlines()

    entries = content[0:-1]
    glucose_concentration = float(content[-1].replace("glucose_concentration:", "").replace("\n", ""))

    value = [np.fromstring(entry.replace("\n", ""), sep="|") for entry in entries]
    # Concatenate all readings, view them as two equal-length rows and average
    # the rows element-wise (with two equally long lines this is the mean of
    # the two lines).
    value = np.concatenate(value, axis=0).reshape(2, -1).mean(axis=0)

    UV_rows.append(value)
    targets.append(glucose_concentration)

if not UV_rows:
    raise ValueError("no input files found in " + filepath)

UV_x = np.vstack(UV_rows)
y = np.array(targets)

print(UV_x.shape, y.shape)

# np.savetxt does not create missing directories.
os.makedirs(save_base, exist_ok=True)

np.savetxt(save_base + "y.csv", y, delimiter=",")
np.savetxt(save_base + "UV_x.csv", UV_x, delimiter=",")

# fit_transform() refits the shared scaler on the UV matrix; dumping right
# after the fit stores the UV-specific parameters.
scaled_UV_x = scaler.fit_transform(UV_x)
dump(scaler, "UV_std_scaler.joblib")

np.savetxt(save_base + "UV_x_scaled.csv", scaled_UV_x, delimiter=",")


(38, 19) (38,)


In [3]:
# VIS files
#
# Every file in base + VIS is read as text; all lines except the last are
# parsed as "|"-separated numeric readings and reduced to one feature row per
# file. The last line is dropped without being parsed (presumably the same
# label line as in the UV files -- confirm against the data). The raw matrix,
# the standardised matrix and the fitted scaler are written to disk.

filepath = base + VIS
# os.listdir() returns names in arbitrary order. Sorting makes the row order
# deterministic (by file name); the labels in y.csv come from a different
# directory, so rows can only line up with them when both are iterated in the
# same file-name order.
files = sorted(os.listdir(filepath))

# Collect rows in a list and stack once after the loop; this avoids re-copying
# the whole array on every iteration and always yields a 2-D matrix, even when
# only one file is present.
VIS_rows = []

for file in files:

    if file in exclude_files:
        continue

    with open(filepath + file, "r") as f:
        content = f.readlines()

    entries = content[0:-1]

    value = [np.fromstring(entry.replace("\n", ""), sep="|") for entry in entries]
    # Concatenate all readings, view them as two equal-length rows and average
    # the rows element-wise.
    value = np.concatenate(value, axis=0).reshape(2, -1).mean(axis=0)

    VIS_rows.append(value)

if not VIS_rows:
    raise ValueError("no input files found in " + filepath)

VIS_x = np.vstack(VIS_rows)

print(VIS_x.shape)

# fit_transform() refits the shared scaler on the VIS matrix; dumping right
# after the fit stores the VIS-specific parameters.
scaled_VIS_x = scaler.fit_transform(VIS_x)
dump(scaler, "VIS_std_scaler.joblib")

# np.savetxt does not create missing directories.
os.makedirs(save_base, exist_ok=True)

np.savetxt(save_base + "VIS_x.csv", VIS_x, delimiter=",")
np.savetxt(save_base + "VIS_x_scaled.csv", scaled_VIS_x, delimiter=",")

(38, 19)


In [4]:
# IR files
#
# Every file in base + IR is read as text; all lines except the last are
# parsed as "|"-separated numeric readings and reduced to one feature row per
# file. The last line is dropped without being parsed (presumably the same
# label line as in the UV files -- confirm against the data). The raw matrix,
# the standardised matrix and the fitted scaler are written to disk.

filepath = base + IR
# os.listdir() returns names in arbitrary order. Sorting makes the row order
# deterministic (by file name); the labels in y.csv come from a different
# directory, so rows can only line up with them when both are iterated in the
# same file-name order.
files = sorted(os.listdir(filepath))

# Collect rows in a list and stack once after the loop; this avoids re-copying
# the whole array on every iteration and always yields a 2-D matrix, even when
# only one file is present.
IR_rows = []

for file in files:

    if file in exclude_files:
        continue

    with open(filepath + file, "r") as f:
        content = f.readlines()

    entries = content[0:-1]

    value = [np.fromstring(entry.replace("\n", ""), sep="|") for entry in entries]
    # Concatenate all readings, view them as two equal-length rows and average
    # the rows element-wise.
    value = np.concatenate(value, axis=0).reshape(2, -1).mean(axis=0)

    IR_rows.append(value)

if not IR_rows:
    raise ValueError("no input files found in " + filepath)

IR_x = np.vstack(IR_rows)

print(IR_x.shape)

# fit_transform() refits the shared scaler on the IR matrix; dumping right
# after the fit stores the IR-specific parameters.
scaled_IR_x = scaler.fit_transform(IR_x)
dump(scaler, "IR_std_scaler.joblib")

# np.savetxt does not create missing directories.
os.makedirs(save_base, exist_ok=True)

np.savetxt(save_base + "IR_x.csv", IR_x, delimiter=",")
np.savetxt(save_base + "IR_x_scaled.csv", scaled_IR_x, delimiter=",")

(38, 19)
