In [40]:
import os
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ShuffleSplit

# Input root: one sub-directory per spectral band (UV / VIS / IR).
base = "data_max_integration_time/"

# Output roots for the preprocessed train / test CSV files.
save_base = "preprocessed_data/"
train_base = save_base + "train/"
test_base = save_base + "test/"

# np.savetxt does not create missing directories, so make sure both output
# folders exist before any later cell writes into them (no-op if they do).
for out_dir in (train_base, test_base):
    os.makedirs(out_dir, exist_ok=True)

# Band sub-directory names, appended to `base`.
UV = "UV/"
VIS = "VIS/"
IR = "IR/"

# File names that are skipped in every band directory.
exclude_files = ["19.csv", "13.csv"]

# Shared scaler instance; each cell re-fits it before use.
scaler = StandardScaler()
# Single shuffled 80/20 train/test split with a fixed seed.
sss = ShuffleSplit(n_splits=1, test_size=0.2, random_state=10)

In [41]:
# UV files
#
# Each file in the UV directory (except those in `exclude_files`) is parsed as:
#   * every line but the last: "|"-separated numeric readings
#   * last line: "glucose_concentration:<value>"
# The readings of a file are flattened, split into two equal halves and
# averaged element-wise, giving one feature row per file.
# The train/test split computed here (`train_index` / `test_index`) and the
# target vector `y` are reused by the VIS, IR and merge cells.
#
# NOTE(review): os.listdir returns entries in arbitrary order, so which file
# ends up in which split row is filesystem-dependent; the later cells match
# rows to `y` by position only. Consider sorting the listing in *all* band
# cells together.

filepath = base + UV
files = os.listdir(filepath)

# Collect rows in lists and stack once after the loop instead of re-allocating
# the arrays on every iteration.
UV_rows = []
targets = []

for file in files:

    if file in exclude_files:
        continue

    # Only hold the file open while reading it.
    with open(filepath + file, "r") as f:
        content = f.readlines()

    entries = content[0:-1]
    glucose_concentration = float(content[-1].replace("glucose_concentration:", "").replace("\n", ""))

    value = [np.fromstring(entry.replace("\n", ""), sep="|") for entry in entries]
    # Flatten all readings, view them as two halves and average the halves.
    value = np.concatenate(value, axis=0).reshape(2, -1).mean(axis=0)

    UV_rows.append(value)
    targets.append(glucose_concentration)

if not UV_rows:
    raise ValueError("no usable UV files found in " + filepath)

# vstack always yields a 2-D (n_files, n_features) array, even for one file.
UV_x = np.vstack(UV_rows)
y = np.array(targets)

print(UV_x.shape, y.shape)

train_index, test_index = next(sss.split(UV_x, y))

np.savetxt(train_base + "y.csv", y[train_index], delimiter=",")
np.savetxt(test_base + "y.csv", y[test_index], delimiter=",")

np.savetxt(train_base + "UV_x.csv", UV_x[train_index], delimiter=",")
np.savetxt(test_base + "UV_x.csv", UV_x[test_index], delimiter=",")

# Fit the scaler on the training rows only, then apply it to every row, so the
# test rows do not influence the mean/variance used for scaling.
scaler.fit(UV_x[train_index])
scaled_UV_x = scaler.transform(UV_x)

print(train_index, y[test_index])

np.savetxt(train_base + "UV_x_scaled.csv", scaled_UV_x[train_index], delimiter=",")
np.savetxt(test_base + "UV_x_scaled.csv", scaled_UV_x[test_index], delimiter=",")


(23, 19) (23,)
[12 20 13 14  3 21  6 18 10 19 22  8 16 17  0 15  4  9] [4.  3.8 6.1 4.1 6.2]


In [42]:
# VIS files
#
# Same parsing as the UV cell, minus the target: every line but the last holds
# "|"-separated readings, which are flattened, split into two halves and
# averaged into one feature row per file. Rows are written using the
# `train_index` / `test_index` split computed in the UV cell.
#
# NOTE(review): rows are matched to `y` / `train_index` purely by position,
# which assumes os.listdir yields this directory's files in the same order as
# the UV directory's — verify, or sort the listing in all band cells.

filepath = base + VIS
files = os.listdir(filepath)

# Collect rows in a list and stack once after the loop.
VIS_rows = []

for file in files:

    if file in exclude_files:
        continue

    # Only hold the file open while reading it.
    with open(filepath + file, "r") as f:
        content = f.readlines()

    # The last line is not a readings line and is dropped.
    entries = content[0:-1]

    value = [np.fromstring(entry.replace("\n", ""), sep="|") for entry in entries]
    # Flatten all readings, view them as two halves and average the halves.
    value = np.concatenate(value, axis=0).reshape(2, -1).mean(axis=0)

    VIS_rows.append(value)

if not VIS_rows:
    raise ValueError("no usable VIS files found in " + filepath)

# vstack always yields a 2-D (n_files, n_features) array, even for one file.
VIS_x = np.vstack(VIS_rows)

print(VIS_x.shape)

# The split indices were computed for len(y) samples; a different row count
# here would silently pair spectra with the wrong targets.
if VIS_x.shape[0] != y.shape[0]:
    raise ValueError("VIS row count does not match the number of targets")

# Fit the scaler on the training rows only, then apply it to every row, so the
# test rows do not influence the mean/variance used for scaling.
scaler.fit(VIS_x[train_index])
scaled_VIS_x = scaler.transform(VIS_x)

np.savetxt(train_base + "VIS_x.csv", VIS_x[train_index], delimiter=",")
np.savetxt(test_base + "VIS_x.csv", VIS_x[test_index], delimiter=",")

np.savetxt(train_base + "VIS_x_scaled.csv", scaled_VIS_x[train_index], delimiter=",")
np.savetxt(test_base + "VIS_x_scaled.csv", scaled_VIS_x[test_index], delimiter=",")

(23, 19)


In [43]:
# IR files
#
# Same parsing as the UV cell, minus the target: every line but the last holds
# "|"-separated readings, which are flattened, split into two halves and
# averaged into one feature row per file. Rows are written using the
# `train_index` / `test_index` split computed in the UV cell.
#
# NOTE(review): rows are matched to `y` / `train_index` purely by position,
# which assumes os.listdir yields this directory's files in the same order as
# the UV directory's — verify, or sort the listing in all band cells.

filepath = base + IR
files = os.listdir(filepath)

# Collect rows in a list and stack once after the loop.
IR_rows = []

for file in files:

    if file in exclude_files:
        continue

    # Only hold the file open while reading it.
    with open(filepath + file, "r") as f:
        content = f.readlines()

    # The last line is not a readings line and is dropped.
    entries = content[0:-1]

    value = [np.fromstring(entry.replace("\n", ""), sep="|") for entry in entries]
    # Flatten all readings, view them as two halves and average the halves.
    value = np.concatenate(value, axis=0).reshape(2, -1).mean(axis=0)

    IR_rows.append(value)

if not IR_rows:
    raise ValueError("no usable IR files found in " + filepath)

# vstack always yields a 2-D (n_files, n_features) array, even for one file.
IR_x = np.vstack(IR_rows)

print(IR_x.shape)

# The split indices were computed for len(y) samples; a different row count
# here would silently pair spectra with the wrong targets.
if IR_x.shape[0] != y.shape[0]:
    raise ValueError("IR row count does not match the number of targets")

# Fit the scaler on the training rows only, then apply it to every row, so the
# test rows do not influence the mean/variance used for scaling.
scaler.fit(IR_x[train_index])
scaled_IR_x = scaler.transform(IR_x)

np.savetxt(train_base + "IR_x.csv", IR_x[train_index], delimiter=",")
np.savetxt(test_base + "IR_x.csv", IR_x[test_index], delimiter=",")

np.savetxt(train_base + "IR_x_scaled.csv", scaled_IR_x[train_index], delimiter=",")
np.savetxt(test_base + "IR_x_scaled.csv", scaled_IR_x[test_index], delimiter=",")

(23, 19)


In [44]:
# Merge all data
#
# Concatenate the three band matrices column-wise (UV, then VIS, then IR) so
# each row holds the combined features of one sample, and save the result with
# the same train/test split used for the individual bands.

ALL_x = np.hstack((UV_x, VIS_x, IR_x))

# Fit the scaler on the training rows only, then apply it to every row, so the
# test rows do not influence the mean/variance used for scaling.
scaler.fit(ALL_x[train_index])
scaled_ALL_x = scaler.transform(ALL_x)

np.savetxt(train_base + "ALL_x.csv", ALL_x[train_index], delimiter=",")
np.savetxt(test_base + "ALL_x.csv", ALL_x[test_index], delimiter=",")

np.savetxt(train_base + "ALL_x_scaled.csv", scaled_ALL_x[train_index], delimiter=",")
np.savetxt(test_base + "ALL_x_scaled.csv", scaled_ALL_x[test_index], delimiter=",")