In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import glob
pd.set_option('display.max_columns', 41)

Data yang digunakan adalah data yang telah dikoreksi baseline

In [2]:
# Collect the baseline-corrected CSV files of each class from ../data_baseline:
# names starting with "_A" belong to class A, names starting with "_B" to class B.
# glob.glob() returns paths in arbitrary (filesystem-dependent) order, so the
# lists are sorted to keep the row order of every derived array/dataset
# reproducible between runs and machines.
A_csv_file = sorted(glob.glob("../data_baseline/_A*.csv"))
B_csv_file = sorted(glob.glob("../data_baseline/_B*.csv"))

In [3]:
def get_MaxGradient(dataframe, column):
    """Return the largest numerical gradient of one DataFrame column.

    The gradient is estimated with ``np.gradient`` assuming unit spacing
    between consecutive samples and second-order accurate differences at
    both ends (``edge_order=2``).

    Args:
        dataframe (DataFrame): table holding the signal.
        column (str): name of the column to differentiate.

    Returns:
        float: maximum value of the estimated gradient. If the gradient
        contains NaN, NaN is returned.

    Raises:
        ValueError: raised by ``np.gradient`` when the column is too short
            for second-order edge differences.
    """
    values = dataframe[column].to_numpy()
    # np.max instead of the builtin max(): the builtin compares element by
    # element, so with NaN present its result depends on where the NaN sits
    # (a trailing NaN is silently ignored, a leading NaN is returned).
    # np.max always propagates NaN, and is vectorised.
    return np.max(np.gradient(values, edge_order=2))

In [4]:
def extract_FeaturesCubic(CSV_files, label, return_dataset=False, n_sensors=10):
    """Extract max, mean, median and max-gradient features from CSV files.

    Every file is read with ``pd.read_csv``; the ``"time(s)"``, ``"Temp"``
    and ``"Humid"`` columns are dropped and the four statistics are computed
    for each remaining column. One file produces one row of features.

    Args:
        CSV_files (list): paths of the CSV files to process.
        label (str): class label written to the ``"label"`` column; only
            used when ``return_dataset`` is True.
        return_dataset (bool, optional): whether to return a formatted
            DataFrame instead of a raw array. Defaults to False.
        n_sensors (int, optional): number of columns expected to remain in
            each file after the drop. Defaults to 10.

    Returns:
        DataFrame: if ``return_dataset`` is True. Columns are
            ``mu1..muN``, ``max1..maxN``, ``med1..medN``, ``grad1..gradN``
            followed by ``label``.
        ndarray: if ``return_dataset`` is False. Shape is
            ``(len(CSV_files), n_sensors, 4)`` with the last axis ordered
            as (max, mean, median, max gradient).

    Raises:
        KeyError: if a file lacks one of the columns that are dropped.
        ValueError: if a file does not leave exactly ``n_sensors`` columns.
    """
    # Order of this tuple defines the last axis of the returned array.
    stat_names = ("max", "mean", "median", "grad")
    rows = {name: [] for name in stat_names}

    for csv_file in CSV_files:
        signals = pd.read_csv(csv_file).drop(columns=["time(s)", "Temp", "Humid"])
        columns = list(signals.columns)
        if len(columns) != n_sensors:
            raise ValueError(
                f"{csv_file}: expected {n_sensors} signal columns, found {len(columns)}"
            )
        rows["max"].append([signals[col].max() for col in columns])
        rows["mean"].append([signals[col].mean() for col in columns])
        rows["median"].append([signals[col].median() for col in columns])
        rows["grad"].append([get_MaxGradient(signals, col) for col in columns])

    # Convert once at the end instead of growing arrays with np.vstack per
    # file. The explicit reshape keeps the (0, n_sensors) shape for an empty
    # file list.
    features = {
        name: np.asarray(values, dtype=float).reshape(len(values), n_sensors)
        for name, values in rows.items()
    }

    if return_dataset:
        # (column prefix, statistic) pairs in the column order of the dataset.
        layout = (("mu", "mean"), ("max", "max"), ("med", "median"), ("grad", "grad"))
        frames = [
            pd.DataFrame(
                features[stat],
                columns=[f"{prefix}{i}" for i in range(1, n_sensors + 1)],
            )
            for prefix, stat in layout
        ]
        dataset = pd.concat(frames, axis=1)
        dataset["label"] = label
        return dataset

    # Stack the four (n_files, n_sensors) matrices along a new trailing axis.
    return np.stack([features[name] for name in stat_names], axis=-1)

In [5]:
# Raw feature arrays for each class (return_dataset left at its default),
# followed by a quick shape check.
A_array = extract_FeaturesCubic(CSV_files=A_csv_file, label="A")
B_array = extract_FeaturesCubic(CSV_files=B_csv_file, label="B")
for feature_array in (A_array, B_array):
    print(feature_array.shape)

(35, 10, 4)
(35, 10, 4)


In [6]:
# File names of the saved NumPy feature arrays; both are written under ../Exp/.
A_array_file_filename, B_array_file_filename = 'A_array_1W.npy', 'B_array_1W.npy'

np.save(file=f"../Exp/{A_array_file_filename}", arr=A_array)
np.save(file=f"../Exp/{B_array_file_filename}", arr=B_array)

In [7]:
# Wrap the NumPy feature arrays as TensorFlow tensors and confirm the shapes
# are unchanged.
A_tensor = tf.convert_to_tensor(value=A_array)
B_tensor = tf.convert_to_tensor(value=B_array)
for feature_tensor in (A_tensor, B_tensor):
    print(feature_tensor.shape)

(35, 10, 4)
(35, 10, 4)


In [8]:
# File names of the serialized tensors; both are written under ../Exp/.
A_tensor_file_filename, B_tensor_file_filename = 'A_tensor_1W.tf', 'B_tensor_1W.tf'

# Serialize each tensor, then write the serialized result to disk.
A_tensor_bytes = tf.io.serialize_tensor(A_tensor)
tf.io.write_file(f"../Exp/{A_tensor_file_filename}", A_tensor_bytes)
B_tensor_bytes = tf.io.serialize_tensor(B_tensor)
tf.io.write_file(f"../Exp/{B_tensor_file_filename}", B_tensor_bytes)

In [9]:
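# How to load a saved tensor back.
# out_type must match the dtype that was serialized; the tensors written above
# come from float64 NumPy arrays, so use tf.float64 (tf.int32 would not parse).
# tensor_bytes = tf.io.read_file(file_path)

# tensor_tf = tf.io.parse_tensor(tensor_bytes, out_type=tf.float64)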

###  Get Dataset

In [10]:
# Same feature extraction as for the arrays, but returned as labelled
# DataFrames (one row per CSV file, plus a "label" column).
A_dataset = extract_FeaturesCubic(CSV_files=A_csv_file, label="A", return_dataset=True)
B_dataset = extract_FeaturesCubic(CSV_files=B_csv_file, label="B", return_dataset=True)

In [11]:
# Stack both classes row-wise; ignore_index=True discards the per-class row
# indices and numbers the combined rows 0..n-1.
full_dataset = pd.concat([A_dataset, B_dataset], axis=0, ignore_index=True)

#### Save to csv file

In [12]:
# Write the combined dataset without the row index column.
output_csv_path = "../dataset/data_2c40d1w.csv"
full_dataset.to_csv(output_csv_path, index=False)