In [2]:
# Importing necessary library
import numpy as np
import matplotlib.pyplot as plt
import pywt
import pandas as pd
import os
import time
from scipy import signal
from stingray import lightcurve
import sys
from stingray import Bispectrum
import warnings
import csv
warnings.filterwarnings('ignore')
%matplotlib inline



In [3]:
# Files that would be skipped because of information loss
# zeros_test = ['co2a0000368_91.csv', 'co2c0000341_26.csv']
# zeros_train = ['co2a0000368_0.csv', 'co2a0000368_1.csv', 'co2a0000368_2.csv', 'co2a0000368_3.csv', 'co2a0000368_4.csv', 'co2a0000368_5.csv', 'co2c0000341_27.csv']

### Menghitung Matriks Cumulant orde ke-3

In [4]:
def calcCumulantOrde3(df_data, t, lag):
    """Build a stingray ``Bispectrum`` object for a single signal.

    Parameters
    ----------
    df_data : array-like
        Signal samples, passed as the second ``Lightcurve`` argument.
    t : array-like
        Time stamps, passed as the first ``Lightcurve`` argument.
    lag : int
        Forwarded to ``Bispectrum`` as ``maxlag``.

    Returns
    -------
    Bispectrum
        The object built from the light curve. Other cells in this notebook
        read its ``cum3``, ``freq`` and ``bispec_mag`` attributes.
    """
    signal_lc = lightcurve.Lightcurve(t, df_data)

    # For a visual check, plot ``bispec_mag`` over a ``freq`` x ``freq`` grid
    # (contour or 3-D surface) on the returned object.
    return Bispectrum(signal_lc, maxlag=lag)

### Melakukan dekomposisi wavelet

In [5]:
def calcWaveletDec(bs, wavelet='db4', level=5):
    """Run a 2-D multilevel wavelet decomposition on ``bs.cum3``.

    Parameters
    ----------
    bs : object
        Anything exposing a 2-D ``cum3`` attribute (in this notebook, the
        object returned by ``calcCumulantOrde3``).
    wavelet : str, optional
        Wavelet name forwarded to ``pywt.wavedec2``. Defaults to ``'db4'``.
    level : int, optional
        Decomposition level forwarded to ``pywt.wavedec2``. Defaults to ``5``.

    Returns
    -------
    list
        The unmodified ``pywt.wavedec2`` result: the approximation array
        followed by one entry per detail level, coarsest first.
    """
    # NOTE(review): an earlier revision also built flattened per-level vectors
    # (upper triangle of the approximation, ravelled detail triples) but never
    # returned them, so that dead work was removed. If the triangular
    # approximation was meant to feed the energy computation, that would be a
    # deliberate change to the features and needs confirming first.
    return pywt.wavedec2(bs.cum3, wavelet, level=level)


### Menghitung energi relatif

In [6]:
def calcRelativeEnergy(coeffs, df_data):
    """Compute absolute and relative energy for each decomposition band.

    Parameters
    ----------
    coeffs : sequence
        One entry per band, approximation first and then the detail bands
        from coarsest to finest. Each entry may be an array or a tuple of
        equally shaped arrays; its energy is the sum of squared values.
    df_data : any
        Unused; kept so existing call sites keep working.

    Returns
    -------
    tuple of (list, list)
        ``energies`` and ``relative_energies`` (percent of the total), both
        ordered approximation first, then details from finest to coarsest
        (``A5, D1, D2, D3, D4, D5`` for a level-5 decomposition).

    Notes
    -----
    When the total energy is zero (e.g. an all-zero signal) the percentages
    come out as NaN rather than raising.
    """
    energies = [np.sum(np.square(c)) for c in coeffs]

    # Flip only the detail part. Reversing "everything after index 0" works for
    # any number of levels, whereas the fixed 1:6 slice was correct only for
    # exactly six entries.
    energies[1:] = energies[:0:-1]

    total_energy = np.sum(energies)
    relative_energies = [(e / total_energy) * 100 for e in energies]

    return energies, relative_energies

### Persiapan data

In [7]:
# Sampling setup shared by the cells below: `fs` samples per second and the
# matching time axis covering [0, 1) in steps of 1/fs.
fs = 256
t = np.arange(start=0, stop=1, step=1 / fs)

def get_csv_EEG(filename):
    """Read a CSV file and return ``(DataFrame, column index)``.

    Parameters
    ----------
    filename : str or file-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    tuple
        The loaded frame and its ``columns`` index.
    """
    frame = pd.read_csv(filename)
    column_names = frame.columns
    return frame, column_names


### Perhitungan RWB

In [8]:
def extract_feature(directory, lag, destination):
    """Extract relative wavelet-bispectrum energies for every CSV under ``directory``.

    ``directory`` is expected to contain one subfolder per recording; plain
    files directly inside it are ignored. For every file in a subfolder, each
    CSV column is passed through ``calcCumulantOrde3`` ->
    ``calcWaveletDec`` -> ``calcRelativeEnergy`` and the relative energies of
    all columns are concatenated into one 1-D array.

    Parameters
    ----------
    directory : str
        Root folder to scan.
    lag : int
        Forwarded to ``calcCumulantOrde3``; also used in output names.
    destination : str
        Output prefix. Arrays are saved to
        ``<destination>_<lag>/<subfolder>/<subfolder>_<filename>_bispectrum.npy``
        with the directory part lower-cased.

    Side effects
    ------------
    * Writes one ``.npy`` file per input file; inputs whose output already
      exists are skipped without being read.
    * Writes the wall/CPU time of every processed file to
      ``./logs/Execution/<directory minus its first component>/recap_rwb<lag>.csv``,
      replacing any previous recap for the same lag.
    """
    timings = []
    for foldername in os.listdir(directory):
        folder = os.path.join(directory, foldername)
        if not os.path.isdir(folder):
            continue

        # NOTE(review): .lower() is applied to the whole destination path, not
        # only to the subfolder name - confirm this is intended on
        # case-sensitive filesystems.
        des_dir = os.path.join(destination + "_" + str(lag), foldername).lower()

        for filename in os.listdir(folder):
            des_file = foldername + '_' + filename + '_bispectrum' + '.npy'
            des_path = os.path.join(des_dir, des_file)
            # Decide whether to skip before touching the input so that a
            # resumed run does not re-read every CSV.
            if os.path.exists(des_path):
                continue

            cpu_start = time.process_time()
            wt_start = time.time()

            df_data, channel_name = get_csv_EEG(os.path.join(folder, filename))
            RWB = []
            for channel in channel_name:
                y = df_data[channel]  # signal of one channel
                _, relative_energies = calcRelativeEnergy(
                    calcWaveletDec(calcCumulantOrde3(y, t, lag)), y
                )
                RWB.extend(relative_energies)

            os.makedirs(des_dir, exist_ok=True)
            np.save(des_path, np.array(RWB))

            wall_time = time.time() - wt_start
            cpu_time = time.process_time() - cpu_start
            timings.append((wall_time, cpu_time))

    # Build the recap once instead of concatenating a frame per file.
    recap = pd.DataFrame(timings, columns=['Wall Time', 'CPU Time'])

    # Mirror every path component after the first one under ./logs/Execution.
    # Normalising first makes "a/b/c" and "a/b\\c" behave the same, so each
    # class folder gets its own recap instead of overwriting a shared file
    # when the path uses only forward slashes.
    parts = os.path.normpath(directory).split(os.sep)
    recap_dir = os.path.join('./logs/Execution', *parts[1:])
    os.makedirs(recap_dir, exist_ok=True)
    recap_path = os.path.join(recap_dir, 'recap_rwb' + str(lag) + '.csv')
    recap.to_csv(recap_path)


### Mengecek nilai nol pada data

In [9]:
def check_zeros(directory):
    """List the files that have at least one column made up entirely of zeros.

    Only files located inside immediate subfolders of ``directory`` are
    inspected. Each file is loaded with ``get_csv_EEG``.

    Parameters
    ----------
    directory : str
        Root folder whose subfolders are scanned.

    Returns
    -------
    list of str
        File names (without folder) in the order they were encountered; a
        file is listed at most once.
    """
    flagged = []
    for subfolder in os.listdir(directory):
        if not os.path.isdir(os.path.join(directory, subfolder)):
            continue
        for filename in os.listdir(os.path.join(directory, subfolder)):
            df_data, channel_name = get_csv_EEG(
                os.path.join(directory, subfolder, filename)
            )
            # any() stops at the first all-zero column, like the original break.
            if any((df_data[channel] == 0).all() for channel in channel_name):
                flagged.append(filename)
    return flagged

In [10]:
# check_zeros(os.path.join(directory_segmented, "autism"))

### Testing

In [11]:
def get_dummy(directory, lag):
    """Return the first column of the first file found, for quick experiments.

    Walks the immediate subfolders of ``directory`` in ``os.listdir`` order,
    prints the path of the first file it meets, loads it with ``get_csv_EEG``
    and returns its first column. Returns ``None`` when no file is found.

    Parameters
    ----------
    directory : str
        Root folder whose subfolders are scanned.
    lag : int
        Unused; kept for call compatibility.
    """
    for foldername in os.listdir(directory):
        if not os.path.isdir(os.path.join(directory, foldername)):
            continue
        for filename in os.listdir(os.path.join(directory, foldername)):
            rel_path = os.path.join(directory, foldername, filename)
            print(rel_path)
            df_data, channel_name = get_csv_EEG(rel_path)
            first_channel = channel_name[0]
            return df_data[first_channel]
                # pd.DataFrame(RWB.T).to_csv(des_path, index=False)
        


In [12]:
# Smoke test of the whole pipeline on a single channel.
lag = 128
# First column of the first file found under the autism folder.
dummy = get_dummy("datasets/segmented_1 seconds/autism", lag)
bs = calcCumulantOrde3(dummy, t, lag)
print(bs.bispec_mag.shape)
print(bs.freq.shape)

# `wavelet` holds the decomposition coefficients (not a wavelet name);
# printing it dumps every coefficient array in full.
wavelet = calcWaveletDec(bs)
print(wavelet)
# calcRelativeEnergy returns a tuple (energies, relative_energies), so this
# variable holds both lists and the print shows both.
relative_energies = calcRelativeEnergy(wavelet, dummy)
print(relative_energies)


datasets/segmented_1 seconds/autism\Bader\segment_1.csv
(257, 257)
(257,)
[array([[-156662.96042751, -156253.50600977, -156967.69919311,
        -151520.1468112 , -182596.39787673,  -74316.8754926 ,
         -64901.306805  ,   48646.05021512,  115246.99196845,
          57067.27696352,   -8610.91153674,  -21299.74726212,
         -13109.45801381,   10444.0373371 ],
       [-156253.50600977, -156129.82832001, -156739.4656264 ,
        -151043.29843222, -183329.45109955,  -71152.33035585,
         -63922.16128157,   46257.62236284,  112744.77279733,
          54815.02124987,   -9125.08604814,  -20520.1527523 ,
         -12268.96646847,    9737.006924  ],
       [-156967.69919311, -156739.4656264 , -157408.04091791,
        -151744.29558364, -183813.36584371,  -72309.29859885,
         -63893.16288043,   47685.28455731,  113700.4473451 ,
          55179.99846436,   -9074.84582528,  -20562.1129522 ,
         -12484.09203099,   10161.88104197],
       [-151520.1468112 , -151043.29843222, -1

In [11]:
# print(relative_energies)

([np.float64(2253585272909.3906), np.float64(39615306.092804804), np.float64(905788803.1997424), np.float64(6225815208.272333), np.float64(104061043541.29054), np.float64(483578223580.7988)], [np.float64(79.11770214910064), np.float64(0.0013907936059369167), np.float64(0.03179996319776666), np.float64(0.2185726891299383), np.float64(3.653321108899278), np.float64(16.977213296066445)])


### Main Program

In [12]:
# Input root: the extraction loop joins "autism" / "normal" onto this path.
directory_segmented = "datasets/segmented_1 seconds"
# Output prefix: extract_feature appends "_<lag>" to the joined class path.
directory_feature = "datasets/features/RWB/segment_1 seconds"

In [1]:
# Lags to run; the commented line below is the full sweep, the active one a single lag.
# lags = [256, 128, 64, 32, 16, 8, 4, 2]
lags = [256]

In [13]:
# Run the feature extraction for every configured lag, one class folder at a time.
for lag in lags:
    for class_name in ("autism", "normal"):
        extract_feature(
            os.path.join(directory_segmented, class_name),
            lag,
            os.path.join(directory_feature, class_name),
        )

In [14]:
directory_logs = "logs/Execution/segmented_1 seconds"

# For each lag, stack the timing recap of every folder under `directory_logs`
# and print the column totals.
for lag in lags:
    recap_frames = [
        pd.read_csv(os.path.join(directory_logs, folder, f"recap_rwb{lag}.csv"))
        for folder in os.listdir(directory_logs)
    ]
    # Start from an empty frame, as the incremental version did.
    df = pd.concat([pd.DataFrame(), *recap_frames])
    # 'Unnamed: 0' is the index column that to_csv wrote into each recap file.
    df = df.drop('Unnamed: 0', axis=1)
    print(f"Lag {lag}:\n", df.sum(), "\n")

Lag 256:
 Wall Time    12912.206271
CPU Time     21772.781250
dtype: float64 

