In [1]:
# Import the required libraries
import numpy as np
import matplotlib.pyplot as plt
import pywt
import pandas as pd
import os
import time
from scipy import signal
from stingray import lightcurve
import sys
from stingray import Bispectrum
import warnings
import csv
warnings.filterwarnings('ignore')
%matplotlib inline



In [2]:
# Files that will be skipped because of information loss
# zeros_test = ['co2a0000368_91.csv', 'co2c0000341_26.csv']
# zeros_train = ['co2a0000368_0.csv', 'co2a0000368_1.csv', 'co2a0000368_2.csv', 'co2a0000368_3.csv', 'co2a0000368_4.csv', 'co2a0000368_5.csv', 'co2c0000341_27.csv']

### Menghitung Matriks Cumulant orde ke-3

In [3]:
def calcCumulantOrde3(df_data, t, lag):
    """Build a stingray ``Bispectrum`` object for a single signal.

    Parameters
    ----------
    df_data : array-like
        Signal samples; handed to ``Lightcurve`` as its second positional
        argument.
    t : array-like
        Time axis; handed to ``Lightcurve`` as its first positional argument.
    lag : int
        Forwarded to ``Bispectrum`` as ``maxlag``.

    Returns
    -------
    Bispectrum
        The object built from the light curve. Other cells in this notebook
        read its ``cum3``, ``bispec`` and ``bispec_mag`` attributes (each was
        513 x 513 in the recorded run with ``lag=256``).
    """
    return Bispectrum(lightcurve.Lightcurve(t, df_data), maxlag=lag)

### Melakukan dekomposisi wavelet

In [4]:
def calcWaveletDec(bs, wavelet='db4', level=5):
    """Run a multilevel 2-D discrete wavelet decomposition on ``bs.cum3``.

    Parameters
    ----------
    bs : object
        Anything exposing a 2-D ``cum3`` array (here: the ``Bispectrum``
        returned by ``calcCumulantOrde3``).
    wavelet : str, optional
        Wavelet name passed to ``pywt.wavedec2``. Defaults to ``'db4'``, the
        value that used to be hard-coded.
    level : int, optional
        Decomposition level passed to ``pywt.wavedec2``. Defaults to ``5``,
        the value that used to be hard-coded.

    Returns
    -------
    list
        The raw ``pywt.wavedec2`` result. Per the PyWavelets documentation
        this is ``[cA_n, (cH_n, cV_n, cD_n), ..., (cH_1, cV_1, cD_1)]``, i.e.
        ``level + 1`` entries (the recorded run printed 6 entries for level 5).
    """
    return pywt.wavedec2(bs.cum3, wavelet, level=level)


### Menghitung energi relatif

In [5]:
def calcRelativeEnergy(coeffs, df_data=None):
    """Compute per-band energy and its percentage share of the total.

    Parameters
    ----------
    coeffs : sequence
        One entry per band. Each entry is either an array or a tuple of
        equally shaped arrays (the layout produced by ``calcWaveletDec``).
        Any number of bands is accepted; previously only exactly six bands
        were reordered correctly.
    df_data : optional
        Unused. Kept so existing two-argument calls keep working.

    Returns
    -------
    tuple[list, list]
        ``(energies, relative_energies)``. ``energies[0]`` belongs to
        ``coeffs[0]``; the remaining entries are the energies of
        ``coeffs[1:]`` in reversed order. ``relative_energies`` holds each
        energy as a percentage of the summed energy.
    """
    # np.square turns a tuple of equally shaped sub-band arrays into one
    # stacked array, so a single sum covers every sub-band of that entry.
    energies = [np.sum(np.square(band)) for band in coeffs]

    # Keep the first entry in place and reverse all the others. The old
    # fixed slice (1:6 <- -1:-6:-1) did this only for six bands and produced
    # a list of the wrong length or order otherwise.
    energies[1:] = energies[:0:-1]

    total_energy = np.sum(energies)
    relative_energies = [(e / total_energy) * 100 for e in energies]

    return energies, relative_energies

In [6]:
# Testing 

### Persiapan data

In [7]:
# Sampling frequency used throughout the notebook (presumably Hz — confirm
# against the dataset description).
fs = 256
# Notebook-level time axis with fs*10 = 2560 points on [0, 1). The test cell
# further down passes it together with a file from "segmented_10 seconds".
# NOTE(review): the axis spans one unit rather than ten — confirm that this
# normalisation is intended.
t = np.arange(0, 1, 1/(fs*10))
t.shape

(2560,)

In [8]:

def get_csv_EEG(filename):
    """Read a CSV file and return its contents plus its column labels.

    Parameters
    ----------
    filename : str or path-like
        Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    tuple
        ``(frame, columns)`` where ``frame`` is the loaded DataFrame and
        ``columns`` is ``frame.columns``. Callers in this notebook iterate
        over ``columns`` as channel names.
    """
    frame = pd.read_csv(filename)
    columns = frame.columns
    return frame, columns


### Perhitungan RWB

In [9]:
def extract_feature(directory, lag, destination, segment_time=1, fs=256):
    """Extract relative-energy features for every CSV below ``directory``.

    For each sub-folder of ``directory`` and each file inside it, every column
    of the CSV is run through ``calcCumulantOrde3`` -> ``calcWaveletDec`` ->
    ``calcRelativeEnergy``; the relative energies of all columns are
    concatenated and saved as one ``.npy`` file. Files whose output already
    exists are skipped, so an interrupted run can be resumed.

    Parameters
    ----------
    directory : str
        Folder whose sub-folders contain the CSV files.
    lag : int
        Maximum lag forwarded to ``calcCumulantOrde3``; also appended to the
        output folder name and the recap file name.
    destination : str
        Output prefix. Features are written below
        ``(destination + "_" + lag)/<sub-folder>``; the whole path is
        lower-cased, exactly as before.
    segment_time : int, optional
        Multiplier for the number of points on the time axis
        (``fs * segment_time`` points on [0, 1)).
        # NOTE(review): presumably the segment length in seconds, and the CSV
        # columns are presumably expected to have that many rows — confirm.
    fs : int, optional
        Sampling frequency. Defaults to 256, the value that used to be
        hard-coded.

    Side effects
    ------------
    Writes one ``<sub-folder>_<file>_bispectrum.npy`` per processed file and a
    timing recap ``recap_rwb<lag>.csv`` (columns ``Wall Time``/``CPU Time``,
    one row per processed file) below ``./logs/Execution/`` followed by every
    component of ``directory`` after the first one.
    """
    t = np.arange(0, 1, 1/(fs * segment_time))

    # Collect timing rows in a plain list and build the DataFrame once at the
    # end instead of re-concatenating a DataFrame for every file.
    timings = []
    for foldername in os.listdir(directory):
        folder = os.path.join(directory, foldername)
        if not os.path.isdir(folder):
            continue
        des_dir = os.path.join(destination + "_" + str(lag), foldername).lower()
        for filename in os.listdir(folder):
            cpu_start = time.process_time()
            wt_start = time.time()

            des_file = foldername + '_' + filename + '_bispectrum' + '.npy'
            os.makedirs(des_dir, exist_ok=True)
            des_path = os.path.join(des_dir, des_file)
            # Resume support: check before reading the CSV so already
            # processed files cost no I/O.
            if os.path.exists(des_path):
                continue

            rel_path = os.path.join(directory, foldername, filename)
            df_data, channel_name = get_csv_EEG(rel_path)

            RWB = []
            for channel in channel_name:
                y = df_data[channel]  # signal of one channel
                _, relative_energies = calcRelativeEnergy(
                    calcWaveletDec(calcCumulantOrde3(y, t, lag)), y
                )
                RWB.extend(relative_energies)
            np.save(des_path, np.array(RWB))

            wt_end = time.time()
            cpu_end = time.process_time()
            timings.append([wt_end - wt_start, cpu_end - cpu_start])

    recap = pd.DataFrame(timings, columns=['Wall Time', 'CPU Time'])

    # Split on the OS separator after normalising the path. Splitting on '/'
    # only kept the class folder when `directory` had been joined with a
    # backslash (Windows); on POSIX the class folder was dropped and the
    # recap files of different classes overwrote each other.
    parts = os.path.normpath(directory).split(os.sep)
    recap_dir = os.path.join('./logs/Execution', *parts[1:])
    os.makedirs(recap_dir, exist_ok=True)
    recap_path = os.path.join(recap_dir, 'recap_rwb' + str(lag) + '.csv')
    recap.to_csv(recap_path)


### Mengecek nilai nol pada data

In [10]:
def check_zeros(directory):
    """List the files that have at least one column consisting only of zeros.

    Parameters
    ----------
    directory : str
        Folder whose sub-folders contain the CSV files to inspect.

    Returns
    -------
    list[str]
        File names (without folder) for which some column compares equal to
        zero in every row. Each file is listed at most once per occurrence on
        disk.
    """
    flagged = []
    subfolders = (
        name for name in os.listdir(directory)
        if os.path.isdir(os.path.join(directory, name))
    )
    for foldername in subfolders:
        for filename in os.listdir(os.path.join(directory, foldername)):
            df_data, channel_name = get_csv_EEG(
                os.path.join(directory, foldername, filename)
            )
            # any() stops at the first all-zero column, like the original break.
            if any((df_data[channel] == 0).all() for channel in channel_name):
                flagged.append(filename)
    return flagged

In [11]:
# check_zeros(os.path.join(directory_segmented, "autism"))

### Testing

In [12]:
def get_dummy(directory, lag):
    """Return the first column of the first CSV found below ``directory``.

    Sub-folders are visited in ``os.listdir`` order; the path of the file
    that gets loaded is printed. ``lag`` is accepted but not used.

    Returns
    -------
    pandas.Series or None
        The first column of the first file encountered, or ``None`` when no
        sub-folder contains a file.
    """
    for foldername in os.listdir(directory):
        if not os.path.isdir(os.path.join(directory, foldername)):
            continue
        for filename in os.listdir(os.path.join(directory, foldername)):
            rel_path = os.path.join(directory, foldername, filename)
            print(rel_path)
            df_data, channel_name = get_csv_EEG(rel_path)
            # Only one sample signal is needed, so stop at the first file.
            return df_data[channel_name[0]]
        


In [17]:
lag = 256

# Smoke test of the whole pipeline on one signal. It relies on the
# notebook-level `t` (2560 points) defined in the sampling cell above.
dummy = get_dummy("datasets/segmented_10 seconds/autism", lag)
bs = calcCumulantOrde3(dummy, t, lag)
print(bs.bispec_mag.shape)
print(bs.cum3.shape)
print(bs.bispec.shape)

wavelet = calcWaveletDec(bs)
# The decomposition is a ragged list (one array followed by tuples of
# arrays). np.array(wavelet) raises ValueError on NumPy >= 1.24, so report
# the number of bands directly; this prints the same "(6,)" as before.
print((len(wavelet),))
# Note: this is the full (energies, relative_energies) tuple.
relative_energies = calcRelativeEnergy(wavelet, dummy)
print(relative_energies)


datasets/segmented_10 seconds/autism\Bader\segment_1.csv
(513, 513)
(513, 513)
(513, 513)
(6,)
([112107909129063.05, 32392153.511949405, 605719914.025452, 6014831714.317251, 154661772562.2268, 1248157569376.144], [98.75836434195911, 2.853497244936536e-05, 0.0005335922186331983, 0.00529859977331537, 0.13624501431820848, 1.0995299167582784])


In [14]:
# print(relative_energies)

### Main Program

In [18]:
# Segment length selector; it is interpolated into every path below and
# passed to extract_feature as `segment_time`.
SEGMENT_TIME = 1

# Input folder with the segmented CSV files (one sub-folder per class).
directory_segmented = f"datasets/segmented_{SEGMENT_TIME} seconds"
# Output prefix for the extracted feature files.
directory_feature = f"datasets/features/RWB/segment_{SEGMENT_TIME} seconds"
# Folder from which the timing summary cell reads the recap CSV files.
directory_logs = f"logs/Execution/segmented_{SEGMENT_TIME} seconds"

In [21]:
# Lags processed by the extraction loop in the next cell.
# Full sweep, kept for reference: lags = [256, 128, 64, 32, 16, 8, 4, 2]
lags = [128]

In [22]:
# Run the extraction for every configured lag, first on the "autism" class
# folder and then on the "normal" class folder.
for lag in lags:
    for class_name in ("autism", "normal"):
        extract_feature(
            os.path.join(directory_segmented, class_name),
            lag,
            os.path.join(directory_feature, class_name),
            segment_time=SEGMENT_TIME,
        )

In [19]:
# Summarise the recorded execution times: for every lag, add up the wall and
# CPU time over all recap files found below `directory_logs`.
lags = [256, 128, 64, 32, 16, 8, 4, 2]

SEGMENT_TIME = 1

for lag in lags:
    recap_name = f"recap_rwb{lag}.csv"
    recap_paths = [
        os.path.join(directory_logs, folder, recap_name)
        for folder in os.listdir(directory_logs)
    ]
    # A recap only exists for lags that were actually extracted; report the
    # gaps instead of aborting the whole summary with FileNotFoundError.
    missing = [path for path in recap_paths if not os.path.isfile(path)]
    if missing:
        print(f"Lag {lag}: missing recap file(s), not included: {missing}")

    # Read everything first and concatenate once (no DataFrame growth in a loop).
    frames = [pd.read_csv(path) for path in recap_paths if os.path.isfile(path)]
    if not frames:
        print(f"Lag {lag}: no recap files found\n")
        continue

    # 'Unnamed: 0' is the index column that to_csv wrote into the recap file.
    df = pd.concat(frames).drop('Unnamed: 0', axis=1)
    print(f"Lag {lag}:\n", df.sum(), "\n")

Lag 256:
 Wall Time    3776.337031
CPU Time     6545.921875
dtype: float64 

Lag 128:
 Wall Time    4054.345197
CPU Time     6908.421875
dtype: float64 

Lag 64:
 Wall Time    1536.385925
CPU Time     2219.437500
dtype: float64 

Lag 32:
 Wall Time    1071.020538
CPU Time     1263.968750
dtype: float64 

Lag 16:
 Wall Time    565.037749
CPU Time     682.375000
dtype: float64 

Lag 8:
 Wall Time    691.267961
CPU Time     881.984375
dtype: float64 

Lag 4:
 Wall Time    556.042306
CPU Time     132.484375
dtype: float64 

Lag 2:
 Wall Time    136.555223
CPU Time      63.453125
dtype: float64 

