In [1]:
import os
import numpy as np
import pandas as pd

# RAW DATA #
Data were collected at a sampling frequency of 500 Hz.

**Number of channels:** 8

1. FC3
2. FCz
3. FC4
4. C3
5. Cz
6. C4
7. CP3
8. CP4

**Collection of MI:** Left and Right

**Sequence:** Rest Data (5s) -> MI Collection Data (5s)

**DataShape:** 1600x2001 in the raw file (the last column is empty and is removed after loading, leaving 1600x2000)

In [35]:
# Root folder of the recording session. Every input/output location below is
# derived from it, so relocating the project only requires changing this one
# value (or setting the MI_DATA_ROOT environment variable before starting the kernel).
PROJECT_ROOT  = os.environ.get(
    "MI_DATA_ROOT",
    r"C:\Users\Dixie\Documents\NTUST\Special Topic II\MI LR HAND\20262201_MIVerification",
)

# Inputs: the raw recording and the label file.
RAW_PATH      = os.path.join(PROJECT_ROOT, "Dataset", "data_dixie.txt")
LABEL_PATH    = os.path.join(PROJECT_ROOT, "Dataset", "order_dixie.txt")

# Outputs: folders that receive the test / train splits.
TEST_OUT_DIR  = os.path.join(PROJECT_ROOT, "ProcessedDataset", "TEST")
TRAIN_OUT_DIR = os.path.join(PROJECT_ROOT, "ProcessedDataset", "TRAIN")

In [36]:
#=======================================================================================================
# Constant Variables
#=======================================================================================================
# Number of channels per recorded segment (FC3, FCz, FC4, C3, Cz, C4, CP3, CP4).
# Passed to channelsMerging() as n_ch: every chAmounts consecutive rows of the raw
# table are grouped into one segment.
chAmounts = 8

In [37]:
#=======================================================================================================
# Load the raw recording (space-separated text, no header row)
#=======================================================================================================
raw_data  = pd.read_csv(RAW_PATH, sep=" ", header=None)
#=======================================================================================================
# Remove the last empty column
# (presumably created by a trailing delimiter at the end of each line).
# It is dropped only when it really is all-NaN, so a file without that empty
# column does not silently lose its last sample.
#=======================================================================================================
if raw_data.shape[1] > 0 and raw_data.iloc[:, -1].isna().all():
    raw_data = raw_data.drop(columns=raw_data.columns[-1])
print("DataSize: ", raw_data.shape)

DataSize:  (1600, 2000)


In [38]:
# Label file: space-separated text without a header row.
raw_label = pd.read_csv(
    LABEL_PATH,
    header=None,
    sep=" ",
)
print("LabelSize: ", raw_label.shape)

LabelSize:  (100, 1)


In [39]:
def channelsMerging(data, n_ch):
    """Group consecutive channel rows into per-segment blocks.

    The input is read as a 2-D table in which every ``n_ch`` consecutive rows
    belong to the same segment. Rows are regrouped without being reordered.

    Parameters
    ----------
    data : pandas.DataFrame or array-like, shape (n_rows, n_cols)
        Table whose row count is a multiple of ``n_ch``.
    n_ch : int
        Number of rows (channels) per segment; must be positive.

    Returns
    -------
    numpy.ndarray, shape (n_rows // n_ch, n_ch, n_cols)
        ``result[s, c]`` is row ``s * n_ch + c`` of ``data``.

    Raises
    ------
    ValueError
        If ``data`` is not 2-D, ``n_ch`` is not positive, or the row count is
        not divisible by ``n_ch``.
    """
    # Keep the DataFrame path unchanged; fall back to np.asarray for plain arrays/lists.
    temp_data = data.to_numpy() if hasattr(data, "to_numpy") else np.asarray(data)
    if temp_data.ndim != 2:
        raise ValueError(f"data must be 2-D, got {temp_data.ndim} dimension(s)")
    if n_ch <= 0:
        raise ValueError(f"n_ch must be positive, got {n_ch}")

    n_rows, n_cols = temp_data.shape
    # Fail loudly: returning None here would only surface later as an unrelated
    # AttributeError in the caller.
    if n_rows % n_ch != 0:
        raise ValueError(f"Row count not divisible by n_ch ({n_rows} rows, n_ch={n_ch})")

    n_segments = n_rows // n_ch
    return temp_data.reshape(n_segments, n_ch, n_cols)

In [40]:
# Regroup the raw rows so that each entry holds the chAmounts channel rows of one segment.
pre_processData = channelsMerging(raw_data, chAmounts)

# Flatten every segment into a single row (its channel rows laid end to end).
post_processData = np.reshape(pre_processData, (len(pre_processData), -1))

print("ProcessedData Size:", post_processData.shape)
pd.DataFrame(post_processData).head()

ProcessedData Size: (200, 16000)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15990,15991,15992,15993,15994,15995,15996,15997,15998,15999
0,0.000172,-0.000535,-0.001225,-0.000826,0.00167,0.004196,0.007325,0.010775,0.013108,0.015204,...,-0.000988,-0.002279,-0.004405,-0.006415,-0.007117,-0.005801,-0.002749,0.000398,0.002749,0.004567
1,0.002249,0.001905,0.000988,0.000465,-0.000504,-0.00031,0.001433,0.00281,0.003975,0.003621,...,0.010488,0.009024,0.007653,0.007591,0.008494,0.00997,0.011801,0.012981,0.012838,0.011647
2,0.010221,0.011619,0.012556,0.01322,0.012516,0.01262,0.014202,0.013827,0.010933,0.005937,...,-0.00396,-0.002673,-0.000442,0.002241,0.003354,0.00126,-0.002875,-0.006956,-0.009741,-0.011035
3,0.000806,-0.001873,-0.004076,-0.005125,-0.003843,-0.001916,0.000489,0.004021,0.006492,0.007878,...,0.005825,0.005939,0.005319,0.00327,0.000305,-0.002738,-0.005135,-0.005503,-0.003262,0.000567
4,0.000113,-0.001539,-0.002013,-0.000828,0.001843,0.004124,0.006349,0.007086,0.005888,0.00564,...,-0.00269,-0.001173,0.000242,0.000789,0.000856,0.00087,0.001142,0.001756,0.002136,0.002243


# DATA SEPARATION

**Purpose:** 
1. Separation of MI data and Rest dataset
2. Test and train separation: 20 - 80
3. Test data contains: Left MI data (10) and Right MI data (10)

**Data sequence:** MI Data -> Rest Data

In [41]:
# Segments alternate between the two conditions: even rows are taken as MI,
# odd rows as Rest.
# NOTE(review): this relies on the recording starting with an MI segment —
# confirm against the acquisition sequence.
miDataset, restDataset = post_processData[::2], post_processData[1::2]

In [42]:
# Fixed seed so that Restart & Run All always selects the same test trials.
TEST_SPLIT_SEED  = 42
# Number of trials per class held out for testing.
N_TEST_PER_CLASS = 10
split_rng        = np.random.default_rng(TEST_SPLIT_SEED)

# Row positions of the trials labelled 'left' / 'right'
# ([0] keeps only the row axis returned by np.where).
raw_leftIndex   = np.where(raw_label == 'left')[0]
raw_righIndex   = np.where(raw_label == 'right')[0]

# Draw the test trials without replacement, the same number from each class,
# then merge and sort them so the held-out trials keep their recording order.
random_leftIndex     = split_rng.choice(raw_leftIndex, size=N_TEST_PER_CLASS, replace=False)
random_rightIndex    = split_rng.choice(raw_righIndex, size=N_TEST_PER_CLASS, replace=False)
random_compiledIndex = np.sort(np.concatenate((random_leftIndex, random_rightIndex)))
print("Picked random_leftIndex :", random_leftIndex)
print("Picked random_rightIndex:", random_rightIndex)
print("Compiled Index          :", random_compiledIndex)

Picked random_leftIndex : [45 21 48 44 86 64 39 77 68 36]
Picked random_rightIndex: [99  6 14 20 51 34 26 55 78 92]
Compiled Index          : [ 6 14 20 21 26 34 36 39 44 45 48 51 55 64 68 77 78 86 92 99]


In [43]:
# Pull the held-out trials out of both conditions, together with their labels.
choosen_miDataset, choosen_restDataset = (
    miDataset[random_compiledIndex],
    restDataset[random_compiledIndex],
)
choosen_labels = raw_label.iloc[random_compiledIndex]

print("Choosen MI Dataset Size   :", choosen_miDataset.shape)
print("Choosen REST Dataset Size :", choosen_restDataset.shape)
pd.DataFrame(choosen_miDataset).head()

Choosen MI Dataset Size   : (20, 16000)
Choosen REST Dataset Size : (20, 16000)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15990,15991,15992,15993,15994,15995,15996,15997,15998,15999
0,0.010416,0.01146,0.010637,0.008525,0.005986,0.003483,0.003293,0.004251,0.004722,0.005747,...,0.00107,0.000506,-0.000143,-0.000895,-0.001542,-0.002,-0.001933,-0.000479,0.001568,0.00285
1,-0.002307,-0.001954,-0.002018,-0.004538,-0.007707,-0.010243,-0.011454,-0.010266,-0.008801,-0.007247,...,0.010839,0.013008,0.011948,0.008336,0.004316,0.001184,-0.000472,-0.000978,-0.000893,-8.7e-05
2,-0.001571,-0.001194,-0.001298,-0.003359,-0.006803,-0.008986,-0.010059,-0.009319,-0.006889,-0.004669,...,0.006558,0.008116,0.009345,0.010203,0.01077,0.010898,0.009863,0.008169,0.006909,0.005841
3,-0.008225,-0.006,-0.001838,0.00103,0.001188,0.001351,0.001393,0.001411,0.00217,0.001588,...,-6.6e-05,0.001055,0.002617,0.004116,0.005226,0.006389,0.007608,0.008186,0.00782,0.006264
4,-0.00482,-0.004725,-0.004334,-0.003447,-0.003399,-0.004741,-0.004697,-0.00339,-0.001664,0.000494,...,-0.004645,-0.00318,-0.002146,-0.001427,-0.001288,-0.001917,-0.003494,-0.005872,-0.007971,-0.009292


In [44]:
# Every trial position that was not drawn for the test set goes to training.
train_Index = np.setdiff1d(np.arange(len(raw_label)), random_compiledIndex)
print("Train Shape:", train_Index.size)
print("Train Index:", train_Index)

# Select the training trials from both conditions, together with their labels.
train_miDataset, train_restDataset = (
    miDataset[train_Index],
    restDataset[train_Index],
)
train_labels = raw_label.iloc[train_Index]

Train Shape: 80
Train Index: [ 0  1  2  3  4  5  7  8  9 10 11 12 13 15 16 17 18 19 22 23 24 25 27 28
 29 30 31 32 33 35 37 38 40 41 42 43 46 47 49 50 52 53 54 56 57 58 59 60
 61 62 63 65 66 67 69 70 71 72 73 74 75 76 79 80 81 82 83 84 85 87 88 89
 90 91 93 94 95 96 97 98]


# SAVING PROCESSED DATASET

In [None]:
def _save_split(out_dir, split, mi_data, rest_data, labels, index):
    """Write one split (MI data, Rest data, labels, trial indices) as text files in ``out_dir``.

    ``split`` is the tag embedded in the file names ("test" or "train").
    """
    # np.savetxt does not create missing folders, so make sure the target exists first.
    os.makedirs(out_dir, exist_ok=True)
    np.savetxt(os.path.join(out_dir, f"mi_{split}_dataset_dixie.txt"), mi_data, fmt="%.6f", delimiter=" ")
    np.savetxt(os.path.join(out_dir, f"rest_{split}_dataset_dixie.txt"), rest_data, fmt="%.6f", delimiter=" ")
    np.savetxt(os.path.join(out_dir, f"label_{split}_dataset_dixie.txt"), labels, fmt="%s", delimiter=" ")
    np.savetxt(os.path.join(out_dir, f"{split}_index_dixie.txt"), index, fmt="%d", delimiter=" ")


#=======================================================================================================
# TEST Dataset
#=======================================================================================================
_save_split(TEST_OUT_DIR, "test", choosen_miDataset, choosen_restDataset, choosen_labels, random_compiledIndex)

#=======================================================================================================
# TRAIN Dataset
#=======================================================================================================
_save_split(TRAIN_OUT_DIR, "train", train_miDataset, train_restDataset, train_labels, train_Index)