In [1]:
import os
import numpy as np
import h5py
from scipy.io import loadmat

# Define the path to save the HDF5 file on the D drive
hdf5_path = r'D:\emg_data_final.h5'

# Define the path to the data
path = r'C:\Users\user\Desktop\DB2_Extract\DB2_All'
S_range = range(1, 41)
E_range = range(1, 4)

# Function to load and process a single file
def load_file_data(i, j):
    filename = f"S{i}_E{j}_A1.mat"
    file_path = os.path.join(path, filename)
    if os.path.exists(file_path):
        data = loadmat(file_path)
        emg_data = data['emg'].astype(np.float16)
        stimulus_data = data['stimulus'].astype(np.float16)
        repetition_data = data['repetition'].astype(np.float16)
        num_samples = emg_data.shape[0]
        subject = np.full((num_samples, 1), i, dtype=np.float16)
        final = np.hstack((emg_data, stimulus_data, repetition_data, subject))
        return final
    return None

# Load and process all files
results = []
for i in S_range:
    for j in E_range:
        result = load_file_data(i, j)
        if result is not None:
            results.append(result)

# Combine all results
if results:
    final_combined = np.vstack(results)

    # Write the combined data to an HDF5 file
    with h5py.File(hdf5_path, 'w') as hdf5_file:
        hdf5_file.create_dataset('final', data=final_combined, chunks=True)

    # Now you can load the final data from the HDF5 file as needed
    with h5py.File(hdf5_path, 'r') as hdf5_file:
        final = np.array(hdf5_file['final'])

    print("Final Data Shape:", final.shape)
else:
    print("No valid data files found.")

Final Data Shape: (207713115, 15)


In [None]:
# del emg_data, stimulus_data, repetition_data, subject 

In [None]:
# subject = subject.reshape(-1,1)

In [None]:
# final = np.hstack((emg_data,stimulus_data,repetition_data,subject)).astype(np.float16)
# final

In [2]:
print(np.unique(final[:,-3]))

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17.
 18. 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35.
 36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49.]


In [3]:
print(np.unique(final[:,-1]))

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18.
 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35. 36.
 37. 38. 39. 40.]


In [4]:
def keep_rows_with_values(array, column_index, values_to_remove):
    mask = np.isin(array[:, column_index], values_to_remove)
    filtered_array = array[mask]
    filtered_array = filtered_array.astype(float)
    return filtered_array

In [5]:
train = 0
test = 0
repetition_data=0
stimulus_data=0
emg_data=0
sunject=0

In [6]:
train = keep_rows_with_values(final,13,[1,3,4,6])

In [7]:
test = keep_rows_with_values(final,13,[2,5])

In [8]:
train = train.astype(np.float16)

In [9]:
test = test.astype(np.float16)

In [10]:
import numpy as np

# Assuming 'train' is your input array
# For the sake of the example, we'll assume 'train' has a shape of (n, 15)

train_final_list = []
max_samples = 10000

# Precompute unique values to avoid recomputation inside the loops
unique_subjects = np.unique(train[:, -3])
unique_k = np.unique(train[:, -1])

# Create a boolean mask for all conditions
mask_i = np.isin(train[:, -3], unique_subjects)
mask_j = np.isin(train[:, -2], [1, 3, 4, 6])
mask_combined = mask_i & mask_j

# Filter the train array once based on the combined mask
filtered_train = train[mask_combined]

# Iterate over unique subjects
for i in unique_subjects:
    selection = filtered_train[filtered_train[:, -3] == i]
    
    # Iterate over the selected conditions
    for j in [1, 3, 4, 6]:
        new_selection = selection[selection[:, -2] == j]
        
        # Iterate over unique k values
        for k in unique_k:
            k_selection = new_selection[new_selection[:, -1] == k]
            num_samples = k_selection.shape[0]
            
            if num_samples >= max_samples:
                data = k_selection[:max_samples]
            else:
                # Preallocate and pad with zeros if necessary
                data = np.zeros((max_samples, 15), dtype=np.float16)
                data[:num_samples] = k_selection
            
            train_final_list.append(data)

# Use np.vstack to concatenate the final list of arrays
train_final = np.vstack(train_final_list)

print("Train Final Shape:", train_final.shape)

Train Final Shape: (78400000, 15)


In [11]:
np.unique(train_final[:,-1])

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
       26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
       39., 40.], dtype=float16)

In [12]:
import numpy as np

# Assuming 'test' is your input array
# For the sake of the example, we'll assume 'test' has a shape of (n, 15)

test_final_list = []
max_samples = 10000

# Precompute unique values to avoid recomputation inside the loops
unique_subjects = np.unique(test[:, -3])
unique_k = np.unique(test[:, -1])

# Create a boolean mask for all conditions
mask_i = np.isin(test[:, -3], unique_subjects)
mask_j = np.isin(test[:, -2], [2, 5])
mask_combined = mask_i & mask_j

# Filter the test array once based on the combined mask
filtered_test = test[mask_combined]

# Iterate over unique subjects
for i in unique_subjects:
    selection = filtered_test[filtered_test[:, -3] == i]
    
    # Iterate over the selected conditions
    for j in [2, 5]:
        new_selection = selection[selection[:, -2] == j]
        
        # Iterate over unique k values
        for k in unique_k:
            k_selection = new_selection[new_selection[:, -1] == k]
            num_samples = k_selection.shape[0]
            
            if num_samples >= max_samples:
                data = k_selection[:max_samples]
            else:
                # Preallocate and pad with zeros if necessary
                data = np.zeros((max_samples, 15), dtype=np.float32)
                data[:num_samples] = k_selection
            
            test_final_list.append(data)

# Use np.vstack to concatenate the final list of arrays
test_final = np.vstack(test_final_list)

print("Test Final Shape:", test_final.shape)

Test Final Shape: (39200000, 15)


In [13]:
train_final = train_final[:,:-1]
test_final = test_final[:,:-1]

In [14]:
train_final[:,-2], test_final[:,-2]

(array([ 1.,  1.,  1., ..., 49., 49., 49.], dtype=float16),
 array([ 1.,  1.,  1., ..., 49., 49., 49.], dtype=float32))

In [15]:
train_final.shape, test_final.shape

((78400000, 14), (39200000, 14))

In [16]:
np.unique(train_final[:,-1]), np.unique(test_final[:,-1])

(array([0., 1., 3., 4., 6.], dtype=float16),
 array([0., 2., 5.], dtype=float32))

In [17]:
Train_Class_Label = train_final[:,12]
Test_Class_Label = test_final[:,12]

In [18]:
train_final = train_final[:,:-1]
test_final = test_final[:,:-1]

In [19]:
train_final = train_final[:,:-1]
test_final = test_final[:,:-1]

In [20]:
train_final.shape , test_final.shape

((78400000, 12), (39200000, 12))

In [21]:
np.savetxt("Train_Class_Label.csv", Train_Class_Label , delimiter = ',', fmt='%f')
np.savetxt("Test_Class_Label.csv", Test_Class_Label, delimiter = ',', fmt='%f')
np.savetxt("Train.csv", train_final, delimiter = ',', fmt='%f')
np.savetxt("Test.csv", test_final, delimiter = ',', fmt='%f')