In [None]:
conda install -c conda-forge opencv


In [None]:
# Analyzing, cleaning, exploring data
import pandas as pd 
from pandas import DataFrame
import re

# Performing math operations on arrays and other structures.
import numpy as np

# Visualizing data
import matplotlib.pyplot as plt 


from sklearn.preprocessing import LabelEncoder

import pywt


## Importing data and extracting to a dataframe

In [None]:
import glob

new_list2 = []

columns = ['Timestamp', 'El1', 'El2', 'El3','El4', 'El5', 'El6','El7', 'El8']


# Specify the folder path where the CSV files are located
folder_path = "first_try/"

# Get a list of all CSV files in the folder
file_list = glob.glob(folder_path + "*.csv")

# Create an empty list to store the data from each CSV file
dataframes = []
total_num_rows = 0  # Variable to store the total number of rows
extracted_strings = []


# Loop through each CSV file, read it into a DataFrame, and append it to the list
for file in file_list:
    df = pd.read_csv(file, names=columns)
    num_rows = df.shape[0]
    
    file_name = file.split("/")[-1]  # Extract the file name from the file path
    extracted_string = re.search(r'-(.*?)-', file_name).group(1)  # Extract the desired string using regex
        
    extracted_strings.extend([extracted_string] * num_rows)
    
    
    extracted_numbers = re.findall(r'\d+', file)[0]

    
    new_list = [extracted_numbers] * num_rows
    new_list2.append(new_list)
    dataframes.append(df)
    
    total_num_rows += num_rows


# Concatenate all DataFrames in the list into a single DataFrame
data = pd.concat(dataframes, ignore_index=True)

new_list2 = [item for sublist in new_list2 for item in sublist]  # Flatten the nested list


In [None]:
data["P_ID"] = new_list2
data["Gesture"] = extracted_strings

In [None]:
le = LabelEncoder()
data['encoded_column'] = le.fit_transform(data['Gesture'])

In [None]:
data

In [None]:
data.Gesture.value_counts()

In [None]:
data.Gesture.value_counts()

In [None]:
data.encoded_column.value_counts()

## Data Cleaning

In [None]:
data.describe(include="all")

In [None]:
hist = data.hist(figsize=(16, 8))

In [None]:
# Take time and one column and convert it into a wavelet transform

In [None]:
time_data = data['Timestamp'].values

In [None]:
electrode1_data = data['El1'].values

In [None]:
time_data

In [None]:
electrode1_data

In [None]:
wavelet = 'db4'

In [None]:
coeffs = pywt.wavedec(electrode_data, wavelet)

In [None]:
coeffs

In [None]:
type(coeffs)

In [None]:
len(coeffs)

In [None]:
# Plot the wavelet coefficients
levels = len(coeffs)
plt.figure(figsize=(12, 8))

for i in range(levels):
    level = levels - i
    plt.subplot(levels, 1, i+1)
    plt.plot(coeffs[i])
    plt.title(f'Level {level} Coefficients')
    plt.xlabel('Sample')
    plt.ylabel('Coefficient')

plt.show()

In [None]:
coeffs = pywt.wavedec(electrode1_data, 'db4', level=8)
reconstructed_signal = pywt.waverec(coeffs, 'db4')
 
fig, ax = plt.subplots(figsize=(8,4))
ax.plot(electrode1_data[:1000], label='signal')
ax.plot(reconstructed_signal[:1000], label='reconstructed signal', linestyle='--')
ax.legend(loc='upper left')
ax.set_title('de- and reconstruction using wavedec()')
plt.show()

In [None]:
signal = electrode1_data
def lowpassfilter(signal, thresh = 0.63, wavelet="db4"):
    thresh = thresh*np.nanmax(signal)
    coeff = pywt.wavedec(signal, wavelet, mode="per" )
    coeff[1:] = (pywt.threshold(i, value=thresh, mode="soft" ) for i in coeff[1:])
    reconstructed_signal = pywt.waverec(coeff, wavelet, mode="per" )
    return reconstructed_signal
 
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(signal, color="b", alpha=0.5, label='original signal')
rec = lowpassfilter(signal, 0.4)
ax.plot(rec, 'k', label='DWT smoothing}', linewidth=2)
ax.legend()
ax.set_title('Removing High Frequency Noise with DWT', fontsize=18)
ax.set_ylabel('Signal Amplitude', fontsize=16)
ax.set_xlabel('Sample No', fontsize=16)
plt.show()

In [None]:
data = 

In [None]:
import numpy as np
import pywt

# Assuming you have your dataset loaded into a pandas DataFrame called 'data'
# 'timestamps' represents the column with timestamps
# 'electrode_columns' represents the list of columns containing electrode readings

# Define the wavelet function
wavelet = 'db4'  # Select a suitable wavelet, such as Daubechies 4

# Create an empty list to store the wavelet coefficients
wavelet_coefficients = []

electrode_columns = ['El1', 'El2', 'El3','El4', 'El5', 'El6','El7', 'El8']

signal = data['El1'].values  # Get the electrode readings as a numpy array
coefficients = pywt.wavedec(signal, wavelet, level=5)  # Perform wavelet decomposition

    # Append the coefficients to the list
wavelet_coefficients.extend(coefficients)

# Convert the list of coefficients to a numpy array
wavelet_coefficients = np.array(wavelet_coefficients,dtype=object )


wavelet_coefficients

In [None]:

# Reshape the coefficients into a suitable format for CNN input
# The dimensions depend on the number of electrodes and the number of levels in the wavelet decomposition
num_electrodes = 1
num_levels = 5
num_samples = len(data)  # Assuming each row represents a sample
num_coeffs = wavelet_coefficients.shape[0]

#reshaped_coefficients = wavelet_coefficients.reshape(num_samples, num_electrodes, num_levels, -1)

# Now 'reshaped_coefficients' can be used as input for training a CNN model
# You can proceed with further steps such as splitting the data, normalization, and training the model
reshaped_coefficients = wavelet_coefficients.reshape(num_samples, -1, num_coeffs)


In [None]:
import numpy as np
import pywt

# Assuming you have your dataset loaded into a pandas DataFrame called 'data'
# 'timestamps' represents the column with timestamps
# 'electrode_columns' represents the list of columns containing electrode readings

# Define the wavelet function
wavelet = 'db4'  # Select a suitable wavelet, such as Daubechies 4

# Create an empty list to store the wavelet coefficients
wavelet_coefficients = []

electrode_columns = ['El1', 'El2', 'El3','El4', 'El5', 'El6','El7', 'El8']

# Iterate through each electrode column
for column in electrode_columns:
    # Apply wavelet transform on each column
    signal = data[column].values  # Get the electrode readings as a numpy array
    coefficients = pywt.wavedec(signal, wavelet, level=5)  # Perform wavelet decomposition
    print(len(coefficients))
    # Append the coefficients to the list
    wavelet_coefficients.append(coefficients)

# Convert the list of coefficients to a numpy array
wavelet_coefficients = np.array(wavelet_coefficients, dtype=object)

# Get the dimensions of the coefficients array
num_electrodes = len(electrode_columns)
num_samples = len(data)  # Assuming each row represents a sample

# Reshape the coefficients into a suitable format for CNN input
reshaped_coefficients = wavelet_coefficients.reshape(wavelet_coefficients.shape[1], num_electrodes, -1)

# Now 'reshaped_coefficients' can be used as input for training a CNN model
# You can proceed with further steps such as splitting the data, normalization, and training the model


In [None]:
wavelet_coefficients.shape[1]

In [None]:
data = pd.read_csv("first_try/004a-open-0.csv")

In [None]:
del data[data.columns[0]]


In [None]:
data2 = data.values

In [None]:
data2.shape[1]

In [None]:
data

In [None]:
import numpy as np
import pywt
import cv2

# Load your data
# Assuming you have a 2D array with dimensions (num_samples, num_channels)

# Define the wavelet and desired number of scales
wavelet = 'morl'  # Choose the wavelet function
num_scales = 6  # Number of scales for CWT decomposition

# Create an empty list to store the CWT coefficients
cwt_coefficients = []

# Apply CWT to each sample in your data
for sample in data2:
    scales = pywt.central_frequency(wavelet, precision=8) * np.log2(num_scales)
    cwt_matrix, frequencies = pywt.cwt(sample, scales, wavelet)
    print(frequencies)
    # Take the absolute value of the coefficients and normalize them
    cwt_matrix = np.abs(cwt_matrix)
    cwt_matrix /= np.max(cwt_matrix)

    # Resize the coefficients to a fixed size (e.g., 32x32)
    resized_cwt = cv2.resize(cwt_matrix, (32, 32))

    # Append the resized coefficients to the list
    cwt_coefficients.append(resized_cwt)

# Convert the list to a numpy array
cwt_coefficients = np.array(cwt_coefficients)

# Reshape the array to match the input shape of the CNN
cwt_coefficients = cwt_coefficients.reshape(-1, 32, 32, 1)

# Now you can use the cwt_coefficients as input to your CNN model
# ...


In [None]:
cwt_coefficients.shape[0]

In [None]:
import numpy as np
import pandas as pd
import pywt

# Load your dataset into a DataFrame
df = pd.read_csv('first_try/004a-open-0.csv', sep='\s+')

electrode_columns = df.columns[2:]  

n_samples = df.shape[0]
n_channels = len(electrode_columns)

# Define the wavelet parameters
waveletname = 'morl'
scales = range(1, 128)  # Adjust the range of scales according to your needs

# Create an empty array to store the wavelet coefficients
data_cwt = np.ndarray(shape=(n_samples, 127, 127, n_channels))

# Process each sample in the dataset
for i in range(n_samples):
    if i % 100 == 0:
        print(f"Processing sample {i}")
    for j, column in enumerate(electrode_columns):
        signal = df[column].iloc[i]
        coeff, freq = pywt.cwt(signal, scales, waveletname, 1)
        coeff_ = coeff[:, :127]  # Resize the coefficients to (127, 127)
        data_cwt[i, :, :, j] = coeff_

# Prepare the labels (adjust this part according to your dataset)
labels = df['label'].tolist()

# Split the data into train and test sets (adjust the splitting logic according to your needs)
train_size = int(0.8 * n_samples)
x_train = data_cwt[:train_size]
y_train = labels[:train_size]
x_test = data_cwt[train_size:]
y_test = labels[train_size:]


In [None]:
print(df.iloc[0, 0+1])

In [None]:
print(df)

In [20]:
import numpy as np
import pandas as pd
import pywt
import glob

# Create an empty list to store the truncated data
truncated_data = []

# Specify the folder path where the files are located
folder_path = "first_try/"

# Get a list of all files in the folder
file_list = glob.glob(folder_path + "/*.csv")

# Loop through each file
for file in file_list:
    # Read the file into a DataFrame
    df = pd.read_csv(file)
    
    # Truncate the DataFrame to 200 rows
    truncated_df = df[:200]
    
    # Add the truncated DataFrame to the list
    truncated_data.append(truncated_df)

# Concatenate all DataFrames in the list into a single DataFrame
combined_df = pd.concat(truncated_data, ignore_index=False)

In [21]:
combined_df

Unnamed: 0,1643731093570,164,149,122,196,134,104,98,42,1643731153238,...,53,1643730485373,80,233,110,70,1643730582564,106,195,68.1
0,1.643731e+12,173.0,154.0,121.0,214.0,141.0,134.0,108.0,48.0,,...,,,,,,,,,,
1,1.643731e+12,148.0,136.0,123.0,228.0,131.0,135.0,113.0,51.0,,...,,,,,,,,,,
2,1.643731e+12,146.0,148.0,118.0,202.0,109.0,136.0,112.0,49.0,,...,,,,,,,,,,
3,1.643731e+12,147.0,146.0,119.0,190.0,96.0,142.0,111.0,45.0,,...,,,,,,,,,,
4,1.643731e+12,118.0,144.0,117.0,199.0,96.0,150.0,114.0,47.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,,,,,,,,63.0,,,...,,,,,,,1.643731e+12,93.0,202.0,77.0
196,,,,,,,,67.0,,,...,,,,,,,1.643731e+12,101.0,207.0,81.0
197,,,,,,,,67.0,,,...,,,,,,,1.643731e+12,106.0,208.0,84.0
198,,,,,,,,86.0,,,...,,,,,,,1.643731e+12,121.0,228.0,98.0


In [28]:
transformed_data = []

scales = np.arange(1, 128)


for file in file_list:
    df = pd.read_csv(file)  # Assuming the data file is in CSV format
    
    # Apply continuous wavelet transform to each electrode column
    for col in df.columns[1:]:  # Assuming electrode columns start from index 1
        signal = df[col].values  # Get the values of the electrode column
        coeff, freq = pywt.cwt(signal, scales, waveletname, 1)
        #print(len(coeff))# Apply continuous wavelet transform
        transformed_data.append(coeff.T)  # Append the transformed data to the list



transformed_data
#transformed_data = np.array(transformed_data, dtype=object)

df = pd.DataFrame(transformed_data)

#transformed_data = transformed_data.reshape(len(file_list), 200, 8)


  values = np.array([convert(v) for v in values])


In [17]:
transformed_data.shape[0]

192

In [18]:
len(transformed_data)

192

## END

In [None]:
"""
0 - close
1 - grasp
2 - lateral
3 - neutral
4 - open 
5 - tripod

"""

In [None]:
combined_df.isnull().sum().sort_values(ascending=False)

In [None]:
skewness = combined_df.skew(axis = 0, skipna = True)
skewness

In [None]:
# Data is slightly skewed


## Splitting data according to Patient IDs

In [None]:
patients = combined_df['P_ID'].unique()

In [None]:
gf = combined_df.groupby('P_ID')

gf.first('P_ID')


In [None]:
# Creating a dataframe for each P_ID
d = {}
for p in patients:
    d[p] = pd.DataFrame()

len(patients)

In [None]:
for p, new_df in d.items():
    d[p] = gf.get_group(p)
    print(p)

In [None]:
d["004"]