In [None]:
import numpy as np
import os
import pandas as pd
from scipy.io import loadmat
import matplotlib.pyplot as plt
from scipy import signal
from sklearn.cross_decomposition import CCA

In [None]:
# Raw recordings, keyed by the name of the data variable stored in each .mat
# file. Every value is a list of DataFrames, one per file holding that variable.
original = {}

# Folder that contains one sub-folder per participant (relative to the CWD)
directory = "Data_trials"

# Column names given to every recording once the loaded array is transposed
column_names = ['TimeStamps', 'PO3', 'POz', 'PO4', 'O1', 'Oz', 'O2']

# os.listdir() returns entries in arbitrary order. Sorting makes the insertion
# order of `original` (and anything that later relies on "the first key")
# reproducible from run to run.
for participant_folder in sorted(os.listdir(directory)):
    participant_path = os.path.join(directory, participant_folder)
    if not os.path.isdir(participant_path):
        continue

    for file_name in sorted(os.listdir(participant_path)):
        # Keep .mat files only, skipping names that end in "5.mat" or "6.mat"
        if not file_name.endswith(".mat") or file_name.endswith(("5.mat", "6.mat")):
            continue

        file_path = os.path.join(participant_path, file_name)
        mat_data = loadmat(file_path)

        # loadmat() returns metadata entries (names wrapped in double
        # underscores) next to the stored variables. Select the first real
        # variable by name rather than by position, so the lookup does not
        # depend on how many metadata entries there are or where they sit.
        data_keys = [name for name in mat_data if not name.startswith("__")]
        if not data_keys:
            raise ValueError(f"No data variable found in {file_path}")
        key = data_keys[0]

        # Stored array is transposed so that each of the 7 named columns
        # becomes a DataFrame column
        df = pd.DataFrame(mat_data[key].T, columns=column_names)

        # Group recordings by variable name
        original.setdefault(key, []).append(df)

# Oz-only view of the loaded data: for every key, the 'Oz' column of each
# recording, in the same order as the recordings appear in `original`.
oz = {
    key: [df['Oz'] for df in dfs]
    for key, dfs in original.items()
}

In [None]:
# Sampling frequency estimate: reciprocal of the mean spacing between
# consecutive timestamps of one reference recording.
# NOTE(review): assumes the 'TimeStamps' column is expressed in seconds, since
# the result is reported in Hz — confirm against the acquisition setup.

data = original['P01_T1_R1_1'][0]
timestamps = data['TimeStamps']
time_diff = timestamps.diff().mean()  # the leading NaN produced by diff() is skipped by mean()
fs = 1 / time_diff
print("Sampling frequency =", fs, "Hz")

In [None]:
# Trim the EEG signal recorded without stimulus (first and last half second)

# Number of samples that make up half a second at the estimated sampling rate
num_samples_to_trim = int(0.5 * fs)

# NOTE: this cell overwrites the contents of `original`, so running it more
# than once trims the recordings again. Re-run the loading cell first to start
# from the untrimmed data.
for key, dfs in original.items():
    trimmed_dfs = []
    for df in dfs:
        # Use an explicit end position. A negative stop of the form `-n`
        # degenerates to `-0 == 0` when n is 0 and would select nothing.
        stop = max(len(df) - num_samples_to_trim, 0)
        df_trimmed = df.iloc[num_samples_to_trim:stop].reset_index(drop=True)
        trimmed_dfs.append(df_trimmed)
    original[key] = trimmed_dfs

In [None]:
# Plot a single figure: the raw Oz signal of trial 1 of test 1 of patient 1
plot_key = "P01_T1_R1_1"

# Direct lookup instead of scanning every key; nothing is drawn if the key is absent
if plot_key in oz:
    fig, ax = plt.subplots()
    ax.set_title(f'{plot_key}_Oz')
    for recording_number, trace in enumerate(oz[plot_key], start=1):
        # Only the first 200 samples are shown. Each line gets a label so that
        # the legend has entries to display.
        ax.plot(trace.iloc[:200], label=f'Recording {recording_number}')
    # NOTE: the Series is plotted against its index, so the x-axis shows the
    # sample number rather than the values of the 'TimeStamps' column.
    ax.set_xlabel('TimeStamps')
    ax.set_ylabel('Valor')
    ax.legend()
    plt.show()

In [None]:
# Filter parameters (frequencies are in the same unit as fs)
notch_freq = 50.0       # centre frequency removed by the notch filter
quality_factor = 40.0   # quality factor of the notch filter
highcut = 20            # low-pass cutoff: upper edge of the pass band
order = 8               # Butterworth order used for both low-pass and high-pass
lowcut = 5              # high-pass cutoff: lower edge of the pass band

# Low-pass, high-pass and notch designs.
# Both Butterworth filters are built as second-order sections: converting an
# order-8 design to (b, a) polynomial coefficients is numerically sensitive,
# and the SOS form is the representation recommended for filtering.
sos = signal.iirfilter(order, highcut, btype='lowpass', analog=False, ftype='butter', fs=fs, output='sos')
sos_hp = signal.butter(order, lowcut, btype='highpass', fs=fs, output='sos')
b_notch, a_notch = signal.iirnotch(notch_freq, quality_factor, fs)
# (b, a) form of the same high-pass. Not used for filtering below; kept only
# so that any code still referring to b_hp / a_hp continues to find them.
b_hp, a_hp = signal.butter(order, lowcut, btype='highpass', fs=fs)

# Dictionary for filtered data: same keys and list layout as `original`
filtrado = {}

# Apply low-pass, high-pass and notch, each forward-backward along the sample axis
for key, dfs in original.items():
    filtrado[key] = []
    for df in dfs:
        # Filter every column except the time column
        channels = df.drop(columns=['TimeStamps'])

        filtered_values = signal.sosfiltfilt(sos, channels.values, axis=0)
        filtered_values = signal.sosfiltfilt(sos_hp, filtered_values, axis=0)
        filtered_values = signal.filtfilt(b_notch, a_notch, filtered_values, axis=0)

        # Rebuild the frame on the source index so the untouched timestamps
        # line up row by row with the filtered channels.
        df_channels = pd.DataFrame(filtered_values, columns=channels.columns, index=df.index)
        df_filtrado = pd.concat([df['TimeStamps'], df_channels], axis=1)
        filtrado[key].append(df_filtrado)

# Oz-only view of the filtered data: for every key, the 'Oz' column of each
# filtered recording, in the same order as in `filtrado`.
oz_filtered = {
    key: [df['Oz'] for df in dfs]
    for key, dfs in filtrado.items()
}

In [None]:
# Plot a single figure: the filtered Oz signal of trial 1 of test 1 of patient 1
plot_key = "P01_T1_R1_1"

# Direct lookup instead of scanning every key; nothing is drawn if the key is absent
if plot_key in oz_filtered:
    fig, ax = plt.subplots()
    ax.set_title(f'{plot_key}_Oz')
    for recording_number, trace in enumerate(oz_filtered[plot_key], start=1):
        # Only the first 200 samples are shown. Each line gets a label so that
        # the legend has entries to display.
        ax.plot(trace.iloc[:200], label=f'Recording {recording_number}')
    # NOTE: the Series is plotted against its index, so the x-axis shows the
    # sample number rather than the values of the 'TimeStamps' column.
    ax.set_xlabel('TimeStamps')
    ax.set_ylabel('Valor')
    ax.legend()
    plt.show()

In [None]:
# Use the first stored key to find out how many samples a trial contains
first_key = next(iter(oz_filtered))
first_matrix = np.array(oz_filtered[first_key])
X = first_matrix.T  # samples along the first axis

# Time axis of one trial, derived from the sampling frequency so that the
# reference sinusoids keep their nominal frequency whatever the exact trial
# length is (no hard-coded duration).
window_size = X.shape[0]
t = np.arange(window_size) / fs

# Sine and cosine reference signals, stored as [sin f1, cos f1, sin f2, cos f2, ...]
# so that integer-dividing a position in the list by 2 gives the frequency index.
frequencies = [7, 11, 13, 17]
reference_signals = []
for freq in frequencies:
    phase = 2 * np.pi * freq * t
    reference_signals.extend([10 * np.sin(phase), 10 * np.cos(phase)])

# Same references as one 2-D array with one column per signal
ref = np.array(reference_signals).T

In [None]:
# Correlations calculation Function
def calculate_correlations(matrix, reference_signals):
    """Correlate `matrix` with each reference signal through a one-component CCA.

    Parameters
    ----------
    matrix : array-like
        Data passed as the first argument to ``CCA.fit`` / ``CCA.transform``.
    reference_signals : iterable
        Reference signals; each one is fitted against `matrix` separately.

    Returns
    -------
    list
        One correlation coefficient per reference signal, in input order,
        computed between the two sets of scores returned by the fitted CCA.
    """
    model = CCA(n_components=1)  # a single canonical component

    def _canonical_correlation(target):
        # Refit the shared estimator for this reference, then project both sides
        model.fit(matrix, target)
        scores_x, scores_y = model.transform(matrix, target)
        # Off-diagonal entry of the 2x2 correlation matrix of the two score vectors
        return np.corrcoef(scores_x.T, scores_y.T)[0, 1]

    return [_canonical_correlation(target) for target in reference_signals]

In [None]:
# Per-key results: the list of correlations and the predicted class
all_correlations = {}

total_correct = 0
total_trials = 0

for key in oz_filtered:
    matrix = np.array(oz_filtered[key]).T
    correlations = calculate_correlations(matrix, reference_signals)
    max_correlation_index = np.argmax(correlations)

    # References come in consecutive pairs, so integer division by 2 maps the
    # winning reference to its pair number; +1 makes the class 1-based.
    predicted_class = max_correlation_index // 2 + 1
    all_correlations[key] = {'correlations': correlations, 'max_correlation_index': predicted_class}

    # The expected class is read from the last character of the key
    if str(predicted_class) == key[-1]:
        total_correct += 1
    total_trials += 1

# Accuracy (undefined, reported as NaN, when there were no trials to score)
accuracy = total_correct / total_trials if total_trials else float('nan')

for key, result in all_correlations.items():
    print(f"{key}: {result}")

print(f"Total de trials: {total_trials}")
print(f"Total de respostas corretas: {total_correct}")
print(f"Accuracy: {accuracy * 100:.2f}%")