# Title: Transformer Model (MSC/MPC Features)

This notebook demonstrates how to train and evaluate a Transformer model on EEG data (processed via MSC/MPC features). It includes steps for data loading, augmentation, dimensionality reduction (PCA), training, confusion matrix visualization, per-class bar charts, and an accuracy comparison with/without PCA.

In [None]:
import scipy.io
import os
import pandas as pd
import numpy as np
data=pd.read_csv('EEG_data_subject4', sep="\t") # raw tab-separated subject-4 table; `data` is rebound to a NumPy array built from df_imagined a few cells below

In [None]:
# Read the tab-separated subject-3 recording and discard its "Time" column,
# producing the frame in a single expression (re-running the cell is safe).
df_imagined = pd.read_csv('EEG_data_subject3', sep="\t").drop(columns=["Time"])

In [None]:
df_imagined

# Loading EEG data
This section loads the EEG datasets, merges them if necessary, and displays initial information about the data structures. We then define some variables and dictionaries related to the classification labels (words).

In [None]:
# Subject-4 recording without its "Time" column.
df_inner = pd.read_csv('EEG_data_subject4', sep="\t").drop(columns=["Time"])
# Row-wise concatenation of both recordings (subject 3 first, then subject 4).
df_combined = pd.concat([df_imagined, df_inner])

In [None]:
# Transposed NumPy copy of the subject-3 frame: DataFrame columns become rows
# (presumably one row per EEG channel — confirm against the file layout).
data = np.array(df_imagined).T
# Labels loaded from disk; the permutation cell indexes the epochs with
# len(y_array), so there should be one label per epoch.
y_array = np.load("labels_word_list.npy")
# y_array=np.array(list(y_array)*2) --for combined data
y_array[:10]

In [None]:
# Word -> integer class id. A word's position in the list below is its id (0..19).
num_dict = {
    word: class_id
    for class_id, word in enumerate([
        'date', 'goose', 'spruce', 'knight', 'juice',
        'moose', 'night', 'queen', 'berry', 'hedgehog',
        'water', 'daughter', 'gooseberry', 'waterfowl', 'wilderness',
        'relative', 'watermelon', 'caterpillar', 'environment', 'ambassador',
    ])
}

# 2) Data Epoching / Segmenting the Data

In [None]:
# Cut the (rows, samples) recording into consecutive, non-overlapping epochs of
# 256 samples each; trailing samples that do not fill a whole epoch are dropped.
# NOTE(review): 256 samples equal one second only at a 256 Hz sampling rate; the
# MNE/filtering cells further down use 128 Hz — confirm the epoch duration.
print("Epoching data...")
n = data.shape[1] // 256
epoched_data = np.array([data[:, 256 * i:256 * (i + 1)] for i in range(n)])

# Shuffling / random permutation
Ensures randomization of the epochs and their labels.

In [None]:
# Shuffle epochs and labels with one shared, seeded permutation so that every
# epoch keeps its own label.
rng = np.random.RandomState(seed=42)
p = rng.permutation(len(y_array))
epoched_data, y_array = epoched_data[p], y_array[p]
epoched_data.shape  #(num_epochs,num_channels,epoch_length)

# Data Augmentation
Creates additional “sub-epochs” by sliding windows within each original epoch.

Here, we create overlapping sub‐epochs (128 samples each, with a certain overlap) to artificially increase the number of training examples. Then we replicate labels accordingly.


In [None]:
# Sliding-window augmentation: from every epoch take windows of `length` samples
# whose start offsets are 0, overlap, 2*overlap, ..., length.
# Note that `overlap` is used as the step between window starts.
n = epoched_data.shape[0]
length = 128
overlap = 32
augmented_data = np.array([
    epoched_data[i][:, j:j + length]
    for i in range(n)
    for j in range(0, length + 1, overlap)
])
augmented_data.shape #(num_epochs,num_channels,epoch_length)

In [None]:
epoched_data=augmented_data

In [None]:
# Repeat every label `x` times, keeping the label order.
# `x` is the number of augmented windows divided by 5000 (truncated), so it is 1
# for exactly 5000 windows and 0 — which empties y_array — for fewer than 5000.
x = int(augmented_data.shape[0] / 5000) #/1000 for 5000
new_array = [word for word in y_array for _ in range(x)]
y_array = np.array(new_array)


In [None]:
epoched_data.shape

In [None]:
# Adjust labels for the augmented data: every sub-epoch inherits the label of
# the epoch it was cut from, so each label is repeated once per sub-epoch.
#
# The repetition factor is derived from the array sizes instead of being
# hard-coded to 5. This keeps the cell idempotent: once the labels already
# match the number of sub-epochs the factor is 1 and re-running changes nothing,
# whereas a fixed factor would multiply the labels again on every re-run.
n_windows = epoched_data.shape[0]
n_labels = len(y_array)
if n_labels == 0 or n_windows % n_labels != 0:
    raise ValueError(
        f"Cannot align {n_labels} labels with {n_windows} augmented epochs; "
        "re-run the label loading / augmentation cells."
    )
repeat_factor = n_windows // n_labels

# np.repeat keeps the order: [a, a, ..., b, b, ...], matching the augmentation
# loop, which emits all windows of one epoch consecutively.
augmented_labels = np.repeat(y_array, repeat_factor)

y_array = augmented_labels
print("y_array.shape:", y_array.shape)  # one label per augmented sub-epoch


# Creating continuous data from epoched data

We reshape/concatenate the epochs so we can perform subsequent filtering (alpha, beta, gamma extraction) using MNE functions.


In [None]:
# Build one long signal per channel by laying the epochs end to end:
# (trials, channels, samples) -> (channels, trials * samples).
# NOTE(review): the epochs were shuffled and cut into overlapping windows above,
# so neighbouring segments of this "continuous" signal are not contiguous in
# time; filters applied to it run across those seams.
trials, channels, sample_size = epoched_data.shape
continuous_data = epoched_data.transpose(1, 0, 2).reshape(channels, trials * sample_size)
continuous_data.shape


# **Extracting Alpha, Beta, Gamma Bands with MNE from continuous data**

We apply notch filtering at 60 Hz, downsample, and specifically extract alpha, beta, and gamma bands for each channel, then re‐epoch them.


In [None]:
pip install mne --quiet

In [None]:
# Setup MNE for filtering
# Wraps `continuous_data` in an MNE Raw object so MNE's plotting and filtering
# helpers can be applied to it.
import mne
sfreq=128  # sampling rate handed to MNE, in Hz
# One name per row of `continuous_data` (14 names).
ch_names=["F3","FC5","AF3","F7","T7","P7","O1","O2","P8","T8","F8","AF4","FC6","F4"] 
# NOTE(review): no ch_types is passed, so MNE falls back to its default channel
# type — confirm that this is intended for EEG data.
info=mne.create_info(ch_names,sfreq=sfreq)
raw=mne.io.RawArray(continuous_data,info)
raw.plot(scalings = 'auto');  # trailing ';' suppresses the figure repr in the notebook
print("Filtering EEG bands...")

# Filtering and downsampling

Filtering

In [None]:
# Notch filter at 60 Hz
raw.notch_filter(60,picks='all')

Downsampling

In [None]:
# (Optional) downsampling
raw.resample(120, npad='auto')
# Notice that the max plotted frequency is 60, the nyquist rate!

In [None]:
# Band-pass `continuous_data` into the three EEG bands used as feature sources.
# Each (low, high) pair is passed to mne.filter.filter_data together with a
# sampling rate of 128 Hz.
# NOTE(review): this filters `continuous_data`, not `raw`, so the notch filter
# and resampling applied to `raw` in the cells above do not affect these bands.
band_edges_hz = ((8, 12), (12, 30), (30, 50))  # alpha, beta, gamma
alpha_continuous, beta_continuous, gamma_continuous = [
    mne.filter.filter_data(continuous_data, 128, low_hz, high_hz)
    for low_hz, high_hz in band_edges_hz
]



In [None]:
print("alpha_continuous shape:", alpha_continuous.shape)

In [None]:
# Cut each band-filtered continuous signal back into epochs of the original
# length, giving arrays shaped (num_epochs, num_channels, epoch_length).
trial_duration = epoched_data.shape[2]  # samples per epoch
n = epoched_data.shape[0]               # number of epochs

# Column range that epoch i occupies inside the continuous signals.
epoch_slices = [slice(i * trial_duration, (i + 1) * trial_duration) for i in range(n)]

alpha_epoched = np.array([alpha_continuous[:, cols] for cols in epoch_slices])
beta_epoched = np.array([beta_continuous[:, cols] for cols in epoch_slices])
gamma_epoched = np.array([gamma_continuous[:, cols] for cols in epoch_slices])
    

In [None]:
print("alpha_epoched shape:", alpha_epoched.shape)  #(num_epochs,num_channels,epoch_length)

# **Feature Extraction (MPC & MSC)**

We compute Mean Phase Coherence (MPC) and Magnitude Squared Coherence (MSC) for alpha, beta, gamma bands, and assemble them into a feature vector. This process can be computationally heavy, so a progress indicator (`print(i,end=' ')`) is shown.


In this second technique, the features **mean phase coherence (MPC)** and **magnitude-squared coherence (MSC)** are extracted from the augmented data (see reference `panachakel2021decoding`). The MPC between two EEG channels is a measure of their phase synchronisation. For two EEG signals with instantaneous phase difference $\phi(t)=\phi_1(t)-\phi_2(t)$, the MPC can be estimated via

$$
    \lambda = \frac{1}{N}\left|\sum_{n=0}^{N-1}e^{j\hat{\phi}(n)}\right|
$$

where $\{\hat{\phi}(n)\}_{n=0}^{N-1}$ is the sampled estimate of $\phi(t)$, $N$ is the number of samples, and the instantaneous phases are computed using the Hilbert transform.
MSC, in contrast, works in the spectral domain: it quantifies the linear relationship between a pair of signals at each frequency. A Hamming window is used for the spectral estimates. Let the auto-spectral densities and the cross-spectral density of $x(t)$ and $y(t)$ be denoted by $P_{xx}(f)$, $P_{yy}(f)$ and $P_{xy}(f)$ respectively at frequency $f$. The MSC between them is given by:

$
    \gamma _{xy}^{2} (f) = {{\left\vert {P_{xy} (f)} \right\vert^{2} } \over {P_{xx} (f)P_{yy} (f)}} 
$

**MSC (Magnitude Squared Coherence)**:  Measures the strength of correlation between two EEG signals in the frequency domain.

**MPC (Mean Phase Coherence)**:  Measures how well two EEG signals stay in phase over time.

In [None]:
#calculation of hilbert array from augmented array
from scipy.signal import hilbert
from scipy.signal import welch
from scipy.signal import csd
#Defining magnitude squared coherence
import numpy as np
from scipy.signal import welch
from scipy.signal import csd,coherence

def msc(arr1, arr2, fs=128, window="hamm", nperseg=8):
    """Mean magnitude-squared coherence (MSC) between two 1-D signals.

    The coherence spectrum is estimated with ``scipy.signal.coherence`` and
    averaged over all returned frequency bins, giving one scalar per pair.

    Parameters
    ----------
    arr1, arr2 : array_like
        The two signals to compare (same length).
    fs : float, optional
        Sampling frequency passed to ``coherence``. Default 128.
    window : str, optional
        Window name passed to ``coherence``. Default ``"hamm"`` (Hamming).
    nperseg : int, optional
        Segment length passed to ``coherence``. Default 8.

    Returns
    -------
    float
        Mean of the coherence values over frequency.
    """
    # coherence() returns (frequencies, Cxy); only the coherence values are used.
    _, coherence_per_freq = coherence(arr1, arr2, fs=fs, window=window, nperseg=nperseg)
    return np.mean(coherence_per_freq)

In [None]:
#Defining mean phase coherence
from scipy.signal import hilbert
import numpy as np
def mpc(arr1,arr2):
    """Mean phase coherence (MPC) between two 1-D signals.

    The instantaneous phase of each signal is the angle of its analytic signal
    (Hilbert transform). The MPC is the magnitude of the mean unit phasor of the
    phase difference: 1 for a constant phase lag, smaller when the phase
    difference is spread out.

    Parameters
    ----------
    arr1, arr2 : array_like
        The two signals to compare (same length).

    Returns
    -------
    float
        |sum(exp(1j * (phase_1 - phase_2)))| / len(arr1)
    """
    # np.angle gives the four-quadrant phase in (-pi, pi]. Computing
    # arctan(imag / real) instead only resolves the phase modulo pi, which flips
    # the sign of part of the phasors and lowers the coherence artificially.
    phase_1 = np.angle(hilbert(arr1))
    phase_2 = np.angle(hilbert(arr2))
    phase_diff = phase_1 - phase_2
    return np.abs(np.sum(np.exp(1j * phase_diff))) / len(arr1)
print("Calculating coherence matrices ...")

In [None]:
# Dimensions of the band-epoched arrays: epochs, channels, samples per epoch.
n, m, l = alpha_epoched.shape

# One (channel x channel) matrix per epoch, per band and per measure,
# zero-initialised and filled in by the next cell.
mpc_alpha, mpc_beta, mpc_gamma = (np.zeros([n, m, m]) for _ in range(3))
msc_alpha, msc_beta, msc_gamma = (np.zeros([n, m, m]) for _ in range(3))

print(mpc_gamma.shape)


In [None]:
# Fill the MPC and MSC matrices for every epoch and every ordered channel pair.
# Each entry pairs a band's epoched signals with its two output matrices.
band_targets = (
    (alpha_epoched, mpc_alpha, msc_alpha),
    (beta_epoched, mpc_beta, msc_beta),
    (gamma_epoched, mpc_gamma, msc_gamma),
)
for trial in range(n):
    for band_epochs, mpc_out, msc_out in band_targets:
        trial_signals = band_epochs[trial]  # (channels, samples) for this epoch
        for row in range(m):
            for col in range(m):
                mpc_out[trial][row][col] = mpc(trial_signals[row], trial_signals[col])
                msc_out[trial][row][col] = msc(trial_signals[row], trial_signals[col])
    # Progress indicator: index of the epoch just finished.
    print(trial, end=' ')

print("\nMPC/MSC calculations complete!")

# **Creating the Feature Vector**

We pack MPC (above the diagonal) and MSC (below the diagonal) for alpha, beta, gamma into a single 3D array, then flatten.


In [None]:
# Independent backups of the computed matrices. `.copy()` is required here:
# a plain assignment only creates a second name for the same array, so any
# in-place change to the original would also change the "copy".
mpc_alpha_copy = mpc_alpha.copy()
mpc_beta_copy = mpc_beta.copy()
mpc_gamma_copy = mpc_gamma.copy()

msc_alpha_copy = msc_alpha.copy()
msc_beta_copy = msc_beta.copy()
msc_gamma_copy = msc_gamma.copy()

In [None]:
msc_alpha_copy.shape

In [None]:
mpc_alpha_copy.shape

In [None]:
files=[mpc_alpha,mpc_beta,mpc_gamma,msc_alpha,msc_beta,msc_gamma]
file_names=["mpc_alpha","mpc_beta","mpc_gamma","msc_alpha","msc_beta","msc_gamma"]

In [None]:
# Components of the paths used by the save/load cells below:
#   <part>/<part>_<file_type>/<aug_type>/<name>.npy
part="01"
# NOTE(review): the features are computed from df_imagined ('EEG_data_subject3'),
# while df_inner holds 'EEG_data_subject4' — confirm that "inner" is the right tag.
file_type="inner"
# NOTE(review): the epochs were augmented with sliding windows earlier in the
# notebook — confirm that "non-aug" is the right tag.
aug_type="non-aug"

In [None]:

#saving the files
# One .npy file per matrix under <part>/<part>_<file_type>/<aug_type>/.
for name, matrix in zip(file_names, files):
    path=''+part+'/'+part+'_'+file_type+'/'+aug_type+'/'+name+'.npy'
    # np.save does not create missing directories, so make sure the target exists.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    np.save(path, matrix)

In [None]:
#loading the files
# Replace every entry of `files` with the array stored on disk under the same name.
for i, name in enumerate(file_names):
    files[i] = np.load(''+part+'/'+part+'_'+file_type+'/'+aug_type+'/'+name+'.npy')

In [None]:
# Rebind the six matrices from `files`; the order matches `file_names`
# (three MPC matrices first, then three MSC matrices).
mpc_alpha, mpc_beta, mpc_gamma, msc_alpha, msc_beta, msc_gamma = files

In [None]:
msc_alpha.shape

In [None]:
mpc_alpha.shape

This section is used to create the feature vector

In [None]:
# Number of epochs and of channels, taken from the MSC alpha matrix.
n_1, m_1 = msc_alpha.shape[:2]
# Feature tensor: one (channel x channel) grid per epoch, with three band values per cell.
x_array_2 = np.zeros([n_1, m_1, m_1, 3])

# Shape report for all six input matrices.
for label, matrix in (
    ("mpc_alpha", mpc_alpha), ("mpc_beta", mpc_beta), ("mpc_gamma", mpc_gamma),
    ("msc_alpha", msc_alpha), ("msc_beta", msc_beta), ("msc_gamma", msc_gamma),
):
    print(label + " shape:", matrix.shape)


In [None]:
# Pack the features into x_array_2 without Python-level loops:
#   above the diagonal (j < k) -> MPC for alpha, beta, gamma
#   below the diagonal (j > k) -> MSC for alpha, beta, gamma
# The diagonal itself is left untouched.
upper_rows, upper_cols = np.triu_indices(m_1, k=1)
lower_rows, lower_cols = np.tril_indices(m_1, k=-1)

# Stack the three bands on a trailing axis: (epochs, channels, channels, 3).
mpc_bands = np.stack([mpc_alpha, mpc_beta, mpc_gamma], axis=-1)
msc_bands = np.stack([msc_alpha, msc_beta, msc_gamma], axis=-1)

x_array_2[:, upper_rows, upper_cols] = mpc_bands[:, upper_rows, upper_cols]
x_array_2[:, lower_rows, lower_cols] = msc_bands[:, lower_rows, lower_cols]

In [None]:
x_array_cnn=x_array_2

In [None]:
# Flatten the packed (epochs, channels, channels, 3) tensor into one feature
# vector per epoch, leaving out the diagonal cells (they hold no feature).
#
# The diagonal is removed by position, not by value: filtering on `!= 0.0`
# would also drop any genuine feature that happens to be exactly zero and then
# break the reshape.
if x_array_2.ndim != 4:
    raise ValueError(
        "x_array_2 is expected to be the 4-D packed tensor; it looks already "
        "flattened — re-run the packing cells before this one."
    )
off_diagonal = ~np.eye(m_1, dtype=bool)  # True for every (j, k) with j != k
a = x_array_2[:, off_diagonal, :]        # (epochs, m_1*m_1 - m_1, 3), row-major over (j, k)
x_array_2 = a.reshape(n_1, m_1*m_1*3-m_1*3)

In [None]:
x_array_2.shape

In [None]:
# Persist the feature matrix and the labels as <part>/<part>_<file_type>_X.npy
# and ..._Y.npy. np.save does not create missing directories, so create it first.
os.makedirs(part, exist_ok=True)
np.save(''+part+'/'+part+'_'+file_type+'_'+"X",x_array_2)
np.save(''+part+'/'+part+'_'+file_type+'_'+"Y",y_array)

In [None]:
# Reload the saved feature matrix (X) and labels (Y) from disk.
xy_prefix = ''+part+'/'+part+'_'+file_type+'_'
x_array = np.load(xy_prefix+"X"+".npy")
y_array = np.load(xy_prefix+"Y"+".npy")

In [None]:
x_array = x_array_2

In [None]:
x_array.shape

# Reshaping x_array for a “Single Time-Step” Transformer


In [None]:
import numpy as np

# Treat every feature vector as a sequence with a single time step.
# NOTE: X_seq is rebuilt from `epoched_data` in the "Prepare Multi-Step X_seq"
# cell further down, which replaces this single-step version.
N, F = x_array.shape
seq_len, input_dim = 1, F

# (N, F) -> (N, 1, F) by inserting a length-1 sequence axis
X_seq = x_array[:, np.newaxis, :]

print("X_seq shape:", X_seq.shape)  # (N, 1, F)


In order to reduce the dimension of the feature vector Principal component analysis (PCA) was used. 

# **Principal component analysis (PCA) / Standardization**
Standard scaling is also used to zero‐mean/unit‐variance the features.
We apply PCA to capture 95% variance. This yields the “with PCA” scenario. We will compare it to a “without PCA” scenario later.


In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Keep the raw feature matrix for the "without PCA" scenario.
x_array_without_pca = x_array

# Standardise every feature to zero mean / unit variance before PCA, as the
# section text above describes (the scaling step was missing from the code).
scaler = StandardScaler()
x_array_scaled = scaler.fit_transform(x_array_without_pca)

# PCA(.95) keeps as many components as needed to explain 95% of the variance.
# NOTE(review): scaler and PCA are fitted on all samples, before any
# train/validation/test split; fit them on the training portion only if these
# features are used for model evaluation.
pca=PCA(.95)
pca.fit(x_array_scaled)
x_array=pca.transform(x_array_scaled)
x_array_with_pca=x_array
print('x_array_without_pca',x_array_without_pca.shape,"[x_array]:",x_array.shape)

In [None]:
x_array.shape

In [None]:
y_array.shape

# Build a PyTorch Dataset & DataLoader

In [None]:
pip install torch torchvision torchaudio  --quiet

# Prepare Multi-Step X_seq

In [None]:
import numpy as np

# epoched_data is (N, channels, time): after augmentation every sample is one
# sliding-window sub-epoch, and y_array holds one label per sample.
N, channels, time = epoched_data.shape

# The Transformer reads `time` steps per sample, each step being a vector of
# `channels` values, so the last two axes are swapped:
# (N, channels, time) -> (N, time, channels).
X_seq = epoched_data.transpose(0, 2, 1)

print("X_seq shape:", X_seq.shape)       # (N, time, channels)
print("y_array shape:", y_array.shape)   # (N,)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class EEGTimeSeriesDataset(Dataset):
    """Index-based dataset that pairs each sequence in ``X`` with its label in ``y``.

    Items are converted to tensors on access: the sequence as float32 and the
    label as int64 (``torch.long``).
    """

    def __init__(self, X, y):
        """
        X shape: (num_samples, seq_len, input_dim)
        y shape: (num_samples,)
        """
        self.X, self.y = X, y
        # Every sequence needs exactly one label.
        assert len(self.X) == len(self.y), "Mismatch in samples"

    def __len__(self):
        """Number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(sequence, label)`` for sample ``idx`` as torch tensors."""
        sequence = torch.tensor(self.X[idx], dtype=torch.float32)  # (seq_len, input_dim)
        label = torch.tensor(self.y[idx], dtype=torch.long)
        return sequence, label

# Create dataset
dataset = EEGTimeSeriesDataset(X_seq, y_array)

# Train/val/test split (80% / 10% / 10%), done per ORIGINAL epoch.
#
# The augmentation cell emits all sliding windows of one epoch consecutively,
# and these windows overlap heavily. Splitting individual windows at random
# would put overlapping windows of the same epoch into both the training and the
# evaluation sets and inflate the reported scores, so whole epochs ("groups")
# are assigned to a split instead. A fixed seed makes the split reproducible.
num_samples = len(dataset)
indices = np.arange(num_samples)

# Windows produced per epoch, derived from the augmentation parameters
# (`length`, `overlap`) defined in the augmentation cell.
windows_per_epoch = len(range(0, length + 1, overlap))
group_of_sample = indices // windows_per_epoch   # id of the source epoch of every sample
groups = np.unique(group_of_sample)
np.random.RandomState(seed=42).shuffle(groups)

num_groups = len(groups)
n_train_groups = int(0.8 * num_groups)
n_val_groups = int(0.1 * num_groups)

train_idx = indices[np.isin(group_of_sample, groups[:n_train_groups])]
val_idx   = indices[np.isin(group_of_sample, groups[n_train_groups : n_train_groups + n_val_groups])]
test_idx  = indices[np.isin(group_of_sample, groups[n_train_groups + n_val_groups:])]

train_size, val_size, test_size = len(train_idx), len(val_idx), len(test_idx)
assert train_size + val_size + test_size == num_samples, "split does not cover all samples"

X_train, y_train = X_seq[train_idx], y_array[train_idx]
X_val,   y_val   = X_seq[val_idx],   y_array[val_idx]
X_test,  y_test  = X_seq[test_idx],  y_array[test_idx]

train_ds = EEGTimeSeriesDataset(X_train, y_train)
val_ds   = EEGTimeSeriesDataset(X_val,   y_val)
test_ds  = EEGTimeSeriesDataset(X_test,  y_test)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=16, shuffle=False)
test_loader  = DataLoader(test_ds,  batch_size=16, shuffle=False)

print(f"Train size: {len(train_ds)} | Val size: {len(val_ds)} | Test size: {len(test_ds)}")


Small Transformer Model

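Here’s a light version of the Transformer code, with logging. With the multi-step `X_seq` prepared above, the encoder sees one sequence of 128 time steps per sample, each step holding the 14 channel values. If the single-time-step `X_seq` (seq_len=1) built from the MSC/MPC feature vectors is used instead, the Transformer encoder operates on a sequence of length 1 and behaves almost like an MLP, although it still goes through the Transformer code path required for the “prediction model”.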

In [None]:
import torch.nn as nn
import math

class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to a batch of embeddings.

    A table of shape (1, max_len, d_model) is precomputed once: even feature
    columns hold sines and odd columns hold cosines of the position scaled by
    geometrically spaced frequencies. It is stored as a buffer, so it moves with
    the module across devices but is not a trainable parameter.

    Args:
        d_model: Embedding size of the inputs (even or odd).
        max_len: Longest sequence length the table covers.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe[:, 0::2] = torch.sin(angles)
        # For an odd d_model there is one cosine column fewer than sine columns,
        # so trim the angles to the number of odd columns to avoid a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        pe = pe.unsqueeze(0)  # shape (1, max_len, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``seq_len`` positions.

        Args:
            x: Tensor of shape (batch_size, seq_len, d_model).
        """
        seq_len = x.size(1)
        x = x + self.pe[:, :seq_len, :]
        return x

class SmallTransformer(nn.Module):
    """Compact Transformer-encoder classifier for sequences of feature vectors.

    Pipeline: linear embedding -> sinusoidal positional encoding -> stack of
    Transformer encoder layers -> pooling over the sequence axis -> linear
    classifier.

    Args:
        input_dim: Number of features per time step.
        d_model: Embedding size used inside the encoder.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of encoder layers.
        dim_feedforward: Hidden size of each layer's feed-forward block.
        num_classes: Number of output classes.
        dropout: Dropout probability inside the encoder layers.
        pool: "mean" averages over all time steps; any other value uses the
            output of the last time step.
    """

    def __init__(self, input_dim, d_model=64, nhead=4, num_layers=2, 
                 dim_feedforward=128, num_classes=20, dropout=0.1, pool="mean"):
        super().__init__()
        self.embedding = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model)

        # batch_first=True keeps tensors as (batch, seq_len, d_model) throughout.
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                batch_first=True,
            ),
            num_layers=num_layers,
        )
        self.pool = pool
        self.classifier = nn.Linear(d_model, num_classes)

    def forward(self, x):
        """
        x shape: (batch_size, seq_len, input_dim)
        returns: (batch_size, num_classes) class logits
        """
        # Embed, add positions, encode: stays (batch_size, seq_len, d_model).
        encoded = self.transformer_encoder(self.pos_encoder(self.embedding(x)))

        # Reduce the sequence axis to one vector per sample, then classify.
        if self.pool != "mean":
            return self.classifier(encoded[:, -1, :])
        return self.classifier(encoded.mean(dim=1))

print("Multi-step Transformer code loaded!")

print("Multi-step Transformer code loaded!")


Training & Logging

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import f1_score

# Set the device based on availability (MPS for Apple Silicon, CUDA for NVIDIA, else CPU)
device = (
    "mps" if torch.backends.mps.is_available() 
    else "cuda" if torch.cuda.is_available() 
    else "cpu"
)
print("Using device:", device)

# Seed PyTorch's RNG so weight initialisation, dropout and batch shuffling are
# repeatable from one run to the next.
torch.manual_seed(42)

num_classes = 20  # Adjust based on your dataset


def _collect_predictions(net, loader):
    """Run ``net`` over ``loader`` without gradients.

    Returns ``(true_labels, predicted_labels)`` as two lists aligned by sample.
    The network is switched to eval mode; callers that keep training must call
    ``net.train()`` again.
    """
    net.eval()
    true_labels, predicted_labels = [], []
    with torch.no_grad():
        for batch_x, batch_y in loader:
            logits = net(batch_x.to(device))
            predicted_labels.extend(logits.argmax(dim=-1).cpu().numpy())
            true_labels.extend(batch_y.cpu().numpy())
    return true_labels, predicted_labels


#########################################################
# 1) Model, Criterion, Optimizer
#########################################################
model = SmallTransformer(
    # Features per time step, read from the data so that the model always
    # matches whichever X_seq was prepared (14 EEG channels for the multi-step
    # input).
    input_dim=X_seq.shape[-1],
    d_model=64,
    nhead=4,
    num_layers=2,
    dim_feedforward=128,
    num_classes=num_classes,
    dropout=0.1,
    pool="mean"
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

#########################################################
# 2) Training/Validation Setup
#########################################################
max_epochs = 100
train_losses = []     # mean training loss per epoch
val_accuracies = []   # validation accuracy per epoch (fraction in [0, 1])
val_f1_scores = []    # weighted validation F1 per epoch

#########################################################
# 3) Main Training Loop (No Early Stopping)
#########################################################
for epoch in range(1, max_epochs + 1):
    #######################################################
    # A) TRAINING
    #######################################################
    model.train()
    running_loss = 0.0

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        loss = criterion(logits, batch_y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Average of the per-batch losses of this epoch.
    avg_train_loss = running_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    #######################################################
    # B) VALIDATION (just for monitoring, no early stop)
    #######################################################
    val_true, val_preds = _collect_predictions(model, val_loader)

    # Compute validation accuracy and F1 score. zero_division=0 keeps the score
    # of a never-predicted class at 0 without emitting a warning every epoch.
    acc = accuracy_score(val_true, val_preds)
    f1 = f1_score(val_true, val_preds, average='weighted', zero_division=0)  # or 'macro' depending on your needs
    val_accuracies.append(acc)
    val_f1_scores.append(f1)

    # Print training info each epoch
    print(f"[Epoch {epoch}/{max_epochs}] Train Loss: {avg_train_loss:.4f} | Val Acc: {acc:.4f} | Val F1: {f1:.4f}")

#########################################################
# 4) Evaluate on Test Set After All `max_epochs` Epochs
#########################################################
test_true, test_preds = _collect_predictions(model, test_loader)

test_acc = accuracy_score(test_true, test_preds)
print(f"\nFinal Test Accuracy after {max_epochs} epochs: {test_acc:.4f}")

# Accuracy and F1-Score without PCA (Transformer, 83 Epochs)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Validation accuracy per epoch as a percentage (x axis of the scatter plot).
val_accuracies_percent = [100 * acc for acc in val_accuracies]
title = "Transformer Relationship between Accuracy and F1-Score Without PCA"

# One point per epoch; the colour encodes the epoch index (earlier epochs are darker).
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    val_accuracies_percent,
    val_f1_scores,
    c=range(len(val_accuracies_percent)),
    cmap='viridis',
    marker='o',
)
ax.set_xlabel("Validation Accuracy (%)")
ax.set_ylabel("Validation F1-Score")
ax.set_title(title)
ax.grid(True)

# The figure is written to transformer/<title>.png
save_dir = "transformer"
os.makedirs(save_dir, exist_ok=True)
file_name = title + ".png"
save_path = os.path.join(save_dir, file_name)

# Save at 900 dpi with a tight bounding box before the figure is shown.
fig.savefig(save_path, dpi=900, bbox_inches='tight')
# print(f"Plot saved to {save_path}")

plt.show()

# F1 scores as percentages (kept for further processing).
val_f1_scores_percent = [100 * score for score in val_f1_scores]


# Accuracy and F1-Score with PCA Transformer 100 Epochs


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Validation accuracy per epoch as a percentage (x axis of the scatter plot).
# NOTE(review): this cell reads the same val_accuracies / val_f1_scores lists as
# the "Without PCA" cell; re-run training on the PCA features before it,
# otherwise both figures show the same run.
val_accuracies_percent = [100 * acc for acc in val_accuracies]
title = "Transformer Relationship between Accuracy and F1-Score With PCA"

# One point per epoch; the colour encodes the epoch index.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    val_accuracies_percent,
    val_f1_scores,
    c=range(len(val_accuracies_percent)),
    cmap='viridis',
    marker='o',
)
ax.set_xlabel("Validation Accuracy (%)")
ax.set_ylabel("Validation F1-Score")
ax.set_title(title)
ax.grid(True)

# The figure is written to transformer/<title>.png
save_dir = "transformer"
os.makedirs(save_dir, exist_ok=True)
file_name = title + ".png"
save_path = os.path.join(save_dir, file_name)

# Save at 900 dpi before displaying.
fig.savefig(save_path, dpi=900, bbox_inches='tight')
# print(f"Plot saved to {save_path}")

# Now display the plot
plt.show()

# F1 scores as percentages (kept for further processing).
val_f1_scores_percent = [100 * score for score in val_f1_scores]


# Highest Accuracy & F1 Score

In [None]:
import numpy as np

# val_accuracies and val_f1_scores hold one entry per epoch.
# Pick the epoch with the highest validation accuracy and report its F1 as well.
best_epoch_index = np.argmax(val_accuracies)
best_accuracy = 100 * val_accuracies[best_epoch_index]  # percentage, for display
best_f1 = val_f1_scores[best_epoch_index]               # kept as a fraction

for message, value in (
    ("Highest Accuracy: {:.2f}%", best_accuracy),
    ("F1 Score corresponding to highest accuracy: {:.2f}", best_f1),
):
    print(message.format(value))




In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Validation accuracy per epoch as a percentage (x axis of the scatter plot).
val_accuracies_percent = [100 * acc for acc in val_accuracies]
# NOTE(review): the title names a CNN LSTM, while val_accuracies / val_f1_scores
# in this notebook come from the SmallTransformer run — confirm the title.
title = "Fig 4.2.2 CNN LSTM - Accuracy and F1-Score With PCA"

# One point per epoch; the colour encodes the epoch index.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    val_accuracies_percent,
    val_f1_scores,
    c=range(len(val_accuracies_percent)),
    cmap='viridis',
    marker='o',
)
ax.set_xlabel("Validation Accuracy (%)")
ax.set_ylabel("Validation F1-Score")
ax.set_title(title)
ax.grid(True)

# The figure is written to transformer/<title>.png
save_dir = "transformer"
os.makedirs(save_dir, exist_ok=True)
file_name = title + ".png"
save_path = os.path.join(save_dir, file_name)

# Save at 900 dpi before displaying.
fig.savefig(save_path, dpi=900, bbox_inches='tight')
# print(f"Plot saved to {save_path}")

# Now display the plot
plt.show()

# F1 scores as percentages (kept for further processing).
val_f1_scores_percent = [100 * score for score in val_f1_scores]


# best_accuracy / best_f1 are computed in the "Highest Accuracy & F1 Score" cell.
print("Highest Accuracy: {:.2f}%".format(best_accuracy))
print("F1 Score corresponding to highest accuracy: {:.2f}".format(best_f1))


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Validation accuracy per epoch as a percentage (x axis of the scatter plot).
val_accuracies_percent = [100 * acc for acc in val_accuracies]
# NOTE(review): the title names a MultiLayer Perceptron, while val_accuracies /
# val_f1_scores in this notebook come from the SmallTransformer run — confirm.
title = "Fig 4.1.2 MultiLayer Perceptron - Accuracy and F1-Score With PCA"

# One point per epoch; the colour encodes the epoch index.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    val_accuracies_percent,
    val_f1_scores,
    c=range(len(val_accuracies_percent)),
    cmap='viridis',
    marker='o',
)
ax.set_xlabel("Validation Accuracy (%)")
ax.set_ylabel("Validation F1-Score")
ax.set_title(title)
ax.grid(True)

# The figure is written to transformer/<title>.png
save_dir = "transformer"
os.makedirs(save_dir, exist_ok=True)
file_name = title + ".png"
save_path = os.path.join(save_dir, file_name)

# Save at 900 dpi before displaying.
fig.savefig(save_path, dpi=900, bbox_inches='tight')
# print(f"Plot saved to {save_path}")

# Now display the plot
plt.show()

# F1 scores as percentages (kept for further processing).
val_f1_scores_percent = [100 * score for score in val_f1_scores]


# best_accuracy / best_f1 are computed in the "Highest Accuracy & F1 Score" cell.
print("Highest Accuracy: {:.2f}%".format(best_accuracy))
print("F1 Score corresponding to highest accuracy: {:.2f}".format(best_f1))


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Validation accuracy per epoch as a percentage (x axis of the scatter plot).
val_accuracies_percent = [100 * acc for acc in val_accuracies]
# NOTE(review): the title names a MultiLayer Perceptron, while val_accuracies /
# val_f1_scores in this notebook come from the SmallTransformer run — confirm.
title = "Fig 4.3.2 MultiLayer Perceptron - Accuracy and F1-Score With PCA"

# One point per epoch; the colour encodes the epoch index.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(
    val_accuracies_percent,
    val_f1_scores,
    c=range(len(val_accuracies_percent)),
    cmap='viridis',
    marker='o',
)
ax.set_xlabel("Validation Accuracy (%)")
ax.set_ylabel("Validation F1-Score")
ax.set_title(title)
ax.grid(True)

# The figure is written to transformer/<title>.png
save_dir = "transformer"
os.makedirs(save_dir, exist_ok=True)
file_name = title + ".png"
save_path = os.path.join(save_dir, file_name)

# Save at 900 dpi before displaying.
fig.savefig(save_path, dpi=900, bbox_inches='tight')
# print(f"Plot saved to {save_path}")

# Now display the plot
plt.show()

# F1 scores as percentages (kept for further processing).
val_f1_scores_percent = [100 * score for score in val_f1_scores]


# best_accuracy / best_f1 are computed in the "Highest Accuracy & F1 Score" cell.
print("Highest Accuracy: {:.2f}%".format(best_accuracy))
print("F1 Score corresponding to highest accuracy: {:.2f}".format(best_f1))

# Plotting Per-Word Performance (Bag of 20 Words)

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
import matplotlib.pyplot as plt

# Per-word accuracy and F1 on the test set.
#
# This cell reads the test-set labels/predictions produced by the training cell
# (test_true / test_preds) and builds the word list from num_dict, so it runs in
# notebook order; the all_true_labels / all_predictions / label2word names are
# only created by cells further down.
num_classes = 20
class_ids = np.arange(num_classes)

all_true = np.array(test_true)
all_pred = np.array(test_preds)

# Per-class accuracy: share of the samples of class i that were predicted as i
# (0 when the class does not occur in the test set).
per_class_accuracy = []
for i in class_ids:
    idx = np.where(all_true == i)[0]
    if len(idx) == 0:
        per_class_accuracy.append(0)
    else:
        per_class_accuracy.append(accuracy_score(all_true[idx], all_pred[idx]))

# Per-class F1 computed over the WHOLE test set. Computing F1 only on the
# samples whose true label is i ignores every false positive of class i, which
# makes the precision trivially perfect and overstates the score.
per_class_f1 = f1_score(
    all_true, all_pred, labels=class_ids, average=None, zero_division=0
).tolist()

# Word for every class index, derived by inverting the word -> id mapping.
index_to_word = {index: word for word, index in num_dict.items()}
words = [index_to_word[i] for i in range(num_classes)]

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(words, per_class_accuracy, alpha=0.6, label='Accuracy')
ax.plot(words, per_class_f1, color='red', marker='o', label='F1 Score')
ax.set_xlabel("Words")
ax.set_ylabel("Score")
ax.set_title("Per-Word Performance (Accuracy & F1 Score)")
ax.legend()
plt.xticks(rotation=45)
plt.grid(True)
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import os

# ---------------------------
# Data
# ---------------------------
# Use the real MSC/MPC feature matrix (shape: [n_samples, n_features]) kept by
# the PCA cell, instead of random placeholder data, so that the figures saved
# below describe the EEG features.
X = x_array_without_pca

# ---------------------------
# Perform PCA
# ---------------------------
# Fit a PCA with all components. A separate name is used so that the 95%-variance
# `pca` fitted earlier in the notebook is not overwritten.
pca_full = PCA()
X_pca = pca_full.fit_transform(X)

# Get explained variance ratios
explained_variance = pca_full.explained_variance_ratio_

# ---------------------------
# Plot 1: Scree Plot
# ---------------------------
plt.figure(figsize=(8, 6))
components = np.arange(1, len(explained_variance) + 1)
plt.plot(components, explained_variance, marker='o', linestyle='--', color='b')
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.title('Scree Plot: Explained Variance by Principal Components')
# Show at most about 20 tick labels so the axis stays readable with many components.
tick_step = max(1, len(components) // 20)
plt.xticks(components[::tick_step])
plt.grid(True)

# Save scree plot
save_dir = "transformer"
os.makedirs(save_dir, exist_ok=True)
scree_plot_path = os.path.join(save_dir, "PCA_Scree_Plot.png")
plt.savefig(scree_plot_path, dpi=300, bbox_inches='tight')
print(f"Scree plot saved to {scree_plot_path}")

plt.show()

# ---------------------------
# Plot 2: Scatter Plot of the First Two Principal Components
# ---------------------------
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.7, edgecolor='k')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('Visualization of Feature Reduction using PCA')
plt.grid(True)

# Save scatter plot
scatter_plot_path = os.path.join(save_dir, "PCA_Scatter_Plot.png")
plt.savefig(scatter_plot_path, dpi=600, bbox_inches='tight')
print(f"Scatter plot saved to {scatter_plot_path}")

plt.show()


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Collect the test-set predictions of the trained model.
model.eval()
test_preds = []
test_true = []
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        logits = model(batch_x)
        preds = logits.argmax(dim=-1)
        test_preds.extend(preds.cpu().numpy())
        test_true.extend(batch_y.cpu().numpy())

test_acc = accuracy_score(test_true, test_preds)
print("Test Accuracy:", test_acc)

# Passing `labels` fixes the matrix to num_classes x num_classes even when a
# class is missing from the test set, so it always matches the display labels.
class_ids = list(range(num_classes))
cm = confusion_matrix(test_true, test_preds, labels=class_ids)

# 1) Confusion matrix labelled with the integer class ids.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_ids)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix (Test Set)")
plt.show()

# 2) The same matrix labelled with the words.
# num_dict maps word -> id, so it has to be inverted before it can be indexed by
# class id; using num_dict directly raises a KeyError on label2word[0].
label2word = {v: k for k, v in num_dict.items()}
num_classes = len(label2word)
labels_for_cm = [label2word[i] for i in range(num_classes)]

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels_for_cm)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap=plt.cm.Blues, ax=ax, xticks_rotation='vertical')
plt.title("Confusion Matrix (Test Set)")
plt.show()


# Retrieve Predicted Labels for Each Sample


In [None]:
import torch

# Run the trained model over the test loader and keep, per sample, the
# predicted label, the true label and the sample's position in the test set.
model.eval()
all_predictions, all_true_labels = [], []
all_sample_indices = []  # optional: track which sample index

with torch.no_grad():
    for batch_idx, (batch_x, batch_y) in enumerate(test_loader):
        # batch_x: (batch_size, seq_len, input_dim), batch_y: (batch_size,)
        logits = model(batch_x.to(device))   # (batch_size, num_classes)
        preds = logits.argmax(dim=-1)        # (batch_size,)

        # Store results as CPU NumPy values
        all_predictions.extend(preds.cpu().numpy())
        all_true_labels.extend(batch_y.cpu().numpy())

        # Position of every sample of this batch inside the test set:
        # the batch offset plus the position within the batch.
        start_idx = batch_idx * test_loader.batch_size
        all_sample_indices.extend(range(start_idx, start_idx + len(batch_y)))

# all_predictions[i] is the predicted label of test sample i and
# all_true_labels[i] is its ground-truth label.


# Mapping Labels Back to Words (Optional)

In [None]:
# Invert the word -> id dictionary so class ids can be turned back into words.
label2word = {index: word for word, index in num_dict.items()}

# label2word = num_dict

# Look up the word of every predicted / true label (enable the print to list them).
for i, (pred_label, true_label) in enumerate(zip(all_predictions, all_true_labels)):
    pred_word = label2word[pred_label]
    true_word = label2word[true_label]
    # print(f"Sample {i}: Predicted '{pred_word}', Actual '{true_word}'")

# Overall accuracy: number of matching label pairs over the number of predictions.
total_predictions = len(all_predictions)
correct_predictions = sum(
    int(predicted == actual)
    for predicted, actual in zip(all_predictions, all_true_labels)
)

accuracy = correct_predictions / total_predictions
print(f"Total Correct: {correct_predictions}/{total_predictions} ({accuracy * 100:.2f}%)")



In [None]:
#Plotting Model Accuracy and F1 Score Over Epochs

In [None]:
import matplotlib.pyplot as plt

# val_accuracies and val_f1_scores are filled by the training loop,
# one value per epoch.
epochs = range(1, len(val_accuracies) + 1)

# Both validation curves on one axis, with the epoch number on the x axis.
fig, ax = plt.subplots(figsize=(10, 5))
for series, series_label in (
    (val_accuracies, 'Validation Accuracy'),
    (val_f1_scores, 'Validation F1 Score'),
):
    ax.plot(epochs, series, label=series_label, marker='o')
ax.set_xlabel("Epoch")
ax.set_ylabel("Score")
ax.set_title("Model Accuracy and F1 Score Over Epochs")
ax.legend()
ax.grid(True)
plt.show()


# Storing Results to a CSV or TXT File

In [None]:
import csv

# Write one CSV row per test sample: its index, the predicted and true label
# ids, and the corresponding words.
with open("predictions.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["SampleIndex", "PredLabel", "TrueLabel", "PredWord", "TrueWord"])

    # The three lists are aligned by position (filled together, batch by batch).
    for sample_idx, pred_label, true_label in zip(
        all_sample_indices, all_predictions, all_true_labels
    ):
        writer.writerow([
            sample_idx,
            pred_label,
            true_label,
            label2word[pred_label],
            label2word[true_label],
        ])
