In [1]:
import librosa
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from torchvision.transforms import Compose
import random
import pickle
import torch
from torch.utils.data import DataLoader
import numpy as np
import torchaudio.transforms as T
from coatnet import CoAtNet as CoAtNetImp
import matplotlib.pyplot as plt
import time
from sklearn.base import BaseEstimator
#Save results to csv
from sklearn.metrics import accuracy_score, precision_score, recall_score
import sklearn

#for custom libraries
import sys
sys.path.insert(1,'../SimplifiedPythonFiles/')
import NoiseFilterTorch as nft #unfortunate
import KeyIsolatorTorchLegacy as kitl

#for language model
from openai import Client #better choice than ollama

# About

This version of the model will attempt to use Torch to speed up some of the audio processing.

## Building the dataset

In [2]:
# device selection
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Device: {device}")

MBP_AUDIO_DIR='../MKA-dataset/', '../Dataset-custom-audio/base-audio-denoised-normalized/', '../Dataset-for-Binary/base-audio/'
PREFIX='','','audio_'
SUFFIX='mac','',''
DATASET_PICK='MKA','Custom_Denoised_Normalized','Binary'

keys = list('1234567890QWERTYUIOPASDFGHJKLZXCVBNM+-') # remove + - when using dataset for binary

Device: cuda


In [3]:
#SELECT DATASET
dataset_choice=1
if dataset_choice==1: #tweak until optimal parameters :'3
        n_fft = 1000
        hop_length = 4096
        before = 550
        after = 550
        threshold=0.07
else: 
        n_fft = 2048
        hop_length = 512
        before = 2205
        after = 2205
        threshold = 0.1

Custom dataset: True


In [None]:
# Usage - with KeyIsolatorTorch

audio_dataset = kitl.create_dataset(keys, MBP_AUDIO_DIR[dataset_choice], PREFIX[dataset_choice],SUFFIX[dataset_choice] ,show=False, n_fft=n_fft, hop_length=hop_length, before=before, after=after, threshold=threshold) 
filter_pick="kit"

print(audio_dataset)

path: ../Dataset-custom-audio/base-audio-denoised-normalized/1.wav


  return _VF.stft(input, n_fft, hop_length, win_length, window,  # type: ignore[attr-defined]


Key: 1 | Number of keystrokes: 30
path: ../Dataset-custom-audio/base-audio-denoised-normalized/2.wav
Key: 2 | Number of keystrokes: 28
path: ../Dataset-custom-audio/base-audio-denoised-normalized/3.wav
Key: 3 | Number of keystrokes: 24
path: ../Dataset-custom-audio/base-audio-denoised-normalized/4.wav
Key: 4 | Number of keystrokes: 45
path: ../Dataset-custom-audio/base-audio-denoised-normalized/5.wav
Key: 5 | Number of keystrokes: 36
path: ../Dataset-custom-audio/base-audio-denoised-normalized/6.wav
Key: 6 | Number of keystrokes: 30
path: ../Dataset-custom-audio/base-audio-denoised-normalized/7.wav
Key: 7 | Number of keystrokes: 41
path: ../Dataset-custom-audio/base-audio-denoised-normalized/8.wav
Key: 8 | Number of keystrokes: 37
path: ../Dataset-custom-audio/base-audio-denoised-normalized/9.wav
Key: 9 | Number of keystrokes: 42
path: ../Dataset-custom-audio/base-audio-denoised-normalized/0.wav
Key: 0 | Number of keystrokes: 29
path: ../Dataset-custom-audio/base-audio-denoised-normali

In [5]:
# Usage - with NoiseFilterTorch
#audio_dataset, _ = nft.create_dataset(keys, MBP_AUDIO_DIR[dataset_choice], plot=False,  preffix=PREFIX[dataset_choice], suffix=SUFFIX[dataset_choice], length=1000) 
#filter_pick="nft"

#print(audio_dataset)

In [6]:
#filter_pick="kit"
#filter_pick="nft"

In [7]:
# Write audio_dataset to a file to avoid running this shit over & over again
#with open(f'audio_dataset_{filter_pick}_{DATASET_PICK[dataset_choice]}.pkl', 'wb') as f:
#    pickle.dump(audio_dataset, f)

In [8]:
# Read audio_dataset back from the file
#with open(f'audio_dataset_{filter_pick}_{DATASET_PICK[dataset_choice]}.pkl', 'rb') as f:
#    audio_dataset = pickle.load(f)

In [9]:
audio_dataset

Unnamed: 0,Key,File
0,0,"[tensor(-0.0376), tensor(-0.0350), tensor(-0.0..."
1,0,"[tensor(0.0072), tensor(0.0076), tensor(0.0070..."
2,0,"[tensor(0.0647), tensor(0.0643), tensor(0.0648..."
3,0,"[tensor(-0.0377), tensor(-0.0361), tensor(-0.0..."
4,0,"[tensor(0.0068), tensor(0.0010), tensor(-0.008..."
...,...,...
1398,37,"[tensor(0.0845), tensor(0.0913), tensor(0.0690..."
1399,37,"[tensor(-0.0181), tensor(-0.0166), tensor(-0.0..."
1400,37,"[tensor(-0.0273), tensor(-0.0102), tensor(0.02..."
1401,37,"[tensor(-0.0122), tensor(-0.0038), tensor(-0.0..."


## Preprocessing the dataset

In [10]:
def random_uniform_torch(all_frames_num, num_frames_to_mask):
    low = 0.0
    high = all_frames_num - num_frames_to_mask
    t0 = torch.empty(1).uniform_(low, high).item()
    return t0

In [11]:
def time_shift(samples):
    samples = samples.flatten()
    shift = int(len(samples) * 0.4) #Max shift (0.4)
    random_shift = random.randint(0, shift) #Random number between 0 and 0.4*len(samples)
    data_roll = torch.roll(samples, random_shift)
    return data_roll

def masking(samples):
    num_mask = 2
    freq_masking_max_percentage=0.10
    time_masking_max_percentage=0.10
    spec = samples
    mean_value = spec.mean()
    for i in range(num_mask):
        all_frames_num, all_freqs_num = spec.shape[1], spec.shape[1] 
        freq_percentage = random.uniform(0.1, freq_masking_max_percentage)

        num_freqs_to_mask = int(freq_percentage * all_freqs_num)
        f0 = random_uniform_torch(all_freqs_num, num_freqs_to_mask)
        f0 = int(f0)
        spec[:, f0:f0 + num_freqs_to_mask] = mean_value

        time_percentage = random.uniform(0.1, time_masking_max_percentage)

        num_frames_to_mask = int(time_percentage * all_frames_num)
        t0 = random_uniform_torch(all_frames_num, num_frames_to_mask)
        t0 = int(t0)
        spec[t0:t0 + num_frames_to_mask, :] = mean_value
    return spec

In [12]:
class ToMelSpectrogram:
    def __init__(self, device='cpu', audio_length=14400, sample_rate=44100, n_mels=64, n_fft=1024, hop_length=225, plot=True):
        self.audio_length = audio_length
        self.sample_rate = sample_rate
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.plot=plot
        print(f'device::{device}')
        self.device = torch.device(device)
        self.mel_spectrogram = T.MelSpectrogram(
            sample_rate=self.sample_rate,
            n_mels=self.n_mels,
            n_fft=self.n_fft,
            hop_length=self.hop_length
        ).to(self.device)

    def __call__(self, samples):
        # print(f"Original samples shape: {samples.shape}")
        
        if len(samples) > self.audio_length:
            samples = samples[:self.audio_length]
        elif len(samples) < self.audio_length:
            samples = torch.nn.functional.pad(samples, (0, self.audio_length - len(samples)), mode='constant')

        # print(f"Samples shape after padding/trimming: {samples.shape}")

        samples = samples.unsqueeze(0).to(self.device)  # Shape: (1, 1, audio_length)
        # print(f"Samples shape after unsqueeze and to(device): {samples.shape}")

        mel_spec = self.mel_spectrogram(samples)
        # print(f"Mel spectrogram shape: {mel_spec.shape}")

        mel_spec_resized = torch.nn.functional.interpolate(mel_spec.unsqueeze(0), size=(64, 64), mode='bilinear', align_corners=False)
        # print(f"Mel spectrogram resized shape: {mel_spec_resized.shape}")

        if self.plot:
            self.plot_melspec(mel_spec_resized)
        return mel_spec_resized.squeeze(0)
    
    def plot_melspec(self, mel_spec):
        # Assuming mel_spec is a CUDA tensor
        mel_spec_cpu = mel_spec.cpu().numpy()
        # print(f"Mel spectrogram shape on CPU: {mel_spec_cpu.shape}")

        # Convert the mel spectrogram to decibel units
        mel_spec_db = librosa.power_to_db(mel_spec_cpu.squeeze(), ref=np.max)
        # print(f"Mel spectrogram shape after squeezing: {mel_spec_db.shape}")


        # Plot the mel spectrogram
        plt.figure(figsize=(10, 4))
        librosa.display.specshow(mel_spec_db, sr=self.sample_rate, hop_length=self.hop_length, x_axis='time', y_axis='mel', fmax=8000)
        plt.colorbar(format='%+2.0f dB')
        plt.title('Mel Spectrogram')
        plt.tight_layout()
        plt.show()

class ToMfcc:
    def __init__(self, device='cpu', audio_length=14400, sample_rate=44100, n_mfcc=13, n_fft=1024, hop_length=512):
        self.audio_length = audio_length
        self.sample_rate = sample_rate
        self.n_mfcc = n_mfcc
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.device = torch.device(device)
        self.mfcc_transform = T.MFCC(
            sample_rate=self.sample_rate,
            n_mfcc=self.n_mfcc,
            melkwargs={
                'n_fft': self.n_fft,
                'hop_length': self.hop_length,
                'n_mels': 64,
                'center': True,
                'pad_mode': 'reflect',
                'power': 2.0
            }
        ).to(self.device)


    def __call__(self, samples):
        if len(samples) > self.audio_length:
            samples = samples[:self.audio_length]
        elif len(samples) < self.audio_length:
            samples = F.pad(samples, (0, self.audio_length - len(samples)), mode='constant')

        samples = samples.unsqueeze(0).to(self.device)  # Shape: (1, audio_length)
        mfcc_spec = self.mfcc_transform(samples)
        mfcc_spec = mfcc_spec.squeeze(0).transpose(0, 1)  # Shape: (time, n_mfcc)
        return mfcc_spec


In [13]:
transform = Compose([ToMelSpectrogram(device,12000,plot=False)])
transform_mfcc = Compose([ToMfcc(device, 12000)])

device::cuda


In [14]:
audio_samples = audio_dataset['File'].values.tolist()
labels = audio_dataset['Key'].values.tolist()

audio_samples_new = audio_samples.copy() # audio samples CNN
print(len(audio_samples))

print(type(audio_samples[0]))

for i, sample in enumerate(audio_samples):
    audio_samples_new.append(time_shift(sample))
    labels.append(labels[i])

# convert labels to a numpy array
labels = np.array(labels)
print(len(audio_samples_new))
print(len(labels))

1403
<class 'torch.Tensor'>
2806
2806


In [15]:
audioDatasetFin, audioDatasetFinMasking, audioDatasetMfcc, audioDatasetMfccMasking = [], [], [], []

for i in range(len(audio_samples_new)):
    # Print the shape of the input tensor
    print(f"Shape of input tensor before transformation: {audio_samples_new[i].shape}")
    transformed_sample = transform(audio_samples_new[i])
    #transformed_mfcc = transform_mfcc(audio_samples_new[i])
    
    # CoAtNet part
    #audioDatasetFin.append((transformed_sample, labels[i]))
    #audioDatasetFinMasking.append((masking(transformed_sample), labels[i]))
    
    # masking part
    #audioDatasetMfcc.append((transformed_sample, transformed_mfcc, labels[i]))
    #audioDatasetMfccMasking.append((masking(transformed_sample), transformed_mfcc, labels[i]))


Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformation: torch.Size([1100])
Shape of input tensor before transformat

In [16]:
# check for lengths of datasets
len(transformed_sample)

1

## Models

### Updated CoAtNet

In [17]:
num_blocks = [2, 2, 3, 5, 2]            # L
channels = [64, 96, 192, 384, 768]      # D

class CoAtNetNFT(nn.Module, BaseEstimator):
    def __init__(self, num_epochs=100, patience=20, keys='1234567890QWERTYUIOPASDFGHJKLZXCVBNM+-'):
        super(CoAtNetNFT, self).__init__()    
        self.keys = keys
        self.model = CoAtNetImp((64, 64), 1, num_blocks, channels, num_classes=len(self.keys))
        self.num_epochs = num_epochs
        self.patience = patience
    
    def forward(self, x):
        return self.model(x)
    
    def fit(self, dataset, lr=1e-4):
        self.dataset = dataset
        # split dataset for training
        train_set, val_set = train_test_split(self.dataset, test_size=0.05, random_state=42)
        train_loader, val_loader = DataLoader(train_set, batch_size=16), DataLoader(val_set, batch_size=16)

        # Initialize model, optimizer, and loss function
        self._optimizer = optim.Adam(self.model.parameters(), lr=lr)
        model = self.model.to(device)
        
        # loss criterion
        criterion = nn.CrossEntropyLoss()
        
        best_val_acc, epochs_no_imp = 0, 0
        train_accuracies, val_accuracies = [], []
        
        for epoch in range(self.num_epochs):
            model.train()
            epoch_train_loss = 0.0
            correct_train = 0
            total_train = 0
            tic = time.perf_counter()
            
            for images, labels in train_loader:
                images = images.to(device)
                labels = labels.to(device)
                
                self._optimizer.zero_grad()
        
                # converting labels to Long to avoid error "not implemented for Int"
                labels = labels.long()
                
                # Forward pass
                outputs = model(images)
                loss = criterion(outputs, labels)
                epoch_train_loss += loss.item() * images.size(0)
        
                _, predicted_train = torch.max(outputs.data, 1)
                total_train += labels.size(0)
                correct_train += (predicted_train == labels).sum().item()
                
                # Backward pass
                loss.backward()
                self._optimizer.step()
                # scheduler.step()
            
            toc = time.perf_counter()
            time_taken = toc - tic
            
            epoch_train_loss /= len(train_loader.dataset)
            train_accuracy = correct_train / total_train
            train_accuracies.append(train_accuracy)
            
            # Evaluation of the model
            model.eval()
            total, correct = 0, 0
            
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
        
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            val_accuracy = correct / total
            val_accuracies.append(val_accuracy)
            if (epoch + 1) % 1 == 0 or epoch == 0:
                print(f"Epoch [{epoch + 1}/{self.num_epochs}], Train Loss: {epoch_train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Accuracy: {val_accuracy:.4f}, Iter Time: {time_taken:.2f}s")
            if val_accuracy > best_val_acc:
                best_val_acc = val_accuracy
                epochs_no_imp = 0
                best_model_state = model.state_dict()  # Save the best model
            else:
                epochs_no_imp += 1
            if epochs_no_imp >= self.patience:
                print(f'Early stopping after {epoch+1} epochs')
                model.load_state_dict(best_model_state)  # Load the best model
                break
        torch.save(self.model.state_dict(), f'models/{DATASET_PICK[dataset_choice]}.pth')
        return self, epoch+1
    
    def predict(self, X, load=False):
    # Load the best model
        if load:
            self.model.load_state_dict(torch.load(f'models/{DATASET_PICK[dataset_choice]}.pth'))
        
        # Ensure X is a list of tensors or a single tensor
        if isinstance(X, list):
            X = torch.stack([torch.tensor(x) for x in X]).to(device)
        else:
            X = torch.tensor(X).to(device)

        # Model specifying
        model = self.model.to(device)
        model.eval()

        with torch.no_grad():
            outputs = model(X)
            _, predicted = torch.max(outputs.data, 1)
        
        # Debugging: Print the outputs and predictions
        # Set print options to print the full tensor
        torch.set_printoptions(profile="default")
        
        return predicted.tolist()

## Training model

In [18]:
dataset = audio_samples_new
train_set, test_set, labels_train_set, labels_test_set = train_test_split(dataset, labels, test_size=0.001, random_state=42)
final_train_set = []


for i in range(len(train_set)):
    transformed_sample = transform(train_set[i])
    final_train_set.append((transformed_sample, labels_train_set[i]))
    final_train_set.append((masking(transformed_sample), labels_train_set[i]))
X_train = [t[0] for t in final_train_set]
y_train = [t[1] for t in final_train_set]
print(len(final_train_set))

dataset_training=[(X_train[i], y_train[i]) for i in range(len(X_train))]


5606


In [19]:
model = CoAtNetNFT(keys=keys, num_epochs=100, patience=15)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [20]:
_, epochs=model.fit(dataset_training, lr=5e-5) #lr=5e-5

Epoch [1/100], Train Loss: 3.4861, Train Accuracy: 0.0740, Val Accuracy: 0.0285, Iter Time: 20.40s
Epoch [2/100], Train Loss: 2.8922, Train Accuracy: 0.1854, Val Accuracy: 0.0569, Iter Time: 17.20s
Epoch [3/100], Train Loss: 2.2482, Train Accuracy: 0.3527, Val Accuracy: 0.0641, Iter Time: 15.46s
Epoch [4/100], Train Loss: 1.4942, Train Accuracy: 0.5718, Val Accuracy: 0.0712, Iter Time: 15.50s
Epoch [5/100], Train Loss: 1.0068, Train Accuracy: 0.7048, Val Accuracy: 0.0854, Iter Time: 19.02s
Epoch [6/100], Train Loss: 0.7308, Train Accuracy: 0.7833, Val Accuracy: 0.0534, Iter Time: 19.00s
Epoch [7/100], Train Loss: 0.5610, Train Accuracy: 0.8302, Val Accuracy: 0.0747, Iter Time: 19.60s
Epoch [8/100], Train Loss: 0.4579, Train Accuracy: 0.8627, Val Accuracy: 0.0605, Iter Time: 18.89s
Epoch [9/100], Train Loss: 0.3736, Train Accuracy: 0.8823, Val Accuracy: 0.0712, Iter Time: 19.05s
Epoch [10/100], Train Loss: 0.3072, Train Accuracy: 0.9072, Val Accuracy: 0.0534, Iter Time: 18.65s
Epoch [11

## Prediction

For the prediction, a custom word is tested by picking audio samples from the overall dataset.

In [21]:
class Predictor():
    def __init__(self, dataset, labels, keys):
        self.curr_word, self.curr_labels = [], []
        self.dataset = dataset
        self.labels = labels
        self.keys = keys

    #find the index first
    def find_index(self, key_char):
        try:
            return self.keys.index(key_char)
        except ValueError:
            return -1

    #find the first matching index in the dataset
    def find_first_match(self, key_char):
        index = self.find_index(key_char)
        if index == -1:
            return -1, -1
        for i in range(len(self.dataset)):
            if labels[i] == index:
                print("found for ", key_char," at ", labels[i])
                return self.dataset[i], labels[i]
            print(f"Key {key_char} not found in dataset")
        return -1, -1

    def prepare_word(self, word):
        word=word.upper()
        for letter in word:
        #if letter is space or enter, replace with + or -
            if letter == ' ':
                letter = '-'
            elif letter == '\n':
                letter = '+' 
            character, letter=self.find_first_match(letter)
            self.curr_word.append(character)
            self.curr_labels.append(letter)
                
        print(f'curr_labels: {self.curr_labels}')
        print(f'curr_word: {self.curr_word}')

    def convert_torch(self):
        test_set_word = []
        for i in range(len(self.curr_word)):
            transformed_word = transform(self.curr_word[i])
            test_set_word.append((transformed_word, self.curr_labels[i]))
        self.curr_word_processed = [t[0] for t in test_set_word]
        self.curr_labels_processed = [t[1] for t in test_set_word]
        print(len(test_set_word))

    def getIndCurrKeys(self, ind: int):
        if ind < 0 or ind >= len(self.keys):
            raise IndexError(f"Index {ind} is out of range for keys list with length {len(self.keys)}")
        return self.keys[ind]

    def process_word(self):
        self.curr_word_processed = tuple(self.curr_word_processed)
        self.curr_word_processed=torch.stack(self.curr_word_processed)
        self.curr_labels_processed = torch.tensor(self.curr_labels_processed, dtype=torch.long)

    def calculate_metrics(self, epochs):
        # Metrics calculation
        accuracy = accuracy_score(self.og_labels_list, self.prediction_list)
        precision = precision_score(self.og_labels_list, self.prediction_list, average='macro')
        recall = recall_score(self.og_labels_list, self.prediction_list, average='macro')
        f1 = sklearn.metrics.f1_score(self.og_labels_list, self.prediction_list, average='macro')

        # Save in csv file
        nft.save_csv("CoAtNetNFT", epochs, "With patience=100, using time shift", accuracy, precision, recall, f1)

        # Print results
        print("Final Results!")
        print(f"Model: CoAtNetNFT")
        print("With patience=100, using time shift")
        print(f"Epochs: {epochs}")
        print(f"Accuracy: {accuracy}")
        print(f"Precision: {precision}")
        print(f"Recall: {recall}")
        print(f"F1 Score: {f1}")

    def predict(self, model, word): #run this one
        self.prepare_word(word)
        self.convert_torch()
        self.process_word()
        
        # Assuming model.predict returns a list of indices
        prediction = model.predict(self.curr_word_processed,load=True)

        self.prediction_list = list(map(self.getIndCurrKeys, prediction)) 
        self.og_labels_list=list(map(self.getIndCurrKeys, self.curr_labels_processed))
        
        print(f'prediction: {self.prediction_list}')
        print(f'real labels: {self.og_labels_list}')

    def llama_predict(self):
        # revert use of - and + to space and enter
        prediction_list = list(map(lambda x: ' ' if x == '-' else x, self.prediction_list))
        prediction_list = list(map(lambda x: '\n' if x == '+' else x, prediction_list))
        sentence=''.join(prediction_list)

        # LLAMA 3.1 prediction
        try:
            client = Client(base_url='http://localhost:1234/v1', api_key='llm-studio')

            response = client.chat.completions.create(model='lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF', messages=[
            {
                'role': 'user',
                'content': sentence
            },
            ])
            print("Question:", sentence)
            print("Response:", response.choices[0].message.content)
        except Exception as e:
            print(f"An error occurred: {e}")

    

In [22]:
print(keys)

['1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', 'O', 'P', 'A', 'S', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'Z', 'X', 'C', 'V', 'B', 'N', 'M', '+', '-']


In [23]:
word = "I wanna win the rat race nevermind second" # for dataset 3 avoid using space and enter keys
predictor=Predictor(dataset, labels, keys)
predictor.predict(model, word)
predictor.calculate_metrics(epochs)
#predictor.llama_predict()

Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
Key I not found in dataset
K

  X = torch.tensor(X).to(device)


prediction: ['J', '-', 'S', '+', 'U', 'U', '+', '-', 'S', 'J', 'U', '-', 'X', 'I', 'I', '-', 'P', '+', 'X', '-', 'P', '+', 'P', 'I', '-', 'U', 'I', 'P', 'I', 'P', 'O', 'J', 'U', 'I', '-', 'S', 'I', 'P', 'P', 'U', 'I']
real labels: ['I', '-', 'W', 'A', 'N', 'N', 'A', '-', 'W', 'I', 'N', '-', 'T', 'H', 'E', '-', 'R', 'A', 'T', '-', 'R', 'A', 'C', 'E', '-', 'N', 'E', 'V', 'E', 'R', 'M', 'I', 'N', 'D', '-', 'S', 'E', 'C', 'O', 'N', 'D']
Final Results!
Model: CoAtNetNFT
With patience=100, using time shift
Epochs: 40
Accuracy: 0.1951219512195122
Precision: 0.06666666666666667
Recall: 0.1
F1 Score: 0.075


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Legacy

In [25]:
# Custom prediction

word = "otorhinolaryngologist" # for dataset 3 avoid using space and enter keys
word = word.upper()
curr_word, curr_labels = [], []

# Space -> -
# Enter -> +

print(f'keys: {keys}')
print(f'keys length: {len(keys)}')
#find the index first
def find_index(keys, key_char):
  try:
    return keys.index(key_char)
  except ValueError:
    return -1

#find the first matching index in the dataset
def find_first_match(dataset, labels, key_char):
  index = find_index(keys, key_char)
  if index == -1:
    return -1, -1
  for i in range(len(dataset)):
    if labels[i] == index:
      return dataset[i], labels[i]
  return -1, -1

for letter in word:
  #if letter is space or enter, replace with + or -
  if letter == ' ':
    letter = '-'
  elif letter == '\n':
    letter = '+' 
  character, letter=find_first_match(dataset, labels, letter)
  curr_word.append(character)
  curr_labels.append(letter)
        
print(f'curr_labels: {curr_labels}')
print(f'curr_word: {curr_word}')

keys: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', 'O', 'P', 'A', 'S', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'Z', 'X', 'C', 'V', 'B', 'N', 'M']
keys length: 36
curr_labels: [18, 14, 18, 13, 25, 17, 34, 18, 28, 20, 13, 15, 34, 24, 18, 28, 18, 24, 17, 21, 14]
curr_word: [tensor([0.2460, 0.2426, 0.2378,  ..., 0.0100, 0.0102, 0.0105]), tensor([ 0.0003,  0.0003,  0.0002,  ..., -0.0038, -0.0043, -0.0050]), tensor([0.2460, 0.2426, 0.2378,  ..., 0.0100, 0.0102, 0.0105]), tensor([ 0.0619,  0.0626,  0.0637,  ..., -0.0208, -0.0210, -0.0213]), tensor([-0.0005, -0.0005, -0.0005,  ...,  0.0098,  0.0091,  0.0088]), tensor([0.0005, 0.0005, 0.0005,  ..., 0.1304, 0.1306, 0.1310]), tensor([-0.0002, -0.0002, -0.0001,  ...,  0.0170,  0.0166,  0.0163]), tensor([0.2460, 0.2426, 0.2378,  ..., 0.0100, 0.0102, 0.0105]), tensor([ 0.3659,  0.3630,  0.3602,  ..., -0.0663, -0.0653, -0.0643]), tensor([-7.6899e-06, -1.1004e-05, -1.4175e-05,  ...,  7.4053e-03,
         6.8488

In [26]:
test_set_word = []
for i in range(len(curr_word)):
    transformed_word = transform(curr_word[i])
    test_set_word.append((transformed_word, curr_labels[i]))
    #test_set_word.append((masking(transformed_word), curr_labels[i]))
curr_word_processed = [t[0] for t in test_set_word]
curr_labels_processed = [t[1] for t in test_set_word]
print(len(test_set_word))

21


In [27]:
def getIndCurrKeys(ind: int):
    if ind < 0 or ind >= len(keys):
        raise IndexError(f"Index {ind} is out of range for keys list with length {len(keys)}")
    return keys[ind]

curr_word_processed = tuple(curr_word_processed)
curr_word_processed=torch.stack(curr_word_processed)
curr_labels_processed = torch.tensor(curr_labels_processed, dtype=torch.long)

# Assuming model.predict returns a list of indices
prediction = model.predict(curr_word_processed,load=True)

prediction_list = list(map(getIndCurrKeys, prediction)) 
og_labels_list=list(map(getIndCurrKeys, curr_labels_processed))
print(f'prediction: {prediction_list}')

print(f'real labels: {og_labels_list}')

prediction: ['O', 'T', 'O', 'R', 'H', 'I', 'N', 'O', 'F', 'A', 'R', 'F', 'N', 'G', 'O', 'F', 'O', 'G', 'I', 'S', 'T']
real labels: ['O', 'T', 'O', 'R', 'H', 'I', 'N', 'O', 'L', 'A', 'R', 'Y', 'N', 'G', 'O', 'L', 'O', 'G', 'I', 'S', 'T']


  X = torch.tensor(X).to(device)


In [28]:
# Metrics calculation
accuracy = accuracy_score(og_labels_list, prediction_list)
precision = precision_score(og_labels_list, prediction_list, average='macro')
recall = recall_score(og_labels_list, prediction_list, average='macro')
f1 = sklearn.metrics.f1_score(og_labels_list, prediction_list, average='macro')

# Save in csv file
nft.save_csv("CoAtNetNFT", 1000, "With patience=100, using time shift", accuracy, precision, recall, f1)

# Print results
print("Final Results!")
print(f"Model: CoAtNetNFT")
print("With patience=100, using time shift")
print(f"Epochs: {1000}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Final Results!
Model: CoAtNetNFT
With patience=100, using time shift
Epochs: 1000
Accuracy: 0.8571428571428571
Precision: 0.75
Recall: 0.75
F1 Score: 0.75


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# revert use of - and +

prediction_list = list(map(lambda x: ' ' if x == '-' else x, prediction_list))
prediction_list = list(map(lambda x: '\n' if x == '+' else x, prediction_list))
sentence=''.join(prediction_list)

# LLAMA 3.1 prediction
try:
    client = Client(base_url='http://localhost:1234/v1', api_key='llm-studio')

    response = client.chat.completions.create(model='lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF', messages=[
      {
        'role': 'user',
        'content': sentence
      },
    ])
    print("Question:", sentence)
    print("Response:", response.choices[0].message.content)
except Exception as e:
    print(f"An error occurred: {e}")

An error occurred: Connection error.


ERROR:asyncio:Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "c:\Users\Kat\.conda\envs\tesis\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "c:\Users\Kat\.conda\envs\tesis\lib\asyncio\selector_events.py", line 120, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
ERROR:asyncio:Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "c:\Users\Kat\.conda\envs\tesis\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "c:\Users\Kat\.conda\envs\tesis\lib\asyncio\selector_events.py", line 120, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError