In [64]:
import numpy as np
import os
from torch.utils.data import Dataset, DataLoader
import torch

class CustomDataset(Dataset):
    """Map-style dataset that pairs each sample with its label.

    Indexing returns ``(features, label)``: both are built with the
    ``torch.Tensor`` constructor, and the label is wrapped in a
    one-element tensor.
    """

    def __init__(self, data_array, labels_array):
        # Keep references to the caller's containers; nothing is copied here.
        self.data_array, self.labels_array = data_array, labels_array

    def __len__(self):
        # The number of samples is taken from the data container alone.
        return len(self.data_array)

    def __getitem__(self, idx):
        features = torch.Tensor(self.data_array[idx])
        label = torch.Tensor([self.labels_array[idx]])
        return features, label

def collate_fn(batch):
    """Combine a list of ``(data, label)`` pairs into two batched tensors.

    The data tensors are stacked along a new leading dimension; the label
    tensors are concatenated along their existing first dimension.
    """
    samples, targets = zip(*batch)
    stacked_samples = torch.stack(samples)
    joined_targets = torch.cat(targets)
    return stacked_samples, joined_targets

# Data preparation: load every sample, wrap-pad all of them to one common length,
# then build the train / validation / test loaders.
data_root = "train_data"
data_list = []
labels_list = []
max_sequence_length = 0

# sorted() keeps the sample order stable; os.listdir() returns entries in arbitrary order.
for language_folder in sorted(os.listdir(data_root)):
    language_path = os.path.join(data_root, language_folder)

    if os.path.isdir(language_path):
        for npy_file in sorted(os.listdir(language_path)):
            if npy_file.endswith(".npy"):
                npy_path = os.path.join(language_path, npy_file)
                data = np.load(npy_path)
                max_sequence_length = max(max_sequence_length, data.shape[0])
                data_list.append(data)
                # The label is the integer after the last underscore of the folder name.
                labels_list.append(int(language_folder.split('_')[-1]))

if not data_list:
    raise FileNotFoundError(f"No .npy training files found under '{data_root}'")

test_root = "test_data"
test_list = []
test_files = []  # file names, index-aligned with test_list so predictions can be traced back
for npy_file in sorted(os.listdir(test_root)):
    if npy_file.endswith(".npy"):
        npy_path = os.path.join(test_root, npy_file)
        test = np.load(npy_path)
        # Test sequences also take part in the common length so both sets share one shape.
        max_sequence_length = max(max_sequence_length, test.shape[0])
        test_list.append(test)
        test_files.append(npy_file)


def _wrap_pad(sequence, target_length):
    """Return `sequence` extended along axis 0 to `target_length` rows using np.pad's 'wrap' mode."""
    pad_length = target_length - sequence.shape[0]
    if pad_length <= 0:
        return sequence
    return np.pad(sequence, ((0, pad_length), (0, 0)), 'wrap')


data_list = [_wrap_pad(sequence, max_sequence_length) for sequence in data_list]
test_list = [_wrap_pad(sequence, max_sequence_length) for sequence in test_list]

data_array = np.array(data_list)
labels_array = np.array(labels_list)

test_array = np.array(test_list)
# torch.tensor() keeps the NumPy dtype; force float32 so test batches have the same
# dtype as the training samples, which CustomDataset builds with torch.Tensor.
test_tensor = torch.tensor(test_array, dtype=torch.float32)

# Create a dataset
dataset = CustomDataset(data_array, labels_array)

# Split the dataset into training and validation sets (80 % / 20 %)
total_samples = len(dataset)
train_size = int(0.8 * total_samples)
val_size = total_samples - train_size

# A seeded generator makes the split identical on every run of this cell.
split_generator = torch.Generator().manual_seed(0)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn, drop_last=False)

# The test tensor is iterated directly: each batch is a slice of rows, with no labels.
test_loader = DataLoader(test_tensor, batch_size=batch_size, shuffle=False, drop_last=False)

In [88]:
import torch.nn as nn

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

feature_size = 80  # Assuming 80 features in your data
# Every sample was padded to this length by the data-preparation cell above.
sequence_length = max_sequence_length
# Passed to TransformerClassifier below as the transformer's d_model.
hidden_size = 128
# Size of the final linear layer's output.
num_classes = 2


class TransformerClassifier(nn.Module):
    """Conv1d front-end, linear embedding, Transformer encoder and linear classification head.

    The Conv1d layer uses the time axis of the input as its channel axis and
    convolves along the feature axis. Its output is flattened and projected to
    ``hidden_size`` before being passed through the encoder.

    Args:
        hidden_size: Width of the embedding and ``d_model`` of the encoder.
            Must be divisible by ``num_heads``.
        num_classes: Number of output logits.
        num_heads: Attention heads per encoder layer.
        num_encoder_layers: Number of stacked encoder layers.
        dropout: Dropout probability inside the encoder layers.
        seq_len: Number of time steps per sample (Conv1d input channels).
            Defaults to the module-level ``sequence_length``.
        num_features: Number of features per time step. Defaults to the
            module-level ``feature_size``.
    """

    def __init__(self, hidden_size, num_classes, num_heads=4, num_encoder_layers=10, dropout=0.3,
                 seq_len=None, num_features=None):
        super(TransformerClassifier, self).__init__()

        # Fall back to the notebook-level configuration when no explicit sizes are given.
        if seq_len is None:
            seq_len = sequence_length
        if num_features is None:
            num_features = feature_size

        out_channels = 256
        kernel_size = 5
        if num_features < kernel_size:
            raise ValueError(
                f"num_features ({num_features}) must be at least kernel_size ({kernel_size})"
            )

        self.conv1d = nn.Conv1d(in_channels=seq_len, out_channels=out_channels, kernel_size=kernel_size)
        self.relu = nn.SELU()  # attribute name kept for compatibility; the activation is SELU
        # An unpadded, stride-1 convolution leaves (num_features - kernel_size + 1) positions per channel.
        self.embedding = nn.Linear(out_channels * (num_features - kernel_size + 1), hidden_size)

        # Kept as an attribute so the state_dict keys stay the same as before.
        self.transformer_encoder_layers = nn.TransformerEncoderLayer(
            d_model=hidden_size, nhead=num_heads, dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_encoder_layers, num_layers=num_encoder_layers
        )

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for input of shape (batch, seq_len, num_features)."""
        x = self.relu(self.conv1d(x))
        x = x.flatten(start_dim=1)   # (batch, out_channels * positions)
        x = self.embedding(x)        # (batch, hidden_size)
        # NOTE(review): the encoder is sequence-first by default, so this feeds it a
        # length-1 sequence per sample — confirm that this is the intended design.
        x = x.unsqueeze(0)           # (1, batch, hidden_size)
        output = self.transformer_encoder(x)
        output = output.squeeze(0)   # (batch, hidden_size)
        output = self.fc(output)

        return output

# Move the model to the selected device (GPU when available)
model = TransformerClassifier(hidden_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    # Training mode enables dropout; it is switched off again for validation below.
    model.train()
    for batch_data, batch_labels in train_loader:
        # Move data to the selected device
        batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)

        # Forward pass (labels arrive as floats from the dataset, CrossEntropyLoss needs integers)
        outputs = model(batch_data)
        loss = criterion(outputs, batch_labels.long())

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation: eval mode disables dropout, so the accuracy is not perturbed by random masks.
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for val_data, val_labels in val_loader:
            # Move validation data to the selected device
            val_data, val_labels = val_data.to(device), val_labels.to(device)

            val_outputs = model(val_data)
            _, predicted = torch.max(val_outputs, 1)
            total += val_labels.size(0)
            correct += (predicted == val_labels.long()).sum().item()

        accuracy = correct / total
        # The reported loss is the loss of the last training mini-batch of this epoch.
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Validation Accuracy: {100 * accuracy:.2f}%')



torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])
torch.Size([1, 32, 128])


KeyboardInterrupt: 

In [1]:
import numpy as np
import os
from torch.utils.data import Dataset, DataLoader
import torch

class CustomDataset(Dataset):
    """Dataset wrapper around two parallel, index-aligned containers.

    ``dataset[i]`` yields the i-th sample and its label, each converted with
    the ``torch.Tensor`` constructor; the label comes back as a tensor with a
    single element.
    """

    def __init__(self, data_array, labels_array):
        # Only references are stored; conversion to tensors happens per item.
        self.data_array, self.labels_array = data_array, labels_array

    def __len__(self):
        # Length follows the data container.
        return len(self.data_array)

    def __getitem__(self, idx):
        sample = torch.Tensor(self.data_array[idx])
        target = torch.Tensor([self.labels_array[idx]])
        return sample, target

def collate_fn(batch):
    """Turn a list of ``(data, label)`` pairs into ``(stacked_data, concatenated_labels)``.

    Data tensors gain a new leading batch dimension via ``torch.stack``; label
    tensors are joined along their first dimension via ``torch.cat``.
    """
    data_items, label_items = zip(*batch)
    batched_data = torch.stack(data_items)
    batched_labels = torch.cat(label_items)
    return batched_data, batched_labels

# Data preparation: load every sample, truncate all of them to the shortest sequence
# length, then build the train / validation / test loaders.
data_root = "train_data"
data_list = []
labels_list = []

# sorted() keeps the sample order stable; os.listdir() returns entries in arbitrary order.
for language_folder in sorted(os.listdir(data_root)):
    language_path = os.path.join(data_root, language_folder)

    if os.path.isdir(language_path):
        for npy_file in sorted(os.listdir(language_path)):
            if npy_file.endswith(".npy"):
                npy_path = os.path.join(language_path, npy_file)
                data = np.load(npy_path)
                data_list.append(data)
                # The label is the integer after the last underscore of the folder name.
                labels_list.append(int(language_folder.split('_')[-1]))

if not data_list:
    raise FileNotFoundError(f"No .npy training files found under '{data_root}'")

test_root = "test_data"
test_list = []
test_files = []  # file names, index-aligned with test_list so predictions can be traced back
for npy_file in sorted(os.listdir(test_root)):
    if npy_file.endswith(".npy"):
        npy_path = os.path.join(test_root, npy_file)
        test = np.load(npy_path)
        test_list.append(test)
        test_files.append(npy_file)

# Derive both bounds from the loaded data instead of a fixed sentinel, so the
# minimum is correct even when every sequence is longer than 10000 steps.
all_lengths = [sequence.shape[0] for sequence in data_list + test_list]
max_sequence_length = max(all_lengths)
min_sequence_length = min(all_lengths)

# Keep only the first min_sequence_length steps so every sample has the same shape.
data_list = [sequence[:min_sequence_length, :] for sequence in data_list]
test_list = [sequence[:min_sequence_length, :] for sequence in test_list]

data_array = np.array(data_list)
labels_array = np.array(labels_list)

test_array = np.array(test_list)
# torch.tensor() keeps the NumPy dtype; force float32 so test batches have the same
# dtype as the training samples, which CustomDataset builds with torch.Tensor.
test_tensor = torch.tensor(test_array, dtype=torch.float32)

# Create a dataset
dataset = CustomDataset(data_array, labels_array)

# Split the dataset into training and validation sets (80 % / 20 %)
total_samples = len(dataset)
train_size = int(0.8 * total_samples)
val_size = total_samples - train_size

# A seeded generator makes the split identical on every run of this cell.
split_generator = torch.Generator().manual_seed(0)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn, drop_last=False)

# The test tensor is iterated directly: each batch is a slice of rows, with no labels.
test_loader = DataLoader(test_tensor, batch_size=batch_size, shuffle=False, drop_last=False)

In [15]:
import torch.nn as nn

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

feature_size = 80  # Assuming 80 features in your data
# Samples were cut down to the shortest sequence length by the data-preparation cell above.
sequence_length = min_sequence_length


# NOTE(review): these values match the TransformerClassifier constructor defaults, but
# this dict is not referenced by any code visible in this notebook — confirm whether it
# was meant to be passed as TransformerClassifier(**model_params).
model_params = {
        'hidden_size': 128,
        'num_heads': 4,
        'num_encoder_layers': 4,
        'dropout': 0.3,
        'out_channels': 32,
        'kernel_size': 3
    }


class TransformerClassifier(nn.Module):
    """Conv1d front-end, linear embedding, Transformer encoder and linear classification head.

    The Conv1d layer uses the time axis of the input as its channel axis and
    convolves along the feature axis. Its output is flattened and projected to
    ``hidden_size`` before being passed through the encoder.

    Args:
        hidden_size: Width of the embedding and ``d_model`` of the encoder.
            Must be divisible by ``num_heads``.
        num_heads: Attention heads per encoder layer.
        num_encoder_layers: Number of stacked encoder layers.
        dropout: Dropout probability inside the encoder layers.
        out_channels: Number of Conv1d output channels.
        kernel_size: Conv1d kernel width along the feature axis.
        num_classes: Number of output logits (2 by default, as before).
        seq_len: Number of time steps per sample (Conv1d input channels).
            Defaults to the module-level ``sequence_length``.
        num_features: Number of features per time step. Defaults to the
            module-level ``feature_size``.
    """

    def __init__(self, hidden_size=128, num_heads=4, num_encoder_layers=4, dropout=0.3, out_channels=32,
                 kernel_size=3, num_classes=2, seq_len=None, num_features=None):
        super(TransformerClassifier, self).__init__()

        # Fall back to the notebook-level configuration when no explicit sizes are given.
        if seq_len is None:
            seq_len = sequence_length
        if num_features is None:
            num_features = feature_size
        if num_features < kernel_size:
            raise ValueError(
                f"num_features ({num_features}) must be at least kernel_size ({kernel_size})"
            )

        self.conv1d = nn.Conv1d(in_channels=seq_len, out_channels=out_channels, kernel_size=kernel_size)
        self.relu = nn.SELU()  # attribute name kept for compatibility; the activation is SELU
        # An unpadded, stride-1 convolution leaves (num_features - kernel_size + 1) positions per channel.
        self.embedding = nn.Linear(out_channels * (num_features - kernel_size + 1), hidden_size)

        # Kept as an attribute so the state_dict keys match previously saved checkpoints.
        self.transformer_encoder_layers = nn.TransformerEncoderLayer(
            d_model=hidden_size, nhead=num_heads, dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_encoder_layers, num_layers=num_encoder_layers
        )

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for input of shape (batch, seq_len, num_features)."""
        x = self.relu(self.conv1d(x))
        x = x.flatten(start_dim=1)   # (batch, out_channels * positions)
        x = self.embedding(x)        # (batch, hidden_size)
        # NOTE(review): the encoder is sequence-first by default, so this feeds it a
        # length-1 sequence per sample — confirm that this is the intended design.
        x = x.unsqueeze(0)           # (1, batch, hidden_size)
        output = self.transformer_encoder(x)
        output = output.squeeze(0)   # (batch, hidden_size)
        output = self.fc(output)

        return output

# Build the classifier with its default hyper-parameters on the selected device
model = TransformerClassifier().to(device)

# Objective and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

# One training pass followed by one validation pass per epoch
num_epochs = 100
model.train()
for epoch in range(num_epochs):
    for batch_data, batch_labels in train_loader:
        # Transfer the mini-batch to the selected device
        batch_data = batch_data.to(device)
        batch_labels = batch_labels.to(device)

        # Clear old gradients, then forward / backward / update
        optimizer.zero_grad()
        outputs = model(batch_data)
        loss = criterion(outputs, batch_labels.long())
        loss.backward()
        optimizer.step()

    # Validation runs in eval mode and without gradient tracking
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for val_data, val_labels in val_loader:
            val_data = val_data.to(device)
            val_labels = val_labels.to(device)

            val_outputs = model(val_data)
            # Index of the largest logit in each row is the predicted class
            predicted = torch.max(val_outputs.data, 1)[1]
            total += val_labels.size(0)
            correct += (predicted == val_labels.long()).sum().item()

    accuracy = correct / total
    # The loss shown is the one from the last training mini-batch of the epoch
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Validation Accuracy: {100 * accuracy:.2f}%')
    model.train()



Epoch [1/100], Loss: 0.6843, Validation Accuracy: 69.38%
Epoch [2/100], Loss: 0.4639, Validation Accuracy: 70.75%
Epoch [3/100], Loss: 0.6886, Validation Accuracy: 74.38%
Epoch [4/100], Loss: 0.5892, Validation Accuracy: 81.00%
Epoch [5/100], Loss: 0.2976, Validation Accuracy: 84.88%
Epoch [6/100], Loss: 0.3876, Validation Accuracy: 86.12%
Epoch [7/100], Loss: 0.1351, Validation Accuracy: 88.12%
Epoch [8/100], Loss: 0.4037, Validation Accuracy: 89.12%
Epoch [9/100], Loss: 0.5056, Validation Accuracy: 89.25%
Epoch [10/100], Loss: 0.1265, Validation Accuracy: 90.12%
Epoch [11/100], Loss: 0.1735, Validation Accuracy: 90.25%
Epoch [12/100], Loss: 0.1958, Validation Accuracy: 91.00%
Epoch [13/100], Loss: 0.3312, Validation Accuracy: 90.50%
Epoch [14/100], Loss: 0.1755, Validation Accuracy: 90.25%
Epoch [15/100], Loss: 0.3453, Validation Accuracy: 90.75%
Epoch [16/100], Loss: 0.1386, Validation Accuracy: 88.12%
Epoch [17/100], Loss: 0.1494, Validation Accuracy: 89.88%
Epoch [18/100], Loss: 0

In [16]:
# Persist the trained weights. The target directory is created first so the cell
# also works on a fresh checkout where 'save_models' does not exist yet.
os.makedirs('save_models', exist_ok=True)
torch.save(model.state_dict(), 'save_models/'+'model_transformer_v1.pth')

In [18]:
# Reload the saved weights into a fresh model and predict a class index for every test sample.
test_label = []
model = TransformerClassifier().to(device)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine as well.
state_dict = torch.load('save_models/model_transformer_v1.pth', map_location=device)
model.load_state_dict(state_dict)
model.eval()
with torch.no_grad():
    for test_data in test_loader:
        # Move the batch to the selected device; the cast guards against test arrays
        # stored in a dtype other than the float32 the model weights use.
        test_data = test_data.to(device=device, dtype=torch.float32)

        test_outputs = model(test_data)
        # Index of the largest logit in each row is the predicted class.
        _, predicted = torch.max(test_outputs, 1)

        test_label.extend(predicted.tolist())



In [19]:
# test_label holds one predicted class index per test sample; with the model's
# two-output head the indices are 0 or 1, so the sum counts the class-1 predictions.
sum(test_label)

927

In [29]:
import optuna

def objective(trial):
    """Optuna objective: train one TransformerClassifier and return its validation accuracy.

    Samples the model hyper-parameters from ``trial``, trains for a fixed number
    of epochs on ``train_loader``, evaluates on ``val_loader``, and saves the
    trained weights under ``save_models/``.

    The sampled ``hidden_size`` is rounded down to a multiple of ``num_heads``.
    Optuna's ``trial.params`` keeps the raw suggestion, so the value actually
    used is stored in the trial's user attribute ``effective_hidden_size``.

    Args:
        trial: The Optuna trial used to sample hyper-parameters.

    Returns:
        Validation accuracy as a fraction in [0, 1].
    """
    hidden_size = trial.suggest_int('hidden_size', 64, 256)
    num_heads = trial.suggest_int('num_heads', 2, 8)
    num_encoder_layers = trial.suggest_int('num_encoder_layers', 2, 8)
    dropout = trial.suggest_float('dropout', 0.0, 0.5)
    out_channels = trial.suggest_int('out_channels', 16, 64)
    kernel_size = trial.suggest_int('kernel_size', 3, 7)

    # Multi-head attention needs d_model divisible by the head count, so round down.
    if hidden_size % num_heads != 0:
        hidden_size = (hidden_size // num_heads) * num_heads
    # Record the value that is really used, so the best trial can be rebuilt exactly.
    trial.set_user_attr('effective_hidden_size', hidden_size)

    model = TransformerClassifier(
        hidden_size=hidden_size,
        num_heads=num_heads,
        num_encoder_layers=num_encoder_layers,
        dropout=dropout,
        out_channels=out_channels,
        kernel_size=kernel_size
    ).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

    num_epochs = 100

    model.train()
    for epoch in range(num_epochs):
        for batch_data, batch_labels in train_loader:
            # Move data to the selected device
            batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)

            # Forward pass
            outputs = model(batch_data)
            loss = criterion(outputs, batch_labels.long())

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluate once, after all epochs, with dropout disabled.
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for val_data, val_labels in val_loader:
            # Move validation data to the selected device
            val_data, val_labels = val_data.to(device), val_labels.to(device)

            val_outputs = model(val_data)
            _, predicted = torch.max(val_outputs, 1)
            total += val_labels.size(0)
            correct += (predicted == val_labels.long()).sum().item()

        accuracy = correct / total
        print(f'{hidden_size}_{num_heads}_{num_encoder_layers}_{dropout}_{out_channels}_{kernel_size}_Validation Accuracy: {100 * accuracy:.2f}%')

    # Create the checkpoint directory on demand so the first trial cannot fail at save time.
    os.makedirs('save_models', exist_ok=True)
    torch.save(model.state_dict(), 'save_models/'+f'model_transformer_{hidden_size}_{num_heads}_{num_encoder_layers}_{dropout}_{out_channels}_{kernel_size}.pth')

    return accuracy


# Search for the configuration with the highest validation accuracy.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Report the best trial found by the search.
best_params, best_value = study.best_params, study.best_value
print(f"Best hyperparameters: {best_params}")
print(f"Best value (accuracy): {best_value}")


[I 2023-12-22 22:03:12,405] A new study created in memory with name: no-name-d76b1ec4-5c14-4054-bc79-86747e044d41
[I 2023-12-22 22:05:55,981] Trial 0 finished with value: 0.9375 and parameters: {'hidden_size': 66, 'num_heads': 3, 'num_encoder_layers': 5, 'dropout': 0.26792001323294157, 'out_channels': 60, 'kernel_size': 6}. Best is trial 0 with value: 0.9375.


66_3_5_0.26792001323294157_60_6_Validation Accuracy: 93.75%


[I 2023-12-22 22:09:35,110] Trial 1 finished with value: 0.9425 and parameters: {'hidden_size': 133, 'num_heads': 5, 'num_encoder_layers': 7, 'dropout': 0.34948548858777095, 'out_channels': 39, 'kernel_size': 6}. Best is trial 1 with value: 0.9425.


130_5_7_0.34948548858777095_39_6_Validation Accuracy: 94.25%


[I 2023-12-22 22:13:48,738] Trial 2 finished with value: 0.93625 and parameters: {'hidden_size': 201, 'num_heads': 3, 'num_encoder_layers': 7, 'dropout': 0.15005351812746826, 'out_channels': 21, 'kernel_size': 6}. Best is trial 1 with value: 0.9425.


201_3_7_0.15005351812746826_21_6_Validation Accuracy: 93.62%


[I 2023-12-22 22:16:06,490] Trial 3 finished with value: 0.9475 and parameters: {'hidden_size': 141, 'num_heads': 8, 'num_encoder_layers': 4, 'dropout': 0.244532796804605, 'out_channels': 30, 'kernel_size': 3}. Best is trial 3 with value: 0.9475.


136_8_4_0.244532796804605_30_3_Validation Accuracy: 94.75%


[I 2023-12-22 22:19:47,607] Trial 4 finished with value: 0.93125 and parameters: {'hidden_size': 204, 'num_heads': 4, 'num_encoder_layers': 6, 'dropout': 0.3692458582355137, 'out_channels': 17, 'kernel_size': 3}. Best is trial 3 with value: 0.9475.


204_4_6_0.3692458582355137_17_3_Validation Accuracy: 93.12%


[I 2023-12-22 22:24:00,950] Trial 5 finished with value: 0.93 and parameters: {'hidden_size': 158, 'num_heads': 4, 'num_encoder_layers': 7, 'dropout': 0.479715076645308, 'out_channels': 61, 'kernel_size': 3}. Best is trial 3 with value: 0.9475.


156_4_7_0.479715076645308_61_3_Validation Accuracy: 93.00%


[I 2023-12-22 22:25:21,761] Trial 6 finished with value: 0.93875 and parameters: {'hidden_size': 64, 'num_heads': 8, 'num_encoder_layers': 2, 'dropout': 0.4753559428591169, 'out_channels': 51, 'kernel_size': 3}. Best is trial 3 with value: 0.9475.


64_8_2_0.4753559428591169_51_3_Validation Accuracy: 93.88%


[I 2023-12-22 22:27:36,622] Trial 7 finished with value: 0.91 and parameters: {'hidden_size': 80, 'num_heads': 3, 'num_encoder_layers': 4, 'dropout': 0.027067144476562277, 'out_channels': 16, 'kernel_size': 3}. Best is trial 3 with value: 0.9475.


78_3_4_0.027067144476562277_16_3_Validation Accuracy: 91.00%


[I 2023-12-22 22:29:17,189] Trial 8 finished with value: 0.945 and parameters: {'hidden_size': 186, 'num_heads': 3, 'num_encoder_layers': 2, 'dropout': 0.05692433040038153, 'out_channels': 54, 'kernel_size': 3}. Best is trial 3 with value: 0.9475.


186_3_2_0.05692433040038153_54_3_Validation Accuracy: 94.50%


[I 2023-12-22 22:30:44,889] Trial 9 finished with value: 0.93 and parameters: {'hidden_size': 119, 'num_heads': 7, 'num_encoder_layers': 2, 'dropout': 0.38035107113508826, 'out_channels': 40, 'kernel_size': 7}. Best is trial 3 with value: 0.9475.


119_7_2_0.38035107113508826_40_7_Validation Accuracy: 93.00%


[I 2023-12-22 22:33:43,052] Trial 10 finished with value: 0.9475 and parameters: {'hidden_size': 234, 'num_heads': 7, 'num_encoder_layers': 4, 'dropout': 0.16365174865789361, 'out_channels': 30, 'kernel_size': 4}. Best is trial 3 with value: 0.9475.


231_7_4_0.16365174865789361_30_4_Validation Accuracy: 94.75%


[I 2023-12-22 22:36:45,775] Trial 11 finished with value: 0.93625 and parameters: {'hidden_size': 254, 'num_heads': 7, 'num_encoder_layers': 4, 'dropout': 0.18315978673287775, 'out_channels': 29, 'kernel_size': 4}. Best is trial 3 with value: 0.9475.


252_7_4_0.18315978673287775_29_4_Validation Accuracy: 93.62%


[I 2023-12-22 22:39:46,029] Trial 12 finished with value: 0.92 and parameters: {'hidden_size': 253, 'num_heads': 8, 'num_encoder_layers': 4, 'dropout': 0.1763742996626421, 'out_channels': 31, 'kernel_size': 4}. Best is trial 3 with value: 0.9475.


248_8_4_0.1763742996626421_31_4_Validation Accuracy: 92.00%


[I 2023-12-22 22:41:39,879] Trial 13 finished with value: 0.935 and parameters: {'hidden_size': 113, 'num_heads': 6, 'num_encoder_layers': 3, 'dropout': 0.26028203134323874, 'out_channels': 29, 'kernel_size': 4}. Best is trial 3 with value: 0.9475.


108_6_3_0.26028203134323874_29_4_Validation Accuracy: 93.50%


[I 2023-12-22 22:44:48,672] Trial 14 finished with value: 0.94 and parameters: {'hidden_size': 164, 'num_heads': 7, 'num_encoder_layers': 5, 'dropout': 0.09027774302975328, 'out_channels': 33, 'kernel_size': 5}. Best is trial 3 with value: 0.9475.


161_7_5_0.09027774302975328_33_5_Validation Accuracy: 94.00%


[I 2023-12-22 22:47:06,932] Trial 15 finished with value: 0.93875 and parameters: {'hidden_size': 229, 'num_heads': 6, 'num_encoder_layers': 3, 'dropout': 0.12200015310323972, 'out_channels': 24, 'kernel_size': 5}. Best is trial 3 with value: 0.9475.


228_6_3_0.12200015310323972_24_5_Validation Accuracy: 93.88%


[I 2023-12-22 22:50:45,013] Trial 16 finished with value: 0.94625 and parameters: {'hidden_size': 154, 'num_heads': 8, 'num_encoder_layers': 6, 'dropout': 0.23436036147125466, 'out_channels': 45, 'kernel_size': 4}. Best is trial 3 with value: 0.9475.


152_8_6_0.23436036147125466_45_4_Validation Accuracy: 94.62%


[I 2023-12-22 22:52:36,378] Trial 17 finished with value: 0.94375 and parameters: {'hidden_size': 98, 'num_heads': 6, 'num_encoder_layers': 3, 'dropout': 0.2254157693110258, 'out_channels': 38, 'kernel_size': 4}. Best is trial 3 with value: 0.9475.


96_6_3_0.2254157693110258_38_4_Validation Accuracy: 94.38%


[I 2023-12-22 22:55:29,536] Trial 18 finished with value: 0.94375 and parameters: {'hidden_size': 225, 'num_heads': 7, 'num_encoder_layers': 4, 'dropout': 0.30438458173033317, 'out_channels': 24, 'kernel_size': 5}. Best is trial 3 with value: 0.9475.


224_7_4_0.30438458173033317_24_5_Validation Accuracy: 94.38%


[I 2023-12-22 23:00:19,937] Trial 19 finished with value: 0.94125 and parameters: {'hidden_size': 177, 'num_heads': 8, 'num_encoder_layers': 8, 'dropout': 0.10732803913523015, 'out_channels': 35, 'kernel_size': 4}. Best is trial 3 with value: 0.9475.


176_8_8_0.10732803913523015_35_4_Validation Accuracy: 94.12%


[I 2023-12-22 23:03:21,932] Trial 20 finished with value: 0.9375 and parameters: {'hidden_size': 136, 'num_heads': 5, 'num_encoder_layers': 5, 'dropout': 0.323079062743848, 'out_channels': 45, 'kernel_size': 5}. Best is trial 3 with value: 0.9475.


135_5_5_0.323079062743848_45_5_Validation Accuracy: 93.75%


[I 2023-12-22 23:06:56,700] Trial 21 finished with value: 0.9475 and parameters: {'hidden_size': 151, 'num_heads': 8, 'num_encoder_layers': 6, 'dropout': 0.21063163161580706, 'out_channels': 46, 'kernel_size': 4}. Best is trial 3 with value: 0.9475.


144_8_6_0.21063163161580706_46_4_Validation Accuracy: 94.75%


[I 2023-12-22 23:10:27,764] Trial 22 finished with value: 0.94875 and parameters: {'hidden_size': 142, 'num_heads': 8, 'num_encoder_layers': 6, 'dropout': 0.19000193056035242, 'out_channels': 45, 'kernel_size': 3}. Best is trial 22 with value: 0.94875.


136_8_6_0.19000193056035242_45_3_Validation Accuracy: 94.88%


[I 2023-12-22 23:13:26,951] Trial 23 finished with value: 0.93875 and parameters: {'hidden_size': 135, 'num_heads': 7, 'num_encoder_layers': 5, 'dropout': 0.16655303357074658, 'out_channels': 26, 'kernel_size': 3}. Best is trial 22 with value: 0.94875.


133_7_5_0.16655303357074658_26_3_Validation Accuracy: 93.88%


[I 2023-12-22 23:16:41,820] Trial 24 finished with value: 0.92375 and parameters: {'hidden_size': 101, 'num_heads': 8, 'num_encoder_layers': 6, 'dropout': 0.2754856912917151, 'out_channels': 35, 'kernel_size': 3}. Best is trial 22 with value: 0.94875.


96_8_6_0.2754856912917151_35_3_Validation Accuracy: 92.38%


[I 2023-12-22 23:19:25,842] Trial 25 finished with value: 0.9175 and parameters: {'hidden_size': 180, 'num_heads': 2, 'num_encoder_layers': 4, 'dropout': 0.41984858966999744, 'out_channels': 49, 'kernel_size': 3}. Best is trial 22 with value: 0.94875.


180_2_4_0.41984858966999744_49_3_Validation Accuracy: 91.75%


[I 2023-12-22 23:21:22,405] Trial 26 finished with value: 0.9475 and parameters: {'hidden_size': 122, 'num_heads': 6, 'num_encoder_layers': 3, 'dropout': 0.20037904909613158, 'out_channels': 42, 'kernel_size': 4}. Best is trial 22 with value: 0.94875.


120_6_3_0.20037904909613158_42_4_Validation Accuracy: 94.75%


[I 2023-12-22 23:26:27,127] Trial 27 finished with value: 0.9475 and parameters: {'hidden_size': 199, 'num_heads': 7, 'num_encoder_layers': 8, 'dropout': 0.07139888813206152, 'out_channels': 55, 'kernel_size': 3}. Best is trial 22 with value: 0.94875.


196_7_8_0.07139888813206152_55_3_Validation Accuracy: 94.75%


[I 2023-12-22 23:29:59,655] Trial 28 finished with value: 0.9075 and parameters: {'hidden_size': 232, 'num_heads': 8, 'num_encoder_layers': 5, 'dropout': 0.13471005870810232, 'out_channels': 36, 'kernel_size': 3}. Best is trial 22 with value: 0.94875.


232_8_5_0.13471005870810232_36_3_Validation Accuracy: 90.75%


[I 2023-12-22 23:33:13,885] Trial 29 finished with value: 0.93 and parameters: {'hidden_size': 170, 'num_heads': 7, 'num_encoder_layers': 5, 'dropout': 0.28956801716130537, 'out_channels': 64, 'kernel_size': 7}. Best is trial 22 with value: 0.94875.


168_7_5_0.28956801716130537_64_7_Validation Accuracy: 93.00%


[I 2023-12-22 23:35:45,058] Trial 30 finished with value: 0.93375 and parameters: {'hidden_size': 145, 'num_heads': 5, 'num_encoder_layers': 4, 'dropout': 0.011638093752502993, 'out_channels': 21, 'kernel_size': 4}. Best is trial 22 with value: 0.94875.


145_5_4_0.011638093752502993_21_4_Validation Accuracy: 93.38%


[I 2023-12-22 23:39:18,590] Trial 31 finished with value: 0.95 and parameters: {'hidden_size': 148, 'num_heads': 8, 'num_encoder_layers': 6, 'dropout': 0.21008686491134282, 'out_channels': 45, 'kernel_size': 5}. Best is trial 31 with value: 0.95.


144_8_6_0.21008686491134282_45_5_Validation Accuracy: 95.00%


[I 2023-12-22 23:42:44,820] Trial 32 finished with value: 0.94 and parameters: {'hidden_size': 144, 'num_heads': 8, 'num_encoder_layers': 6, 'dropout': 0.25007419908212886, 'out_channels': 42, 'kernel_size': 6}. Best is trial 31 with value: 0.95.


144_8_6_0.25007419908212886_42_6_Validation Accuracy: 94.00%


[I 2023-12-22 23:46:19,898] Trial 33 finished with value: 0.91125 and parameters: {'hidden_size': 127, 'num_heads': 8, 'num_encoder_layers': 7, 'dropout': 0.20226331277270612, 'out_channels': 51, 'kernel_size': 6}. Best is trial 31 with value: 0.95.


120_8_7_0.20226331277270612_51_6_Validation Accuracy: 91.12%


[I 2023-12-22 23:49:19,242] Trial 34 finished with value: 0.945 and parameters: {'hidden_size': 107, 'num_heads': 7, 'num_encoder_layers': 6, 'dropout': 0.15891541002406873, 'out_channels': 39, 'kernel_size': 5}. Best is trial 31 with value: 0.95.


105_7_6_0.15891541002406873_39_5_Validation Accuracy: 94.50%


[I 2023-12-22 23:52:10,194] Trial 35 finished with value: 0.94375 and parameters: {'hidden_size': 167, 'num_heads': 8, 'num_encoder_layers': 5, 'dropout': 0.33052717394405035, 'out_channels': 47, 'kernel_size': 5}. Best is trial 31 with value: 0.95.


160_8_5_0.33052717394405035_47_5_Validation Accuracy: 94.38%


[I 2023-12-22 23:55:32,170] Trial 36 finished with value: 0.935 and parameters: {'hidden_size': 87, 'num_heads': 6, 'num_encoder_layers': 7, 'dropout': 0.23550378106056524, 'out_channels': 32, 'kernel_size': 3}. Best is trial 31 with value: 0.95.


84_6_7_0.23550378106056524_32_3_Validation Accuracy: 93.50%


[I 2023-12-22 23:59:08,913] Trial 37 finished with value: 0.93375 and parameters: {'hidden_size': 191, 'num_heads': 7, 'num_encoder_layers': 6, 'dropout': 0.18555091824141792, 'out_channels': 42, 'kernel_size': 4}. Best is trial 31 with value: 0.95.


189_7_6_0.18555091824141792_42_4_Validation Accuracy: 93.38%


[I 2023-12-23 00:03:26,573] Trial 38 finished with value: 0.945 and parameters: {'hidden_size': 214, 'num_heads': 4, 'num_encoder_layers': 7, 'dropout': 0.13810151225177458, 'out_channels': 57, 'kernel_size': 3}. Best is trial 31 with value: 0.95.


212_4_7_0.13810151225177458_57_3_Validation Accuracy: 94.50%


[I 2023-12-23 00:05:48,953] Trial 39 finished with value: 0.9425 and parameters: {'hidden_size': 140, 'num_heads': 8, 'num_encoder_layers': 4, 'dropout': 0.10331907250663103, 'out_channels': 28, 'kernel_size': 6}. Best is trial 31 with value: 0.95.


136_8_4_0.10331907250663103_28_6_Validation Accuracy: 94.25%


[I 2023-12-23 00:08:55,843] Trial 40 finished with value: 0.88625 and parameters: {'hidden_size': 159, 'num_heads': 8, 'num_encoder_layers': 5, 'dropout': 0.27328649775000335, 'out_channels': 49, 'kernel_size': 5}. Best is trial 31 with value: 0.95.


152_8_5_0.27328649775000335_49_5_Validation Accuracy: 88.62%


[I 2023-12-23 00:12:28,702] Trial 41 finished with value: 0.9425 and parameters: {'hidden_size': 150, 'num_heads': 8, 'num_encoder_layers': 6, 'dropout': 0.21741851228794914, 'out_channels': 45, 'kernel_size': 4}. Best is trial 31 with value: 0.95.


144_8_6_0.21741851228794914_45_4_Validation Accuracy: 94.25%


[I 2023-12-23 00:16:22,530] Trial 42 finished with value: 0.92375 and parameters: {'hidden_size': 127, 'num_heads': 8, 'num_encoder_layers': 7, 'dropout': 0.20071267946786414, 'out_channels': 43, 'kernel_size': 3}. Best is trial 31 with value: 0.95.


120_8_7_0.20071267946786414_43_3_Validation Accuracy: 92.38%


[I 2023-12-23 00:20:16,245] Trial 43 finished with value: 0.94625 and parameters: {'hidden_size': 154, 'num_heads': 7, 'num_encoder_layers': 6, 'dropout': 0.15876154890751998, 'out_channels': 52, 'kernel_size': 4}. Best is trial 31 with value: 0.95.


154_7_6_0.15876154890751998_52_4_Validation Accuracy: 94.62%


[I 2023-12-23 00:23:50,082] Trial 44 finished with value: 0.945 and parameters: {'hidden_size': 176, 'num_heads': 8, 'num_encoder_layers': 5, 'dropout': 0.25544134107126365, 'out_channels': 48, 'kernel_size': 3}. Best is trial 31 with value: 0.95.


176_8_5_0.25544134107126365_48_3_Validation Accuracy: 94.50%


[I 2023-12-23 00:27:16,060] Trial 45 finished with value: 0.94375 and parameters: {'hidden_size': 116, 'num_heads': 7, 'num_encoder_layers': 6, 'dropout': 0.21300715229741393, 'out_channels': 19, 'kernel_size': 4}. Best is trial 31 with value: 0.95.


112_7_6_0.21300715229741393_19_4_Validation Accuracy: 94.38%


[I 2023-12-23 00:29:19,242] Trial 46 finished with value: 0.945 and parameters: {'hidden_size': 147, 'num_heads': 8, 'num_encoder_layers': 3, 'dropout': 0.17350554667548507, 'out_channels': 39, 'kernel_size': 5}. Best is trial 31 with value: 0.95.


144_8_3_0.17350554667548507_39_5_Validation Accuracy: 94.50%


[I 2023-12-23 00:31:59,368] Trial 47 finished with value: 0.9525 and parameters: {'hidden_size': 161, 'num_heads': 8, 'num_encoder_layers': 4, 'dropout': 0.1416134672882693, 'out_channels': 37, 'kernel_size': 3}. Best is trial 47 with value: 0.9525.


160_8_4_0.1416134672882693_37_3_Validation Accuracy: 95.25%


[I 2023-12-23 00:35:00,735] Trial 48 finished with value: 0.93625 and parameters: {'hidden_size': 242, 'num_heads': 7, 'num_encoder_layers': 4, 'dropout': 0.14264517872164809, 'out_channels': 30, 'kernel_size': 3}. Best is trial 47 with value: 0.9525.


238_7_4_0.14264517872164809_30_3_Validation Accuracy: 93.62%


[I 2023-12-23 00:37:47,230] Trial 49 finished with value: 0.93875 and parameters: {'hidden_size': 189, 'num_heads': 6, 'num_encoder_layers': 4, 'dropout': 0.06634575697243457, 'out_channels': 33, 'kernel_size': 3}. Best is trial 47 with value: 0.9525.


186_6_4_0.06634575697243457_33_3_Validation Accuracy: 93.88%
Best hyperparameters: {'hidden_size': 161, 'num_heads': 8, 'num_encoder_layers': 4, 'dropout': 0.1416134672882693, 'out_channels': 37, 'kernel_size': 3}
Best value (accuracy): 0.9525
