In [1]:
import torch

def testing_model(model, test_loader, device = torch.device('cpu')):
    """
    Evaluate the model on the validation set.
    """
    model.eval()
    model.to(device)
    correct, total = 0, 0
    with torch.no_grad():
        for features, targets in test_loader:
            features = features.to(device, dtype=torch.float32)
            targets = targets.to(device, dtype=torch.long)

            outputs = model(features)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    test_accuracy = 100 * correct / total
    return test_accuracy

def predict(model, data, device = torch.device('cpu'), min_key_length = 3):
    """
    Predict the key length for a single sample.

    The model is switched to evaluation mode and moved to ``device`` before
    inference, so the result does not depend on an earlier call having
    prepared the model. Both side effects persist after the call.

    Args:
        model: Module invoked as ``model(features)``. Its output is reduced
            with an arg-max over dimension 1 to obtain the predicted class.
        data: Indexable sample whose element 0 is the feature tensor of one
            sample (no batch dimension). Other elements are ignored.
        device: Device on which inference runs. Defaults to the CPU.
        min_key_length: Offset added to the predicted class index to turn it
            into a key length. Defaults to 3, the value that used to be
            hard-coded (presumably the smallest key length in the training
            data; confirm against the training labels).

    Returns:
        int: Predicted class index plus ``min_key_length``.
    """
    # Dropout / batch-norm layers must be in inference mode, and the weights
    # must live on the same device as the features.
    model.eval()
    model.to(device)
    with torch.no_grad():
        features = data[0].to(device, dtype=torch.float32)
        # Add a leading dimension of size 1 so the sample forms a batch of one.
        features = features.unsqueeze(0)
        outputs = model(features)
        _, predicted = torch.max(outputs, 1)
    return predicted.item() + min_key_length

In [2]:
from dataset import CustomDataset
import pandas as pd

# Load the three test splits; the first CSV column becomes the DataFrame index.
test_1, test_2, test_3 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("scaled_test_1.csv", "scaled_test_2.csv", "scaled_test_3.csv")
)

In [3]:
# Number of rows in test_1 for each distinct 'key_length' value.
key_length_counts = test_1['key_length'].value_counts()
key_length_counts

key_length
19    232
23    232
21    227
8     222
12    222
16    221
6     220
17    218
22    217
18    215
7     213
9     205
5     203
25    202
10    202
11    200
24    200
13    198
15    198
14    193
20    191
4     185
3     184
Name: count, dtype: int64

In [4]:
# Wrap each test DataFrame in the project's CustomDataset.
test_dataset_1, test_dataset_2, test_dataset_3 = (
    CustomDataset(frame) for frame in (test_1, test_2, test_3)
)

In [5]:
from torch.utils.data import DataLoader
BATCH_SIZE = 128

# One loader per test split: fixed sample order, BATCH_SIZE samples per batch.
test_loader_1, test_loader_2, test_loader_3 = (
    DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)
    for dataset in (test_dataset_1, test_dataset_2, test_dataset_3)
)

In [6]:
from model import KeyLengthCNN
# Length of one sample's feature vector; this is the network's input width.
input_size = len(test_dataset_1[0][0])
# NOTE: despite its name, `num_classes` holds the input feature count, not the
# number of output classes. The name is kept so existing references still work.
num_classes = input_size
model = KeyLengthCNN(input_size=input_size)
# map_location='cpu' lets a checkpoint saved from a GPU run load on a CPU-only
# machine; the freshly built model lives on the CPU at this point anyway.
model.load_state_dict(torch.load('best_model.pth', map_location='cpu', weights_only=True))

<All keys matched successfully>

In [7]:
# Prefer the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Report the accuracy of the model on each test split in turn.
for frame, loader, length_range in (
    (test_1, test_loader_1, 'smaller than 300'),
    (test_2, test_loader_2, 'between 300 and 399'),
    (test_3, test_loader_3, 'greater or equal to 400'),
):
    print(f'Testing on {len(frame)} samples whose length is {length_range}: \n{testing_model(model, loader, device):.3f}%')

Testing on 4800 samples whose length is smaller than 300: 
95.021%
Testing on 4800 samples whose length is between 300 and 399: 
98.750%
Testing on 9648 samples whose length is greater or equal to 400: 
99.109%


In [10]:
from extract_features import extract_features
import pandas as pd
from dataset import rescale_dataset
from viginere import encrypt_vigenere
from utils import clean_text

In [None]:
# Sample plaintext for the end-to-end demo; later cells pass it through
# clean_text and encrypt it with encrypt_vigenere.
text = """Accepted presentations will not be published in any proceedings, 
however, viewgraphs and other materials will be reproduced for 
seminar attendees.

ABSTRACTS: Authors should submit a one page abstract and/or videotape to:

     Robert Lipman
     Naval Surface Warfare Center, Carderock Division
     Code 2042
     Bethesda, Maryland  20084-5000

     VOICE (301) 227-3618;  FAX (301) 227-5753  """


# Encryption key for the demo; its length (13) is the value the model's
# prediction is compared against at the end of the notebook.
key = 'PHAMVIETGIANG'

In [11]:
# Pass the plaintext through the project's clean_text helper (rebinds `text`)
# and show how many characters remain.
text = clean_text(text)
cleaned_length = len(text)
cleaned_length

266

In [12]:
# Encrypt the cleaned plaintext with the demo key.
ciphertext = encrypt_vigenere(text, key)
# Compute the feature record for the ciphertext.
data = extract_features(ciphertext)
# Placeholder label, not the real key length. Presumably needed because
# rescale_dataset / CustomDataset expect a 'key_length' column — TODO confirm.
data['key_length'] = 0
# Wrap the record in a one-row DataFrame and apply the project's rescaling
# (the log output shows previously saved scalers being loaded).
rescaled_data = rescale_dataset(pd.DataFrame([data]))

Tải StandardScaler cho twist_columns từ scalers\standard_scaler_twist.pkl
Tải MinMaxScaler cho other_columns từ scalers\minmax_scaler_other.pkl


In [13]:
# Build a dataset from the rescaled record and take its first item.
# NOTE: the name `input` shadows Python's built-in input(); it is kept because
# later cells refer to it.
single_sample_dataset = CustomDataset(rescaled_data)
input = single_sample_dataset[0]

In [14]:
# Element 0 of the sample is the feature tensor that predict() feeds to the model.
sample_features = input[0]
sample_features

tensor([-4.7115e-01,  1.2339e-01, -6.2426e-01,  1.3451e-01, -9.0393e-01,
        -9.2706e-03, -1.1989e+00, -8.8036e-01,  6.3266e-01, -4.6220e-01,
        -7.1804e-01, -2.4463e+00, -1.0428e+00, -8.4077e-01, -1.0697e+00,
        -1.1073e+00, -7.7100e-01, -7.2687e-01, -3.9380e-01, -3.4512e-01,
        -3.0878e-01,  1.6293e+00,  1.5068e+00,  1.3707e+00, -3.7831e-01,
         3.8849e-01, -5.1888e-01,  5.5127e-01, -5.8641e-01,  6.8659e-01,
        -5.0939e-01, -4.9747e-01,  1.0724e+00, -4.1848e-01,  8.9442e-01,
        -1.9152e+00,  6.4339e-01,  2.3724e-01, -4.9416e-02, -3.3252e-01,
         2.2863e-01, -3.4236e-01,  2.4062e-01,  1.0228e-03,  4.7158e-02,
        -2.1078e-01,  2.8974e-01,  0.0000e+00,  2.2000e-01,  0.0000e+00,
         6.6000e-02,  3.8871e-02,  3.7970e-01,  2.1053e-01,  3.9012e-02,
         4.1739e-02,  3.8483e-02,  4.1371e-02,  3.5359e-02,  4.5691e-02,
         4.3423e-02,  3.4291e-02,  3.8893e-02,  4.2208e-02,  6.4411e-02,
         3.5088e-02,  3.5022e-02,  4.0411e-02,  3.2

In [17]:
# Predict the key length of the encrypted sample with the loaded model.
result = predict(model=model, data=input, device=device)
result

13

In [18]:
# Ground truth: the true key length alongside the key itself.
(len(key), key)

(13, 'PHAMVIETGIANG')

In [19]:
# Does the predicted key length match the true one?
result == len(key)

True