In [46]:
using Pkg
Pkg.activate("..")

# Load module
push!(LOAD_PATH, "../src")
using WriterVerifier

using Random
Random.seed!(42);

[32m[1m  Activating[22m[39m project at `c:\Users\igorp\WriterVerifier`


In [47]:
# Load data
println("Loading images...")

image_folder = "../data/words"

# Load images
writers = load_images(image_folder; max_per_writer=100000);

Loading images...
Loaded 76 writers
f01: 857 images
d01: 960 images
g05: 225 images
e07: 1371 images
e02: 877 images
a04: 1918 images
p02: 1578 images
p06: 775 images
r02: 1359 images
h07: 3017 images
b04: 2196 images
k02: 971 images
d03: 145 images
d06: 2116 images
c03: 4435 images
b01: 1832 images
c04: 2320 images
f03: 291 images
c02: 1141 images
g06: 12114 images
l07: 1151 images
j07: 344 images
k04: 2429 images
e06: 1140 images
l03: 205 images
a03: 1652 images
r03: 1123 images
n06: 1593 images
j04: 1332 images
r06: 2111 images
n02: 1790 images
g03: 725 images
k01: 86 images
j06: 740 images
h04: 1252 images
m02: 1379 images
b05: 1182 images
g02: 299 images
k03: 520 images
c01: 249 images
f02: 864 images
p03: 2173 images
p01: 337 images
a05: 1943 images
d05: 456 images
m01: 1663 images
g04: 2059 images
h06: 613 images
f04: 2060 images
n01: 687 images
j01: 514 images
b02: 430 images
b03: 519 images
l04: 2331 images
b06: 1996 images
n04: 2377 images
e01: 1284 images
l01: 1610 images
a0

In [48]:
# Create pairs
pairs, labels = create_pairs(writers; positive=50, negative=50);

Creating positive pairs...
Creating negative pairs...
Created 100 pairs
Positive: 50
Negative: 50


In [49]:
# Create model
model = create_model()
println("Model created")


Model created


In [None]:
# Model training

trained_model, history = train_model!(
    model, pairs, labels;
    epochs=50, 
    batch_size=64,
    learning_rate=0.00001
)

println("Training completed")

final_acc = round(history["val_acc"][end] * 100, digits=1)
println("Final accuracy: $(final_acc)%")

Starting training...
Training data: 80
Validation data: 20

Epoch 1/50
Train loss: 0.7008
Val loss: 0.6935
Val accuracy: 50.0%

Epoch 2/50
Train loss: 0.6911
Val loss: 0.6942
Val accuracy: 45.0%

Epoch 3/50
Train loss: 0.6994
Val loss: 0.6937
Val accuracy: 45.0%

Epoch 4/50
Train loss: 0.6882
Val loss: 0.6938
Val accuracy: 55.0%

Epoch 5/50
Train loss: 0.6761
Val loss: 0.6937
Val accuracy: 55.0%

Epoch 6/50
Train loss: 0.687
Val loss: 0.6934
Val accuracy: 55.0%

Epoch 7/50
Train loss: 0.6938
Val loss: 0.6937
Val accuracy: 55.0%

Epoch 8/50
Train loss: 0.7042
Val loss: 0.6939
Val accuracy: 45.0%

Epoch 9/50
Train loss: 0.696
Val loss: 0.6937
Val accuracy: 50.0%

Epoch 10/50
Train loss: 0.6833
Val loss: 0.6939
Val accuracy: 45.0%

Epoch 11/50
Train loss: 0.6907
Val loss: 0.6938
Val accuracy: 40.0%

Epoch 12/50
Train loss: 0.7026
Val loss: 0.6939
Val accuracy: 40.0%

Epoch 13/50
Train loss: 0.6907
Val loss: 0.6937
Val accuracy: 40.0%

Epoch 14/50
Train loss: 0.6972
Val loss: 0.6938
Val ac

In [51]:
# Testing on real examples

# Select random test examples
idx = randperm(length(pairs))[1:min(6, length(pairs))]
test_pairs = pairs[idx]
test_labels = labels[idx]

for i in 1:length(test_pairs)
    path1, path2 = test_pairs[i]
    true_label = test_labels[i]
    
    # Extract writer names
    writer1 = split(basename(path1), "-")[1]
    writer2 = split(basename(path2), "-")[1]
    
    # Test similarity
    similarity = test_similarity(trained_model, path1, path2)
    
    # Interpret results
    prediction = similarity > 0.5 ? "SAME" : "DIFFERENT"
    truth = true_label == 1 ? "SAME" : "DIFFERENT"
    correct = (similarity > 0.5) == (true_label == 1) ? "✓" : "X"
    
    println("$i. $writer1 vs $writer2")
    println("   Similarity: $(round(similarity, digits=3))")
    println("   Prediction: $prediction | Truth: $truth $correct")
    println()
end


1. a02 vs a02
   Similarity: 0.502
   Prediction: SAME | Truth: SAME ✓

2. b03 vs b03
   Similarity: 0.498
   Prediction: DIFFERENT | Truth: SAME X

3. h04 vs h01
   Similarity: 0.49
   Prediction: DIFFERENT | Truth: DIFFERENT ✓

4. j04 vs j04
   Similarity: 0.497
   Prediction: DIFFERENT | Truth: SAME X

5. r03 vs r03
   Similarity: 0.468
   Prediction: DIFFERENT | Truth: SAME X

6. d03 vs c04
   Similarity: 0.497
   Prediction: DIFFERENT | Truth: DIFFERENT ✓



In [52]:
# Model saving
try
    if !isdir("../models")
        mkdir("../models")
    end
    
    save_model(trained_model, "../models/model.jld2")
    
catch e
    println("Save error: $e")
end

Model saved: ../models/model.jld2


In [53]:
# Load my data
println("Loading images...")

image_folder = "../data/my_words"

# Load images
writers = load_images(image_folder; max_per_writer=10000);

Loading images...
Loaded 2 writers
a01: 4 images
my: 4 images


In [54]:
model = load_model("../models/model.jld2")

Model loaded: ../models/model.jld2


SiameseNetwork(Chain(Conv((5, 5), 1 => 32, relu, pad=2), MaxPool((2, 2)), Conv((3, 3), 32 => 64, relu, pad=1), MaxPool((2, 2)), Conv((3, 3), 64 => 128, relu, pad=1), MaxPool((2, 2)), flatten, Dense(65536 => 256, relu), Dropout(0.5, active=false), Dense(256 => 128)), Chain(Dense(128 => 64, relu), Dropout(0.3, active=false), Dense(64 => 32, relu), Dense(32 => 1, σ)))

In [55]:
pairs, labels = create_pairs(writers, positive=4, negative=4)



Creating positive pairs...
Creating negative pairs...
Created 5 pairs
Positive: 2
Negative: 3


(Any[("../data/my_words\\a01-000u-00-06.png", "../data/my_words\\a01-000u-01-04.png"), ("../data/my_words\\my-04.png", "../data/my_words\\my-03.png"), ("../data/my_words\\my-04.png", "../data/my_words\\a01-000u-00-06.png"), ("../data/my_words\\my-02.png", "../data/my_words\\a01-000u-01-02.png"), ("../data/my_words\\my-01.png", "../data/my_words\\a01-000u-00-00.png")], Any[1, 1, 0, 0, 0])

In [56]:
for i in 1:length(pairs)
    true_label = labels[i]
    path1, path2 = pairs[i]

    # Extract writer names
    writer1 = split(basename(path1), "-")[1]
    writer2 = split(basename(path2), "-")[1]

    similarity = test_similarity(model, path1, path2)


    # Interpret results
    prediction = similarity > 0.5 ? "SAME" : "DIFFERENT"
    truth = true_label == 1 ? "SAME" : "DIFFERENT"
    correct = (similarity > 0.5) == (true_label == 1) ? "✓" : "X"

    println("$i. $writer1 vs $writer2")
    println("   Similarity: $(round(similarity, digits=3))")
    println("   Prediction: $prediction | Truth: $truth $correct")
    println()
end

1. a01 vs a01
   Similarity: 0.496
   Prediction: DIFFERENT | Truth: SAME X

2. my vs my
   Similarity: 0.491
   Prediction: DIFFERENT | Truth: SAME X

3. my vs a01
   Similarity: 0.46
   Prediction: DIFFERENT | Truth: DIFFERENT ✓

4. my vs a01
   Similarity: 0.471
   Prediction: DIFFERENT | Truth: DIFFERENT ✓

5. my vs a01
   Similarity: 0.495
   Prediction: DIFFERENT | Truth: DIFFERENT ✓

