Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test(tuner): add test for overfitting (#109)
* test(tuner): add test for overfitting
* fix: apply suggestions from code review (Co-authored-by: Wang Bo <bo.wang@jina.ai>)
* fix(tuner): dim (Co-authored-by: Wang Bo <bo.wang@jina.ai>)
- Loading branch information
Showing
5 changed files
with
253 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,5 @@ numpy | |
tensorflow | ||
paddlepaddle | ||
torch | ||
torchvision | ||
torchvision | ||
scipy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import pytest | ||
import tensorflow as tf | ||
from scipy.spatial.distance import pdist, squareform | ||
|
||
from finetuner.tuner.keras import KerasTuner | ||
|
||
|
||
@pytest.mark.parametrize(
    "n_cls,dim,n_samples,n_epochs,batch_size,head_layer",
    [
        (5, 10, 100, 5, 25, 'TripletLayer'),
        (5, 10, 1000, 15, 256, 'CosineLayer'),  # Cosine needs more training to converge
    ],
)
def test_overfit_keras(
    create_easy_data,
    n_cls: int,
    dim: int,
    n_samples: int,
    n_epochs: int,
    batch_size: int,
    head_layer: str,
):
    """Check that the Keras tuner can overfit a tiny random dataset.

    An over-parametrized MLP (a few thousand weights for fewer than 100
    unique random input vectors) should easily learn to pull same-class
    vectors together and push different-class vectors apart.
    """
    # Build data and an over-parametrized embedding model
    data, vecs = create_easy_data(n_cls, dim, n_samples)
    hidden_layers = [tf.keras.layers.Dense(64, activation='relu') for _ in range(3)]
    embed_model = tf.keras.Sequential(
        [tf.keras.layers.Flatten(input_shape=(dim,))]
        + hidden_layers
        + [tf.keras.layers.Dense(32)]
    )

    # Train the tuner on the small dataset
    tuner = KerasTuner(embed_model, head_layer=head_layer)
    tuner.fit(train_data=data, epochs=n_epochs, batch_size=batch_size)

    # Embed the original vectors
    embeddings = embed_model(vecs).numpy()

    # Pairwise distances in the metric matching the head layer
    metric = 'sqeuclidean' if head_layer == 'TripletLayer' else 'cosine'
    dists = squareform(pdist(embeddings, metric=metric))

    # For every class, its two instances (rows 2i and 2i+1) must be closer
    # to each other than to any other vector (up to a small tolerance)
    for cls_idx in range(n_cls):
        row, col = 2 * cls_idx, 2 * cls_idx + 1
        intra_dist = dists[row, col]
        inter_dists = dists[row : col + 1, :].copy()
        inter_dists[:, row : col + 1] = 10_000  # mask out intra-class entries

        assert intra_dist < inter_dists.min() + 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import paddle | ||
import pytest | ||
from paddle import nn | ||
from scipy.spatial.distance import pdist, squareform | ||
|
||
from finetuner.tuner.paddle import PaddleTuner | ||
|
||
|
||
@pytest.mark.parametrize(
    "n_cls,dim,n_samples,n_epochs,batch_size,head_layer",
    [
        (5, 10, 100, 5, 25, 'TripletLayer'),
        (5, 10, 1000, 15, 256, 'CosineLayer'),  # Cosine needs more training to converge
    ],
)
def test_overfit_paddle(
    create_easy_data,
    n_cls: int,
    dim: int,
    n_samples: int,
    n_epochs: int,
    batch_size: int,
    head_layer: str,
):
    """Check that the Paddle tuner can overfit a tiny random dataset.

    An over-parametrized MLP (a few thousand weights for fewer than 100
    unique random input vectors) should easily learn to pull same-class
    vectors together and push different-class vectors apart.
    """
    # Build data and an over-parametrized embedding model
    data, vecs = create_easy_data(n_cls, dim, n_samples)
    layers = [nn.Flatten()]
    in_dim = dim
    for _ in range(3):
        layers.append(nn.Linear(in_features=in_dim, out_features=64))
        layers.append(nn.ReLU())
        in_dim = 64
    layers.append(nn.Linear(in_features=64, out_features=32))
    embed_model = nn.Sequential(*layers)

    # Train the tuner on the small dataset
    tuner = PaddleTuner(embed_model, head_layer=head_layer)
    tuner.fit(train_data=data, epochs=n_epochs, batch_size=batch_size)

    # Embed the original vectors
    embeddings = embed_model(paddle.Tensor(vecs)).numpy()

    # Pairwise distances in the metric matching the head layer
    metric = 'sqeuclidean' if head_layer == 'TripletLayer' else 'cosine'
    dists = squareform(pdist(embeddings, metric=metric))

    # For every class, its two instances (rows 2i and 2i+1) must be closer
    # to each other than to any other vector (up to a small tolerance)
    for cls_idx in range(n_cls):
        row, col = 2 * cls_idx, 2 * cls_idx + 1
        intra_dist = dists[row, col]
        inter_dists = dists[row : col + 1, :].copy()
        inter_dists[:, row : col + 1] = 10_000  # mask out intra-class entries

        assert intra_dist < inter_dists.min() + 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import pytest | ||
import torch | ||
from scipy.spatial.distance import pdist, squareform | ||
|
||
from finetuner.tuner.pytorch import PytorchTuner | ||
|
||
|
||
@pytest.mark.parametrize(
    "n_cls,dim,n_samples,n_epochs,batch_size,head_layer",
    [
        (5, 10, 100, 5, 25, 'TripletLayer'),
        (5, 10, 1000, 15, 256, 'CosineLayer'),  # Cosine needs more training to converge
    ],
)
def test_overfit_pytorch(
    create_easy_data,
    n_cls: int,
    dim: int,
    n_samples: int,
    n_epochs: int,
    batch_size: int,
    head_layer: str,
):
    """Check that the PyTorch tuner can overfit a tiny random dataset.

    An over-parametrized MLP (a few thousand weights for fewer than 100
    unique random input vectors) should easily learn to pull same-class
    vectors together and push different-class vectors apart.
    """
    # Build data and an over-parametrized embedding model
    data, vecs = create_easy_data(n_cls, dim, n_samples)
    layers = [torch.nn.Flatten()]
    in_dim = dim
    for _ in range(3):
        layers.append(torch.nn.Linear(in_features=in_dim, out_features=64))
        layers.append(torch.nn.ReLU())
        in_dim = 64
    layers.append(torch.nn.Linear(in_features=64, out_features=32))
    embed_model = torch.nn.Sequential(*layers)

    # Train the tuner on the small dataset
    tuner = PytorchTuner(embed_model, head_layer=head_layer)
    tuner.fit(train_data=data, epochs=n_epochs, batch_size=batch_size)

    # Embed the original vectors without tracking gradients
    with torch.inference_mode():
        embeddings = embed_model(torch.Tensor(vecs)).numpy()

    # Pairwise distances in the metric matching the head layer
    metric = 'sqeuclidean' if head_layer == 'TripletLayer' else 'cosine'
    dists = squareform(pdist(embeddings, metric=metric))

    # For every class, its two instances (rows 2i and 2i+1) must be closer
    # to each other than to any other vector (up to a small tolerance)
    for cls_idx in range(n_cls):
        row, col = 2 * cls_idx, 2 * cls_idx + 1
        intra_dist = dists[row, col]
        inter_dists = dists[row : col + 1, :].copy()
        inter_dists[:, row : col + 1] = 10_000  # mask out intra-class entries

        assert intra_dist < inter_dists.min() + 1