docs: update artifacts, lr, and results for tll experiment (#599)
guenthermi committed Nov 8, 2022
1 parent 1ab1372 commit 5f665dd
Showing 4 changed files with 47 additions and 45 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -28,7 +28,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Docs

-- Add documentation for `WandBLogger`. [#600](https://github.com/jina-ai/finetuner/pull/600)
+- Change datasets and hyperparameters for ResNet experiment. ([#599](https://github.com/jina-ai/finetuner/pull/599))
+
+- Add documentation for `WandBLogger`. ([#600](https://github.com/jina-ai/finetuner/pull/600))


## [0.6.4] - 2022-10-27
20 changes: 10 additions & 10 deletions README.md
@@ -67,16 +67,16 @@ With Finetuner, one can easily uplift pre-trained models to be more performant a
<td rowspan="2">ResNet</td>
<td rowspan="2">Visual similarity search on <a href="https://sites.google.com/view/totally-looks-like-dataset">TLL</a></td>
<td>mAP</td>
-<td>0.102</td>
-<td>0.166</td>
-<td><span style="color:green">62.7%</span></td>
+<td>0.110</td>
+<td>0.196</td>
+<td><span style="color:green">78.2%</span></td>
<td rowspan="2"><p align=center><a href="https://colab.research.google.com/drive/1QuUTy3iVR-kTPljkwplKYaJ-NTCgPEc_?usp=sharing"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg"></a></p></td>
</tr>
<tr>
<td>Recall</td>
-<td>0.235</td>
-<td>0.372</td>
-<td><span style="color:green">58.3%</span></td>
+<td>0.249</td>
+<td>0.460</td>
+<td><span style="color:green">84.7%</span></td>
</tr>
<tr>
<td rowspan="2">CLIP</td>
@@ -97,7 +97,7 @@ With Finetuner, one can easily uplift pre-trained models to be more performant a
</tbody>
</table>

-<sub><sup>All metrics are evaluated on k@20 after training for 5 epochs using Adam optimizer with learning rates of 1e-7 for CLIP and 1e-5 for the other models.</sup></sub>
+<sub><sup>All metrics are evaluated on k@20 after training for 5 epochs using Adam optimizer with learning rates of 1e-4 for ResNet, 1e-7 for CLIP and 1e-5 for the BERT models.</sup></sub>
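For readers unfamiliar with the table's metrics, a minimal Python sketch (toy data; an illustration, not the actual TLL evaluation code) shows how recall@k and average precision are typically computed:

```python
def recall_at_k(relevant, ranked, k=20):
    """Fraction of the relevant items that appear in the top-k results."""
    hits = sum(1 for doc in ranked[:k] if doc in relevant)
    return hits / len(relevant)

def average_precision(relevant, ranked):
    """Mean of precision@i over the ranks i at which a relevant item occurs."""
    score, hits = 0.0, 0
    for i, doc in enumerate(ranked, start=1):
        if doc in relevant:
            hits += 1
            score += hits / i
    return score / len(relevant) if relevant else 0.0

# Toy example: one query with two relevant documents.
relevant = {"a", "b"}
ranked = ["a", "x", "b", "y"]
print(recall_at_k(relevant, ranked, k=20))   # 1.0
print(average_precision(relevant, ranked))   # (1/1 + 2/3) / 2 ≈ 0.833
```

mAP in the table is this average precision averaged over all test queries.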

<!-- start install-instruction -->

@@ -138,11 +138,11 @@ finetuner.login() # use finetuner.notebook_login() in Jupyter notebook/Google C
run = finetuner.fit(
model='resnet50',
run_name='resnet50-tll-run',
-train_data='tll-train-da',
+train_data='tll-train-data',
callbacks=[
EvaluationCallback(
-query_data='tll-test-query-da',
-index_data='tll-test-index-da',
+query_data='tll-test-query-data',
+index_data='tll-test-index-data',
)
],
)
34 changes: 17 additions & 17 deletions docs/notebooks/image_to_image.ipynb
@@ -68,7 +68,7 @@
"## Data\n",
"\n",
"Our journey starts locally. We have to prepare the data and push it to the Jina AI Cloud and Finetuner will be able to get the dataset by its name. For this example,\n",
-"we already prepared the data, and we'll provide the names of training data (`tll-train-da`) directly to Finetuner.\n",
+"we already prepared the data, and we'll provide the names of training data (`tll-train-data`) directly to Finetuner.\n",
"\n",
"```{important} \n",
"We don't require you to push data to the Jina AI Cloud by yourself. Instead of a name, you can provide a `DocumentArray` and Finetuner will do the job for you.\n",
@@ -99,9 +99,9 @@
{
"cell_type": "code",
"source": [
-"train_data = DocumentArray.pull('tll-train-da', show_progress=True)\n",
-"query_data = DocumentArray.pull('tll-test-query-da', show_progress=True)\n",
-"index_data = DocumentArray.pull('tll-test-index-da', show_progress=True)\n",
+"train_data = DocumentArray.pull('tll-train-data', show_progress=True)\n",
+"query_data = DocumentArray.pull('tll-test-query-data', show_progress=True)\n",
+"index_data = DocumentArray.pull('tll-test-index-data', show_progress=True)\n",
"\n",
"train_data.summary()"
],
@@ -142,15 +142,15 @@
"\n",
"run = finetuner.fit(\n",
" model='resnet50',\n",
-" train_data='tll-train-da',\n",
+" train_data='tll-train-data',\n",
" batch_size=128,\n",
" epochs=5,\n",
-" learning_rate=1e-5,\n",
+" learning_rate=1e-4,\n",
" device='cuda',\n",
" callbacks=[\n",
" EvaluationCallback(\n",
-" query_data='tll-test-query-da',\n",
-" index_data='tll-test-index-da',\n",
+" query_data='tll-test-query-data',\n",
+" index_data='tll-test-index-data',\n",
" )\n",
" ],\n",
")"
@@ -229,15 +229,15 @@
"\n",
"```bash\n",
" Training [5/5] ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 76/76 0:00:00 0:03:15 • loss: 0.003\n",
-"[16:39:13] DEBUG Metric: 'model_average_precision' Value: 0.16603 __main__.py:202\n",
-" DEBUG Metric: 'model_dcg_at_k' Value: 0.23632 __main__.py:202\n",
-" DEBUG Metric: 'model_f1_score_at_k' Value: 0.03544 __main__.py:202\n",
-" DEBUG Metric: 'model_hit_at_k' Value: 0.37209 __main__.py:202\n",
-" DEBUG Metric: 'model_ndcg_at_k' Value: 0.23632 __main__.py:202\n",
-" DEBUG Metric: 'model_precision_at_k' Value: 0.01860 __main__.py:202\n",
-" DEBUG Metric: 'model_r_precision' Value: 0.16603 __main__.py:202\n",
-" DEBUG Metric: 'model_recall_at_k' Value: 0.37209 __main__.py:202\n",
-" DEBUG Metric: 'model_reciprocal_rank' Value: 0.16603 __main__.py:202\n",
+"[16:39:13] DEBUG Metric: 'model_average_precision' Value: 0.19598 __main__.py:202\n",
+" DEBUG Metric: 'model_dcg_at_k' Value: 0.28571 __main__.py:202\n",
+" DEBUG Metric: 'model_f1_score_at_k' Value: 0.04382 __main__.py:202\n",
+" DEBUG Metric: 'model_hit_at_k' Value: 0.46013 __main__.py:202\n",
+" DEBUG Metric: 'model_ndcg_at_k' Value: 0.28571 __main__.py:202\n",
+" DEBUG Metric: 'model_precision_at_k' Value: 0.02301 __main__.py:202\n",
+" DEBUG Metric: 'model_r_precision' Value: 0.19598 __main__.py:202\n",
+" DEBUG Metric: 'model_recall_at_k' Value: 0.46013 __main__.py:202\n",
+" DEBUG Metric: 'model_reciprocal_rank' Value: 0.19598 __main__.py:202\n",
" INFO Done ✨ __main__.py:204\n",
" INFO Saving fine-tuned models ... __main__.py:207\n",
" INFO Saving model 'model' in /usr/src/app/tuned-models/model ... __main__.py:218\n",
34 changes: 17 additions & 17 deletions docs/notebooks/image_to_image.md
@@ -44,7 +44,7 @@ After fine-tuning, the embeddings of positive pairs are expected to be pulled cl
## Data

Our journey starts locally. We have to prepare the data and push it to the Jina AI Cloud and Finetuner will be able to get the dataset by its name. For this example,
-we already prepared the data, and we'll provide the names of training data (`tll-train-da`) directly to Finetuner.
+we already prepared the data, and we'll provide the names of training data (`tll-train-data`) directly to Finetuner.

```{important}
We don't require you to push data to the Jina AI Cloud by yourself. Instead of a name, you can provide a `DocumentArray` and Finetuner will do the job for you.
@@ -63,9 +63,9 @@ finetuner.notebook_login(force=True)
```

```python id="ONpXDwFBsqQS"
-train_data = DocumentArray.pull('tll-train-da', show_progress=True)
-query_data = DocumentArray.pull('tll-test-query-da', show_progress=True)
-index_data = DocumentArray.pull('tll-test-index-da', show_progress=True)
+train_data = DocumentArray.pull('tll-train-data', show_progress=True)
+query_data = DocumentArray.pull('tll-test-query-data', show_progress=True)
+index_data = DocumentArray.pull('tll-test-index-data', show_progress=True)

train_data.summary()
```
@@ -89,15 +89,15 @@ from finetuner.callback import EvaluationCallback

run = finetuner.fit(
model='resnet50',
-train_data='tll-train-da',
+train_data='tll-train-data',
batch_size=128,
epochs=5,
-learning_rate=1e-5,
+learning_rate=1e-4,
device='cuda',
callbacks=[
EvaluationCallback(
-query_data='tll-test-query-da',
-index_data='tll-test-index-da',
+query_data='tll-test-query-data',
+index_data='tll-test-index-data',
)
],
)
@@ -147,15 +147,15 @@ What you can do for now is to call `run.logs()` in the end of the run and see ev

```bash
Training [5/5] ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 76/76 0:00:00 0:03:15 • loss: 0.003
-[16:39:13] DEBUG Metric: 'model_average_precision' Value: 0.16603 __main__.py:202
- DEBUG Metric: 'model_dcg_at_k' Value: 0.23632 __main__.py:202
- DEBUG Metric: 'model_f1_score_at_k' Value: 0.03544 __main__.py:202
- DEBUG Metric: 'model_hit_at_k' Value: 0.37209 __main__.py:202
- DEBUG Metric: 'model_ndcg_at_k' Value: 0.23632 __main__.py:202
- DEBUG Metric: 'model_precision_at_k' Value: 0.01860 __main__.py:202
- DEBUG Metric: 'model_r_precision' Value: 0.16603 __main__.py:202
- DEBUG Metric: 'model_recall_at_k' Value: 0.37209 __main__.py:202
- DEBUG Metric: 'model_reciprocal_rank' Value: 0.16603 __main__.py:202
+[16:39:13] DEBUG Metric: 'model_average_precision' Value: 0.19598 __main__.py:202
+ DEBUG Metric: 'model_dcg_at_k' Value: 0.28571 __main__.py:202
+ DEBUG Metric: 'model_f1_score_at_k' Value: 0.04382 __main__.py:202
+ DEBUG Metric: 'model_hit_at_k' Value: 0.46013 __main__.py:202
+ DEBUG Metric: 'model_ndcg_at_k' Value: 0.28571 __main__.py:202
+ DEBUG Metric: 'model_precision_at_k' Value: 0.02301 __main__.py:202
+ DEBUG Metric: 'model_r_precision' Value: 0.19598 __main__.py:202
+ DEBUG Metric: 'model_recall_at_k' Value: 0.46013 __main__.py:202
+ DEBUG Metric: 'model_reciprocal_rank' Value: 0.19598 __main__.py:202
INFO Done ✨ __main__.py:204
INFO Saving fine-tuned models ... __main__.py:207
INFO Saving model 'model' in /usr/src/app/tuned-models/model ... __main__.py:218
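The metric lines in the sample `run.logs()` output follow a regular pattern; as a hypothetical convenience (this `parse_metrics` helper is an illustration, not part of Finetuner), they can be pulled out of the raw log text with a short parser:

```python
import re

# Matches lines like:
#   DEBUG Metric: 'model_recall_at_k' Value: 0.46013 __main__.py:202
METRIC_LINE = re.compile(r"Metric: '(?P<name>\w+)' Value: (?P<value>[0-9.]+)")

def parse_metrics(logs: str) -> dict:
    """Return a {metric_name: float_value} mapping from raw log text."""
    return {m["name"]: float(m["value"]) for m in METRIC_LINE.finditer(logs)}

sample = """
[16:39:13] DEBUG Metric: 'model_average_precision' Value: 0.19598 __main__.py:202
           DEBUG Metric: 'model_recall_at_k' Value: 0.46013 __main__.py:202
"""
metrics = parse_metrics(sample)
print(metrics["model_recall_at_k"])  # 0.46013
```

A helper like this makes it easy to compare the evaluation metrics across runs programmatically instead of reading them off the console.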
