add functionality to evaluate a test dataset

bolcom · Mar 18, 2022 · 90a2137 · 90a2137
1 parent ea9b2fe
commit 90a2137
Show file tree

Hide file tree

Showing 5 changed files with 78 additions and 33 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -8,6 +8,7 @@ on:
 env:
   TPE_HPO_BIN: 'tpe_hyperparameter_optm'
   SERVING_BIN: 'serving'
+  EVALUATOR_BIN: 'evaluator'
   RELEASE_NAME: 'Serenade'
   WINDOWS_TARGET: x86_64-pc-windows-msvc.Zip
   MACOS_TARGET: x86_64-apple-darwin.tar.gz
@@ -43,7 +44,7 @@ jobs:
         shell: bash
         run: |
           cd target/release
-          tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.LINUX_AMD64_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }}
+          tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.LINUX_AMD64_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} ${{ env.EVALUATOR_BIN }}
           cd -
 
       - name: Publish
@@ -82,7 +83,7 @@ jobs:
         shell: bash
         run: |
           cd target/release
-          tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.MACOS_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }}
+          tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.MACOS_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} ${{ env.EVALUATOR_BIN }}
           cd -
 
       - name: Publish
@@ -123,7 +124,7 @@ jobs:
         shell: powershell
         run: |
           $compress = @{
-            Path = "D:\a\serenade\serenade\target\release\${{ env.TPE_HPO_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.SERVING_BIN }}.exe"
+            Path = "D:\a\serenade\serenade\target\release\${{ env.TPE_HPO_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.SERVING_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.EVALUATOR_BIN }}.exe" 
             CompressionLevel = "Fastest"
             DestinationPath = ".\${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.version }}-${{ env.WINDOWS_TARGET }}"
           }

diff --git a/README.md b/README.md
@@ -13,6 +13,7 @@ VMIS-kNN is an index-based variant of a state-of-the-art nearest neighbor algori
 4. [Start the Serenade service](#start-service)
 5. [Retrieve recommendations using python](#retrieve-recommendations)
 6. [Using your own train- and testset](#dataset)
+7. [Evaluate the model](#evaluator)
 
 
 ### Downloads <a name="downloads"></a>
@@ -23,6 +24,7 @@ Extract both downloaded files in the same directory. You now have the following f
 ```
 serving
 tpe_hyperparameter_optm
+evaluator
 train.txt
 test.txt
 valid.txt
@@ -41,7 +43,6 @@ The section `[hyperparam]` in the `example.toml` contains the ranges of hyperpar
 
 The results will be printed out in the terminal, for example:
 ```
-...
 ===============================================================
 ===          HYPER PARAMETER OPTIMIZATION RESULTS          ====
 ===============================================================
@@ -160,6 +161,27 @@ SessionId       ItemId  Time
 10038   6424    1591008704.0
 ```
 
+### Evaluate the model <a name="evaluator"></a>
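+The `evaluator` application evaluates the model on the test dataset configured in the `[hyperparam]` section of the configuration file. It reports several quality metrics together with prediction latency percentiles.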
+* The evaluation can be started using:
+```bash
+./evaluator example.toml
+```
+
+```
+===============================================================
+===               START EVALUATING TEST FILE               ====
+===============================================================
+Mrr@20,Ndcg@20,HitRate@20,Popularity@20,Precision@20,Coverage@20,Recall@20,F1score@20
+0.3277,0.3553,0.6402,0.0499,0.0680,0.2765,0.4456,0.1180
+Qty test evaluations: 931
+Prediction latency
+p90 (microseconds): 66
+p95 (microseconds): 66
+p99.5 (microseconds): 66
+```
+
+
 # Citation
 > [Serenade - Low-Latency Session-Based Recommendation in e-Commerce at Scale](https://ssc.io/pdf/modds003.pdf)
 

diff --git a/assets/example/example.zip b/assets/example/example.zip
diff --git a/src/bin/evaluator.rs b/src/bin/evaluator.rs
@@ -1,65 +1,87 @@
+use std::path::Path;
 use serenade_optimized::{io, vmisknn};
 
-use serenade_optimized::metrics::mrr::Mrr;
-use serenade_optimized::metrics::SessionMetric;
 use serenade_optimized::vmisknn::vmis_index::VMISIndex;
+use serenade_optimized::config::AppConfig;
+use serenade_optimized::metrics::evaluation_reporter::EvaluationReporter;
+use serenade_optimized::stopwatch::Stopwatch;
 
 fn main() {
-    // hyper-parameters
-    let n_most_recent_sessions = 1500;
-    let neighborhood_size_k = 500;
-    let last_items_in_session = 3;
-    let idf_weighting = 1.0;
-    let enable_business_logic = false;
+    let config_path = std::env::args().nth(1).unwrap_or_default();
+    let config = AppConfig::new(config_path);
 
-    let path_to_training = std::env::args()
-        .nth(1)
-        .expect("Training data file not specified!");
+    let m_most_recent_sessions = config.model.m_most_recent_sessions;
+    let neighborhood_size_k = config.model.neighborhood_size_k;
+    let num_items_to_recommend = config.model.num_items_to_recommend;
+    let max_items_in_session = config.model.max_items_in_session;
+    let enable_business_logic = config.logic.enable_business_logic;
 
-    println!("training_data_file:{}", path_to_training);
+    let training_data_path = Path::new(&config.data.training_data_path);
+    let vmis_index = if training_data_path.is_dir() {
+        // By default we use an index that is computed offline on billions of user-item interactions.
+        VMISIndex::new(&config.data.training_data_path)
+    } else if training_data_path.is_file() {
+        // The following line creates an index directly from a csv file as input.
+        VMISIndex::new_from_csv(
+            &config.data.training_data_path,
+            config.model.m_most_recent_sessions,
+            config.model.idf_weighting as f64,
+        )
+    } else {
+        panic!(
+            "Training data file does not exist: {}",
+            &config.data.training_data_path
+        )
+    };
 
-    let test_data_file = std::env::args()
-        .nth(2)
-        .expect("Test data file not specified!");
+    let test_data_file = config.hyperparam.test_data_path;
     println!("test_data_file:{}", test_data_file);
 
-    let vmis_index = VMISIndex::new_from_csv(&*path_to_training, n_most_recent_sessions, idf_weighting);
-
     let ordered_test_sessions = io::read_test_data_evolving(&*test_data_file);
 
-    let qty_max_reco_results = 20;
-    let mut mymetric = Mrr::new(qty_max_reco_results);
+    let mut reporter = EvaluationReporter::new(&io::read_training_data(&*config.data.training_data_path), num_items_to_recommend);
+
+    let mut stopwatch = Stopwatch::new();
 
     ordered_test_sessions
         .iter()
         .for_each(|(_session_id, evolving_session_items)| {
             for session_state in 1..evolving_session_items.len() {
                 // use last x items of evolving session
-                let start_index = if session_state > last_items_in_session {
-                    session_state - last_items_in_session
+                let start_index = if session_state > max_items_in_session {
+                    session_state - max_items_in_session
                 } else {
                     0
                 };
                 let session: &[u64] = &evolving_session_items[start_index..session_state];
+                stopwatch.start();
                 let recommendations = vmisknn::predict(
                     &vmis_index,
                     &session,
                     neighborhood_size_k,
-                    n_most_recent_sessions,
-                    qty_max_reco_results,
+                    m_most_recent_sessions,
+                    num_items_to_recommend,
                     enable_business_logic,
                 );
-
+                stopwatch.stop(&start_index);
                 let recommended_items = recommendations
                     .into_sorted_vec()
                     .iter()
                     .map(|scored| scored.id)
                     .collect::<Vec<u64>>();
 
                 let actual_next_items = Vec::from(&evolving_session_items[session_state..]);
-                mymetric.add(&recommended_items, &actual_next_items);
+                reporter.add(&recommended_items, &actual_next_items);
             }
         });
-
-    println!("{}: {}", mymetric.get_name(), mymetric.result());
+    println!("===============================================================");
+    println!("===               START EVALUATING TEST FILE               ====");
+    println!("===============================================================");
+    println!("{}", reporter.get_name());
+    println!("{}", reporter.result());
+    println!("Qty test evaluations: {}", stopwatch.get_n());
+    println!("Prediction latency");
+    println!("p90 (microseconds): {}", stopwatch.get_percentile_in_micros(90.0));
+    println!("p95 (microseconds): {}", stopwatch.get_percentile_in_micros(95.0));
+    println!("p99.5 (microseconds): {}", stopwatch.get_percentile_in_micros(99.5));
 }
diff --git a/src/stopwatch.rs b/src/stopwatch.rs
@@ -29,10 +29,10 @@ impl Stopwatch {
         self.start_time = Instant::now();
     }
 
-    pub fn stop(&mut self, position_in_session: usize) {
+    pub fn stop(&mut self, position_in_session: &usize) {
         let duration = self.start_time.elapsed();
         let duration_as_micros: f64 = duration.as_micros() as f64;
-        let tuple: PositionDurationMicros = (position_in_session as u32, duration_as_micros);
+        let tuple: PositionDurationMicros = (*position_in_session as u32, duration_as_micros);
         self.prediction_durations.push(tuple);
     }