diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a834ef9..7cd6e00 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -8,6 +8,7 @@ on:
env:
TPE_HPO_BIN: 'tpe_hyperparameter_optm'
SERVING_BIN: 'serving'
+ EVALUATOR_BIN: 'evaluator'
RELEASE_NAME: 'Serenade'
WINDOWS_TARGET: x86_64-pc-windows-msvc.Zip
MACOS_TARGET: x86_64-apple-darwin.tar.gz
@@ -43,7 +44,7 @@ jobs:
shell: bash
run: |
cd target/release
- tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.LINUX_AMD64_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }}
+ tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.LINUX_AMD64_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} ${{ env.EVALUATOR_BIN }}
cd -
- name: Publish
@@ -82,7 +83,7 @@ jobs:
shell: bash
run: |
cd target/release
- tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.MACOS_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }}
+ tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.MACOS_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} ${{ env.EVALUATOR_BIN }}
cd -
- name: Publish
@@ -123,7 +124,7 @@ jobs:
shell: powershell
run: |
$compress = @{
- Path = "D:\a\serenade\serenade\target\release\${{ env.TPE_HPO_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.SERVING_BIN }}.exe"
+ Path = "D:\a\serenade\serenade\target\release\${{ env.TPE_HPO_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.SERVING_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.EVALUATOR_BIN }}.exe"
CompressionLevel = "Fastest"
DestinationPath = ".\${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.version }}-${{ env.WINDOWS_TARGET }}"
}
diff --git a/README.md b/README.md
index 6bb880c..221caa9 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ VMIS-kNN is an index-based variant of a state-of-the-art nearest neighbor algori
4. [Start the Serenade service](#start-service)
5. [Retrieve recommendations using python](#retrieve-recommendations)
6. [Using your own train- and testset](#dataset)
+7. [Evaluate the model](#evaluator)
### Downloads
@@ -23,6 +24,7 @@ Extract both downloaded files in the same directoy. You now have the following f
```
serving
tpe_hyperparameter_optm
+evaluator
train.txt
test.txt
valid.txt
@@ -41,7 +43,6 @@ The section `[hyperparam]` in the `example.toml` contains the ranges of hyperpar
The results will be printed out in the terminal, for example:
```
-...
===============================================================
=== HYPER PARAMETER OPTIMIZATION RESULTS ====
===============================================================
@@ -160,6 +161,27 @@ SessionId ItemId Time
10038 6424 1591008704.0
```
+### Evaluate the model <a name="evaluator"></a>
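+The `evaluator` application evaluates the model on a test dataset. It reports several quality metrics and the prediction latency percentiles.
+* The evaluation can be started with the command below; the training and test data paths are read from the given configuration file: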
+```bash
+./evaluator example.toml
+```
+
+```
+===============================================================
+=== START EVALUATING TEST FILE ====
+===============================================================
+Mrr@20,Ndcg@20,HitRate@20,Popularity@20,Precision@20,Coverage@20,Recall@20,F1score@20
+0.3277,0.3553,0.6402,0.0499,0.0680,0.2765,0.4456,0.1180
+Qty test evaluations: 931
+Prediction latency
+p90 (microseconds): 66
+p95 (microseconds): 66
+p99.5 (microseconds): 66
+```
+
+
# Citation
> [Serenade - Low-Latency Session-Based Recommendation in e-Commerce at Scale](https://ssc.io/pdf/modds003.pdf)
diff --git a/assets/example/example.zip b/assets/example/example.zip
index 9381cff..170c6b9 100644
Binary files a/assets/example/example.zip and b/assets/example/example.zip differ
diff --git a/src/bin/evaluator.rs b/src/bin/evaluator.rs
index c06efad..6276201 100644
--- a/src/bin/evaluator.rs
+++ b/src/bin/evaluator.rs
@@ -1,55 +1,69 @@
+use std::path::Path;
use serenade_optimized::{io, vmisknn};
-use serenade_optimized::metrics::mrr::Mrr;
-use serenade_optimized::metrics::SessionMetric;
use serenade_optimized::vmisknn::vmis_index::VMISIndex;
+use serenade_optimized::config::AppConfig;
+use serenade_optimized::metrics::evaluation_reporter::EvaluationReporter;
+use serenade_optimized::stopwatch::Stopwatch;
fn main() {
- // hyper-parameters
- let n_most_recent_sessions = 1500;
- let neighborhood_size_k = 500;
- let last_items_in_session = 3;
- let idf_weighting = 1.0;
- let enable_business_logic = false;
+ let config_path = std::env::args().nth(1).unwrap_or_default();
+ let config = AppConfig::new(config_path);
- let path_to_training = std::env::args()
- .nth(1)
- .expect("Training data file not specified!");
+ let m_most_recent_sessions = config.model.m_most_recent_sessions;
+ let neighborhood_size_k = config.model.neighborhood_size_k;
+ let num_items_to_recommend = config.model.num_items_to_recommend;
+ let max_items_in_session = config.model.max_items_in_session;
+ let enable_business_logic = config.logic.enable_business_logic;
- println!("training_data_file:{}", path_to_training);
+ let training_data_path = Path::new(&config.data.training_data_path);
+ let vmis_index = if training_data_path.is_dir() {
+ // By default we use an index that is computed offline on billions of user-item interactions.
+ VMISIndex::new(&config.data.training_data_path)
+ } else if training_data_path.is_file() {
+ // The following line creates an index directly from a csv file as input.
+ VMISIndex::new_from_csv(
+ &config.data.training_data_path,
+ config.model.m_most_recent_sessions,
+ config.model.idf_weighting as f64,
+ )
+ } else {
+ panic!(
+ "Training data file does not exist: {}",
+ &config.data.training_data_path
+ )
+ };
- let test_data_file = std::env::args()
- .nth(2)
- .expect("Test data file not specified!");
+ let test_data_file = config.hyperparam.test_data_path;
println!("test_data_file:{}", test_data_file);
- let vmis_index = VMISIndex::new_from_csv(&*path_to_training, n_most_recent_sessions, idf_weighting);
-
let ordered_test_sessions = io::read_test_data_evolving(&*test_data_file);
- let qty_max_reco_results = 20;
- let mut mymetric = Mrr::new(qty_max_reco_results);
+ let mut reporter = EvaluationReporter::new(&io::read_training_data(&*config.data.training_data_path), num_items_to_recommend);
+
+ let mut stopwatch = Stopwatch::new();
ordered_test_sessions
.iter()
.for_each(|(_session_id, evolving_session_items)| {
for session_state in 1..evolving_session_items.len() {
// use last x items of evolving session
- let start_index = if session_state > last_items_in_session {
- session_state - last_items_in_session
+ let start_index = if session_state > max_items_in_session {
+ session_state - max_items_in_session
} else {
0
};
let session: &[u64] = &evolving_session_items[start_index..session_state];
+ stopwatch.start();
let recommendations = vmisknn::predict(
&vmis_index,
&session,
neighborhood_size_k,
- n_most_recent_sessions,
- qty_max_reco_results,
+ m_most_recent_sessions,
+ num_items_to_recommend,
enable_business_logic,
);
-
+ stopwatch.stop(&start_index);
let recommended_items = recommendations
.into_sorted_vec()
.iter()
@@ -57,9 +71,17 @@ fn main() {
.collect::>();
let actual_next_items = Vec::from(&evolving_session_items[session_state..]);
- mymetric.add(&recommended_items, &actual_next_items);
+ reporter.add(&recommended_items, &actual_next_items);
}
});
-
- println!("{}: {}", mymetric.get_name(), mymetric.result());
+ println!("===============================================================");
+ println!("=== START EVALUATING TEST FILE ====");
+ println!("===============================================================");
+ println!("{}", reporter.get_name());
+ println!("{}", reporter.result());
+ println!("Qty test evaluations: {}", stopwatch.get_n());
+ println!("Prediction latency");
+ println!("p90 (microseconds): {}", stopwatch.get_percentile_in_micros(90.0));
+ println!("p95 (microseconds): {}", stopwatch.get_percentile_in_micros(95.0));
+ println!("p99.5 (microseconds): {}", stopwatch.get_percentile_in_micros(99.5));
}
diff --git a/src/stopwatch.rs b/src/stopwatch.rs
index 670f64e..25c5887 100644
--- a/src/stopwatch.rs
+++ b/src/stopwatch.rs
@@ -29,10 +29,10 @@ impl Stopwatch {
self.start_time = Instant::now();
}
- pub fn stop(&mut self, position_in_session: usize) {
+ pub fn stop(&mut self, position_in_session: &usize) {
let duration = self.start_time.elapsed();
let duration_as_micros: f64 = duration.as_micros() as f64;
- let tuple: PositionDurationMicros = (position_in_session as u32, duration_as_micros);
+ let tuple: PositionDurationMicros = (*position_in_session as u32, duration_as_micros);
self.prediction_durations.push(tuple);
}