diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a834ef9..7cd6e00 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,6 +8,7 @@ on: env: TPE_HPO_BIN: 'tpe_hyperparameter_optm' SERVING_BIN: 'serving' + EVALUATOR_BIN: 'evaluator' RELEASE_NAME: 'Serenade' WINDOWS_TARGET: x86_64-pc-windows-msvc.Zip MACOS_TARGET: x86_64-apple-darwin.tar.gz @@ -43,7 +44,7 @@ jobs: shell: bash run: | cd target/release - tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.LINUX_AMD64_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} + tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.LINUX_AMD64_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} ${{ env.EVALUATOR_BIN }} cd - - name: Publish @@ -82,7 +83,7 @@ jobs: shell: bash run: | cd target/release - tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.MACOS_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} + tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.MACOS_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} ${{ env.EVALUATOR_BIN }} cd - - name: Publish @@ -123,7 +124,7 @@ jobs: shell: powershell run: | $compress = @{ - Path = "D:\a\serenade\serenade\target\release\${{ env.TPE_HPO_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.SERVING_BIN }}.exe" + Path = "D:\a\serenade\serenade\target\release\${{ env.TPE_HPO_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.SERVING_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.EVALUATOR_BIN }}.exe" CompressionLevel = "Fastest" DestinationPath = ".\${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.version }}-${{ env.WINDOWS_TARGET }}" } diff --git a/README.md b/README.md index 6bb880c..221caa9 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ VMIS-kNN is an index-based variant of a state-of-the-art nearest neighbor algori 4. [Start the Serenade service](#start-service) 5. [Retrieve recommendations using python](#retrieve-recommendations) 6. [Using your own train- and testset](#dataset) +7. [Evaluate the model](#evaluator) ### Downloads @@ -23,6 +24,7 @@ Extract both downloaded files in the same directoy. You now have the following f ``` serving tpe_hyperparameter_optm +evaluator train.txt test.txt valid.txt @@ -41,7 +43,6 @@ The section `[hyperparam]` in the `example.toml` contains the ranges of hyperpar The results will be printed out in the terminal, for example: ``` -... =============================================================== === HYPER PARAMETER OPTIMIZATION RESULTS ==== =============================================================== @@ -160,6 +161,27 @@ SessionId ItemId Time 10038 6424 1591008704.0 ``` +### Evaluate the model +The `evaluator` application can be used to evaluate a test dataset. It reports on several metrics. +* The evaluation can be started using: +```bash +./evaluator example.toml +``` + +``` +=============================================================== +=== START EVALUATING TEST FILE ==== +=============================================================== +Mrr@20,Ndcg@20,HitRate@20,Popularity@20,Precision@20,Coverage@20,Recall@20,F1score@20 +0.3277,0.3553,0.6402,0.0499,0.0680,0.2765,0.4456,0.1180 +Qty test evaluations: 931 +Prediction latency +p90 (microseconds): 66 +p95 (microseconds): 66 +p99.5 (microseconds): 66 +``` + + # Citation > [Serenade - Low-Latency Session-Based Recommendation in e-Commerce at Scale](https://ssc.io/pdf/modds003.pdf) diff --git a/assets/example/example.zip b/assets/example/example.zip index 9381cff..170c6b9 100644 Binary files a/assets/example/example.zip and b/assets/example/example.zip differ diff --git a/src/bin/evaluator.rs b/src/bin/evaluator.rs index c06efad..6276201 100644 --- a/src/bin/evaluator.rs +++ b/src/bin/evaluator.rs @@ -1,55 +1,69 @@ +use std::path::Path; use serenade_optimized::{io, vmisknn}; -use serenade_optimized::metrics::mrr::Mrr; -use serenade_optimized::metrics::SessionMetric; use serenade_optimized::vmisknn::vmis_index::VMISIndex; +use serenade_optimized::config::AppConfig; +use serenade_optimized::metrics::evaluation_reporter::EvaluationReporter; +use serenade_optimized::stopwatch::Stopwatch; fn main() { - // hyper-parameters - let n_most_recent_sessions = 1500; - let neighborhood_size_k = 500; - let last_items_in_session = 3; - let idf_weighting = 1.0; - let enable_business_logic = false; + let config_path = std::env::args().nth(1).unwrap_or_default(); + let config = AppConfig::new(config_path); - let path_to_training = std::env::args() - .nth(1) - .expect("Training data file not specified!"); + let m_most_recent_sessions = config.model.m_most_recent_sessions; + let neighborhood_size_k = config.model.neighborhood_size_k; + let num_items_to_recommend = config.model.num_items_to_recommend; + let max_items_in_session = config.model.max_items_in_session; + let enable_business_logic = config.logic.enable_business_logic; - println!("training_data_file:{}", path_to_training); + let training_data_path = Path::new(&config.data.training_data_path); + let vmis_index = if training_data_path.is_dir() { + // By default we use an index that is computed offline on billions of user-item interactions. + VMISIndex::new(&config.data.training_data_path) + } else if training_data_path.is_file() { + // The following line creates an index directly from a csv file as input. + VMISIndex::new_from_csv( + &config.data.training_data_path, + config.model.m_most_recent_sessions, + config.model.idf_weighting as f64, + ) + } else { + panic!( + "Training data file does not exist: {}", + &config.data.training_data_path + ) + }; - let test_data_file = std::env::args() - .nth(2) - .expect("Test data file not specified!"); + let test_data_file = config.hyperparam.test_data_path; println!("test_data_file:{}", test_data_file); - let vmis_index = VMISIndex::new_from_csv(&*path_to_training, n_most_recent_sessions, idf_weighting); - let ordered_test_sessions = io::read_test_data_evolving(&*test_data_file); - let qty_max_reco_results = 20; - let mut mymetric = Mrr::new(qty_max_reco_results); + let mut reporter = EvaluationReporter::new(&io::read_training_data(&*config.data.training_data_path), num_items_to_recommend); + + let mut stopwatch = Stopwatch::new(); ordered_test_sessions .iter() .for_each(|(_session_id, evolving_session_items)| { for session_state in 1..evolving_session_items.len() { // use last x items of evolving session - let start_index = if session_state > last_items_in_session { - session_state - last_items_in_session + let start_index = if session_state > max_items_in_session { + session_state - max_items_in_session } else { 0 }; let session: &[u64] = &evolving_session_items[start_index..session_state]; + stopwatch.start(); let recommendations = vmisknn::predict( &vmis_index, &session, neighborhood_size_k, - n_most_recent_sessions, - qty_max_reco_results, + m_most_recent_sessions, + num_items_to_recommend, enable_business_logic, ); - + stopwatch.stop(&start_index); let recommended_items = recommendations .into_sorted_vec() .iter() @@ -57,9 +71,17 @@ fn main() { .collect::>(); let actual_next_items = Vec::from(&evolving_session_items[session_state..]); - mymetric.add(&recommended_items, &actual_next_items); + reporter.add(&recommended_items, &actual_next_items); } }); - - println!("{}: {}", mymetric.get_name(), mymetric.result()); + println!("==============================================================="); + println!("=== START EVALUATING TEST FILE ===="); + println!("==============================================================="); + println!("{}", reporter.get_name()); + println!("{}", reporter.result()); + println!("Qty test evaluations: {}", stopwatch.get_n()); + println!("Prediction latency"); + println!("p90 (microseconds): {}", stopwatch.get_percentile_in_micros(90.0)); + println!("p95 (microseconds): {}", stopwatch.get_percentile_in_micros(95.0)); + println!("p99.5 (microseconds): {}", stopwatch.get_percentile_in_micros(99.5)); } diff --git a/src/stopwatch.rs b/src/stopwatch.rs index 670f64e..25c5887 100644 --- a/src/stopwatch.rs +++ b/src/stopwatch.rs @@ -29,10 +29,10 @@ impl Stopwatch { self.start_time = Instant::now(); } - pub fn stop(&mut self, position_in_session: usize) { + pub fn stop(&mut self, position_in_session: &usize) { let duration = self.start_time.elapsed(); let duration_as_micros: f64 = duration.as_micros() as f64; - let tuple: PositionDurationMicros = (position_in_session as u32, duration_as_micros); + let tuple: PositionDurationMicros = (*position_in_session as u32, duration_as_micros); self.prediction_durations.push(tuple); }