Skip to content

Commit

Permalink
add functionality to evaluate a test dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
bkersbergen committed Mar 18, 2022
1 parent ea9b2fe commit 90a2137
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 33 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ on:
env:
TPE_HPO_BIN: 'tpe_hyperparameter_optm'
SERVING_BIN: 'serving'
EVALUATOR_BIN: 'evaluator'
RELEASE_NAME: 'Serenade'
WINDOWS_TARGET: x86_64-pc-windows-msvc.Zip
MACOS_TARGET: x86_64-apple-darwin.tar.gz
Expand Down Expand Up @@ -43,7 +44,7 @@ jobs:
shell: bash
run: |
cd target/release
tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.LINUX_AMD64_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }}
tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.LINUX_AMD64_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} ${{ env.EVALUATOR_BIN }}
cd -
- name: Publish
Expand Down Expand Up @@ -82,7 +83,7 @@ jobs:
shell: bash
run: |
cd target/release
tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.MACOS_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }}
tar czvf ../../${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.VERSION }}-${{ env.MACOS_TARGET }} ${{ env.TPE_HPO_BIN }} ${{ env.SERVING_BIN }} ${{ env.EVALUATOR_BIN }}
cd -
- name: Publish
Expand Down Expand Up @@ -123,7 +124,7 @@ jobs:
shell: powershell
run: |
$compress = @{
Path = "D:\a\serenade\serenade\target\release\${{ env.TPE_HPO_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.SERVING_BIN }}.exe"
Path = "D:\a\serenade\serenade\target\release\${{ env.TPE_HPO_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.SERVING_BIN }}.exe", "D:\a\serenade\serenade\target\release\${{ env.EVALUATOR_BIN }}.exe"
CompressionLevel = "Fastest"
DestinationPath = ".\${{ env.RELEASE_NAME }}-${{ steps.get_version.outputs.version }}-${{ env.WINDOWS_TARGET }}"
}
Expand Down
24 changes: 23 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ VMIS-kNN is an index-based variant of a state-of-the-art nearest neighbor algori
4. [Start the Serenade service](#start-service)
5. [Retrieve recommendations using python](#retrieve-recommendations)
6. [Using your own train- and testset](#dataset)
7. [Evaluate the model](#evaluator)


### Downloads <a name="downloads"></a>
Expand All @@ -23,6 +24,7 @@ Extract both downloaded files in the same directory. You now have the following f
```
serving
tpe_hyperparameter_optm
evaluator
train.txt
test.txt
valid.txt
Expand All @@ -41,7 +43,6 @@ The section `[hyperparam]` in the `example.toml` contains the ranges of hyperpar

The results will be printed out in the terminal, for example:
```
...
===============================================================
=== HYPER PARAMETER OPTIMIZATION RESULTS ====
===============================================================
Expand Down Expand Up @@ -160,6 +161,27 @@ SessionId ItemId Time
10038 6424 1591008704.0
```

### Evaluate the model <a name="evaluator"></a>
The `evaluator` application evaluates the model on a test dataset. It reports several quality metrics (for example Mrr@20, Ndcg@20 and HitRate@20) together with the prediction latency.
* The evaluation can be started using:
```bash
./evaluator example.toml
```

The results will be printed out in the terminal, for example:
```
===============================================================
=== START EVALUATING TEST FILE ====
===============================================================
Mrr@20,Ndcg@20,HitRate@20,Popularity@20,Precision@20,Coverage@20,Recall@20,F1score@20
0.3277,0.3553,0.6402,0.0499,0.0680,0.2765,0.4456,0.1180
Qty test evaluations: 931
Prediction latency
p90 (microseconds): 66
p95 (microseconds): 66
p99.5 (microseconds): 66
```


# Citation
> [Serenade - Low-Latency Session-Based Recommendation in e-Commerce at Scale](https://ssc.io/pdf/modds003.pdf)
Expand Down
Binary file modified assets/example/example.zip
Binary file not shown.
76 changes: 49 additions & 27 deletions src/bin/evaluator.rs
Original file line number Diff line number Diff line change
@@ -1,65 +1,87 @@
use std::path::Path;
use serenade_optimized::{io, vmisknn};

use serenade_optimized::metrics::mrr::Mrr;
use serenade_optimized::metrics::SessionMetric;
use serenade_optimized::vmisknn::vmis_index::VMISIndex;
use serenade_optimized::config::AppConfig;
use serenade_optimized::metrics::evaluation_reporter::EvaluationReporter;
use serenade_optimized::stopwatch::Stopwatch;

fn main() {
// hyper-parameters
let n_most_recent_sessions = 1500;
let neighborhood_size_k = 500;
let last_items_in_session = 3;
let idf_weighting = 1.0;
let enable_business_logic = false;
let config_path = std::env::args().nth(1).unwrap_or_default();
let config = AppConfig::new(config_path);

let path_to_training = std::env::args()
.nth(1)
.expect("Training data file not specified!");
let m_most_recent_sessions = config.model.m_most_recent_sessions;
let neighborhood_size_k = config.model.neighborhood_size_k;
let num_items_to_recommend = config.model.num_items_to_recommend;
let max_items_in_session = config.model.max_items_in_session;
let enable_business_logic = config.logic.enable_business_logic;

println!("training_data_file:{}", path_to_training);
let training_data_path = Path::new(&config.data.training_data_path);
let vmis_index = if training_data_path.is_dir() {
// By default we use an index that is computed offline on billions of user-item interactions.
VMISIndex::new(&config.data.training_data_path)
} else if training_data_path.is_file() {
// The following line creates an index directly from a csv file as input.
VMISIndex::new_from_csv(
&config.data.training_data_path,
config.model.m_most_recent_sessions,
config.model.idf_weighting as f64,
)
} else {
panic!(
"Training data file does not exist: {}",
&config.data.training_data_path
)
};

let test_data_file = std::env::args()
.nth(2)
.expect("Test data file not specified!");
let test_data_file = config.hyperparam.test_data_path;
println!("test_data_file:{}", test_data_file);

let vmis_index = VMISIndex::new_from_csv(&*path_to_training, n_most_recent_sessions, idf_weighting);

let ordered_test_sessions = io::read_test_data_evolving(&*test_data_file);

let qty_max_reco_results = 20;
let mut mymetric = Mrr::new(qty_max_reco_results);
let mut reporter = EvaluationReporter::new(&io::read_training_data(&*config.data.training_data_path), num_items_to_recommend);

let mut stopwatch = Stopwatch::new();

ordered_test_sessions
.iter()
.for_each(|(_session_id, evolving_session_items)| {
for session_state in 1..evolving_session_items.len() {
// use last x items of evolving session
let start_index = if session_state > last_items_in_session {
session_state - last_items_in_session
let start_index = if session_state > max_items_in_session {
session_state - max_items_in_session
} else {
0
};
let session: &[u64] = &evolving_session_items[start_index..session_state];
stopwatch.start();
let recommendations = vmisknn::predict(
&vmis_index,
&session,
neighborhood_size_k,
n_most_recent_sessions,
qty_max_reco_results,
m_most_recent_sessions,
num_items_to_recommend,
enable_business_logic,
);

stopwatch.stop(&start_index);
let recommended_items = recommendations
.into_sorted_vec()
.iter()
.map(|scored| scored.id)
.collect::<Vec<u64>>();

let actual_next_items = Vec::from(&evolving_session_items[session_state..]);
mymetric.add(&recommended_items, &actual_next_items);
reporter.add(&recommended_items, &actual_next_items);
}
});

println!("{}: {}", mymetric.get_name(), mymetric.result());
println!("===============================================================");
println!("=== START EVALUATING TEST FILE ====");
println!("===============================================================");
println!("{}", reporter.get_name());
println!("{}", reporter.result());
println!("Qty test evaluations: {}", stopwatch.get_n());
println!("Prediction latency");
println!("p90 (microseconds): {}", stopwatch.get_percentile_in_micros(90.0));
println!("p95 (microseconds): {}", stopwatch.get_percentile_in_micros(95.0));
println!("p99.5 (microseconds): {}", stopwatch.get_percentile_in_micros(99.5));
}
4 changes: 2 additions & 2 deletions src/stopwatch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ impl Stopwatch {
self.start_time = Instant::now();
}

pub fn stop(&mut self, position_in_session: usize) {
pub fn stop(&mut self, position_in_session: &usize) {
let duration = self.start_time.elapsed();
let duration_as_micros: f64 = duration.as_micros() as f64;
let tuple: PositionDurationMicros = (position_in_session as u32, duration_as_micros);
let tuple: PositionDurationMicros = (*position_in_session as u32, duration_as_micros);
self.prediction_durations.push(tuple);
}

Expand Down

0 comments on commit 90a2137

Please sign in to comment.