Skip to content
This repository has been archived by the owner on Mar 18, 2023. It is now read-only.

Commit

Permalink
🤒 Break: kill the trainer, it's no use
Browse files Browse the repository at this point in the history
  • Loading branch information
eigenein committed Sep 24, 2022
1 parent 58edc05 commit 6ab7602
Show file tree
Hide file tree
Showing 24 changed files with 40 additions and 1,119 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,3 @@ jobs:
run: |
ssh yastatist "sudo systemctl restart crawler-eu
systemctl status --lines=0 crawler-eu"
- name: Restart Trainer
if: ${{ true }}
run: |
ssh yastatist "sudo systemctl restart trainer
systemctl status --lines=0 trainer"
56 changes: 3 additions & 53 deletions src/crawler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use std::sync::Arc;

use futures::{stream, Stream, StreamExt, TryStreamExt};
use itertools::Itertools;
use mongodb::bson::oid::ObjectId;
use tokio::sync::Mutex;

use self::crawled_data::CrawledData;
Expand All @@ -24,7 +23,6 @@ pub struct Crawler {
metrics: Mutex<CrawlerMetrics>,
n_buffered_batches: usize,
heartbeat_url: Option<String>,
enable_train: bool,
}

/// Runs the full-featured account crawler, that infinitely scans all the accounts
Expand All @@ -37,7 +35,7 @@ pub async fn run_crawler(opts: CrawlerOpts) -> Result {
scope.set_tag("realm", opts.shared.realm);
});

let crawler = Crawler::new(&opts.shared, opts.heartbeat_url, opts.enable_train).await?;
let crawler = Crawler::new(&opts.shared, opts.heartbeat_url).await?;
let accounts = database::Account::get_sampled_stream(
crawler.db.clone(),
opts.shared.realm,
Expand All @@ -60,16 +58,12 @@ pub async fn crawl_accounts(opts: CrawlAccountsOpts) -> Result {
let accounts = stream::iter(opts.start_id..opts.end_id)
.map(|account_id| database::Account::new(opts.shared.realm, account_id))
.map(Ok);
let crawler = Crawler::new(&opts.shared, None, false).await?;
let crawler = Crawler::new(&opts.shared, None).await?;
crawler.run(accounts).await
}

impl Crawler {
pub async fn new(
opts: &SharedCrawlerOpts,
heartbeat_url: Option<String>,
enable_train: bool,
) -> Result<Self> {
pub async fn new(opts: &SharedCrawlerOpts, heartbeat_url: Option<String>) -> Result<Self> {
let api = WargamingApi::new(
&opts.connections.application_id,
opts.connections.api_timeout,
Expand All @@ -85,7 +79,6 @@ impl Crawler {
db,
n_buffered_batches: opts.buffering.n_batches,
heartbeat_url,
enable_train,
};
Ok(this)
}
Expand Down Expand Up @@ -196,11 +189,6 @@ impl Crawler {
.iter()
.map_into::<database::PartialTankStats>()
.collect_vec();
let train_items = if self.enable_train {
self.gather_train_items(&account, &tanks_stats)
} else {
Vec::new()
};
let tanks_stats = tanks_stats
.into_iter()
.filter(|tank| match account.last_battle_time {
Expand Down Expand Up @@ -237,47 +225,9 @@ impl Crawler {
account_snapshot,
tank_snapshots,
rating_snapshot,
train_items,
})
}

/// Gather the recommender system's train data.
/// Highly experimental.
#[instrument(level = "debug", skip_all, fields(account_id = account.id))]
fn gather_train_items(
&self,
account: &database::Account,
actual_tank_stats: &[wargaming::TankStats],
) -> Vec<database::TrainItem> {
let previous_partial_tank_stats = account
.partial_tank_stats
.iter()
.map(|stats| (stats.tank_id, (stats.n_battles, stats.n_wins)))
.collect::<AHashMap<_, _>>();
actual_tank_stats
.iter()
.filter_map(|stats| {
previous_partial_tank_stats
.get(&stats.tank_id)
.and_then(|(n_battles, n_wins)| {
let differs = stats.all.n_battles != 0
&& stats.all.n_battles > *n_battles
&& stats.all.n_wins >= *n_wins;
differs.then(|| database::TrainItem {
object_id: ObjectId::from_bytes([0; 12]),
realm: account.realm,
account_id: account.id,
tank_id: stats.tank_id,
last_battle_time: stats.last_battle_time,
// TODO: check `n_battles >= n_wins`.
n_battles: stats.all.n_battles - n_battles,
n_wins: stats.all.n_wins - n_wins,
})
})
})
.collect()
}

#[instrument(skip_all, fields(account_id = crawled_data.account_snapshot.account_id))]
async fn update_account(&self, crawled_data: CrawledData) -> Result {
let start_instant = Instant::now();
Expand Down
5 changes: 0 additions & 5 deletions src/crawler/crawled_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ pub struct CrawledData {
pub account_snapshot: database::AccountSnapshot,
pub tank_snapshots: Vec<database::TankSnapshot>,
pub rating_snapshot: Option<database::RatingSnapshot>,
pub train_items: Vec<database::TrainItem>,
}

impl CrawledData {
Expand All @@ -21,7 +20,6 @@ impl CrawledData {
account_id = self.account.id,
rating_snapshot.is_some = self.rating_snapshot.is_some(),
n_tank_snapshots = self.tank_snapshots.len(),
n_train_items = self.train_items.len(),
)
)]
pub async fn upsert(&self, into: &Database) -> Result {
Expand All @@ -32,9 +30,6 @@ impl CrawledData {
rating_snapshot.upsert(into).await?;
}
self.account.upsert(into).await?;
for train_item in &self.train_items {
train_item.upsert(into).await?;
}
debug!(elapsed = ?start_instant.elapsed());
Ok(())
}
Expand Down
1 change: 0 additions & 1 deletion src/database/mongodb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ pub async fn open(uri: &str) -> Result<Database> {
models::AccountSnapshot::ensure_indexes(&database).await?;
models::TankSnapshot::ensure_indexes(&database).await?;
models::RatingSnapshot::ensure_indexes(&database).await?;
models::TrainItem::ensure_indexes(&database).await?;

info!("connected");
Ok(database)
Expand Down
2 changes: 0 additions & 2 deletions src/database/mongodb/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@ pub use self::account_snapshot::*;
pub use self::rating_snapshot::*;
pub use self::root::*;
pub use self::tank_snapshot::*;
pub use self::train_item::*;

mod account;
mod account_snapshot;
mod rating_snapshot;
mod root;
mod tank_snapshot;
mod train_item;
102 changes: 0 additions & 102 deletions src/database/mongodb/models/train_item.rs

This file was deleted.

2 changes: 0 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ mod math;
mod opts;
mod prelude;
mod tankopedia;
mod trainer;
pub mod wargaming;
mod web;

Expand All @@ -47,7 +46,6 @@ async fn run_subcommand(opts: Opts) -> Result {
Subcommand::CrawlAccounts(opts) => crawler::crawl_accounts(opts).await,
Subcommand::ImportTankopedia(opts) => tankopedia::import(opts).await,
Subcommand::Web(opts) => web::run(opts).await,
Subcommand::Train(opts) => trainer::run(opts).await,
};
info!(elapsed = ?start_instant.elapsed(), "the command has finished");
if let Err(error) = &result {
Expand Down
56 changes: 0 additions & 56 deletions src/opts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ pub enum Subcommand {
CrawlAccounts(CrawlAccountsOpts),
ImportTankopedia(ImportTankopediaOpts),
Web(WebOpts),
Train(TrainOpts),
}

/// Runs the web application.
Expand All @@ -62,14 +61,6 @@ pub struct WebOpts {
default_value = "http://localhost:8082"
)]
pub trainer_base_url: String,

#[structopt(
long,
env = "BLITZ_DASHBOARD_WEB_TRAINER_TESTERS",
default_value = "513713270,5589968,10894576",
use_delimiter = true
)]
pub trainer_testers: Vec<wargaming::AccountId>,
}

/// Runs the account crawler.
Expand Down Expand Up @@ -106,9 +97,6 @@ pub struct CrawlerOpts {

#[clap(long, env = "BLITZ_DASHBOARD_CRAWLER_HEARTBEAT_URL")]
pub heartbeat_url: Option<String>,

#[clap(long, env = "BLITZ_DASHBOARD_CRAWLER_ENABLE_TRAIN")]
pub enable_train: bool,
}

/// Updates the bundled Tankopedia module.
Expand Down Expand Up @@ -222,47 +210,3 @@ pub struct InternalConnectionOpts {
)]
pub mongodb_uri: String,
}

#[derive(Parser)]
pub struct TrainOpts {
/// MongoDB connection URI
#[structopt(
long = "mongodb-uri",
default_value = "mongodb://localhost/yastatist?directConnection=true",
env = "BLITZ_DASHBOARD_MONGODB_URI"
)]
pub mongodb_uri: String,

#[clap(
long,
parse(try_from_str = humantime::parse_duration),
default_value = "7d",
env = "BLITZ_DASHBOARD_TRAINER_PERIOD",
)]
pub train_period: time::Duration,

#[clap(
long,
parse(try_from_str = humantime::parse_duration),
default_value = "8h",
env = "BLITZ_DASHBOARD_TRAINER_INTERVAL",
)]
pub train_interval: time::Duration,

#[clap(long, default_value = "::", env = "BLITZ_DASHBOARD_TRAINER_BIND_HOST")]
pub host: String,

#[structopt(
long,
default_value = "8082",
env = "BLITZ_DASHBOARD_TRAINER_BIND_PORT"
)]
pub port: u16,

#[structopt(
long,
default_value = "2",
env = "BLITZ_DASHBOARD_TRAINER_MIN_POINTS_PER_REGRESSION"
)]
pub n_min_points_per_regression: usize,
}
Loading

0 comments on commit 6ab7602

Please sign in to comment.