Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
debug/
target/

# ignore fixed length mock files
.flf

# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
Expand Down
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ default-run = "evolution"
arrow2 = { version = "0.18.0", features = ["io_ipc"] }
chrono = "0.4.31"
clap = { version = "4.4.8", features = ["derive"] }
crossbeam = "0.8.2"
colored = "2.0.4"
env_logger = "0.10.1"
half = "2.3.1"
Expand All @@ -30,6 +31,7 @@ padder = "0.1.0"
rand = "0.8.5"
serde = { version = "1.0.193", features = ["derive"] }
serde_json = "1.0.108"
threadpool = "1.8.1"

[dev-dependencies]
glob = "0.3.1"
Expand Down
77 changes: 52 additions & 25 deletions src/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
*/

use crate::schema::{self, FixedSchema};
use crossbeam::channel;
use log::{debug, info};
use rand::distributions::{Alphanumeric, DistString};
use std::fs::OpenOptions;
Expand Down Expand Up @@ -142,24 +143,19 @@ pub(crate) fn mock_from_schema(schema_path: String, n_rows: usize) {
//mocker.generate_threaded(n_rows, 5);
}

fn generate_from_thread(thread: usize, schema: Arc<FixedSchema>, n_rows: usize) {
fn generate_from_thread(
thread: usize,
schema: Arc<FixedSchema>,
n_rows: usize,
sender: channel::Sender<Vec<u8>>,
) {
let rowlen = schema.row_len();
let mut buffer: Vec<u8> =
Vec::with_capacity(DEFAULT_ROW_BUFFER_LEN * rowlen + DEFAULT_ROW_BUFFER_LEN * 2);
let mut path = PathBuf::from(
Alphanumeric.sample_string(&mut rand::thread_rng(), DEFAULT_MOCKED_FILENAME_LEN),
);
path.set_extension("flf");

let mut file = OpenOptions::new()
.create_new(true)
.append(true)
.open(path)
.unwrap();

for row in 0..n_rows {
if row % DEFAULT_ROW_BUFFER_LEN == 0 {
file.write_all(&buffer).expect("Bad buffer, write failed!");
sender.send(buffer).expect("Bad buffer, send failed");
buffer =
Vec::with_capacity(DEFAULT_ROW_BUFFER_LEN * rowlen + DEFAULT_ROW_BUFFER_LEN * 2);
}
Expand All @@ -176,38 +172,60 @@ fn generate_from_thread(thread: usize, schema: Arc<FixedSchema>, n_rows: usize)

buffer.extend_from_slice("\r\n".as_bytes());
}

file.write_all(&buffer).expect("Bad buffer, write failed!");
sender.send(buffer).expect("Bad buffer, send failed");

println!("thread {} done!", thread);
drop(sender);
}

fn distribute_thread_workload(n_rows: usize, n_threads: usize) -> Vec<usize> {
let thread_local_rows = (n_rows - 1) / n_threads + 1;
let remaining_rows = thread_local_rows * n_threads - n_rows;
let mut thread_workload: Vec<usize> = Vec::with_capacity(n_threads);
for n in 0..n_threads {
let spare = if n < remaining_rows { 1 } else { 0 };
let rows_to_process = n + thread_local_rows - spare;
thread_workload.insert(n, rows_to_process - n);
}
thread_workload
}
///
fn generate_threaded(schema: FixedSchema, n_rows: usize, n_threads: usize) {
let n_rows_per_thread = n_rows / n_threads;
let n_buffers_per_thread = std::cmp::max(n_rows_per_thread / DEFAULT_ROW_BUFFER_LEN, 1);

let rest = n_rows - (n_rows_per_thread * n_threads);

println!("n rows per thread: {}", n_rows_per_thread);
println!("n buffers per thread (total): {}", n_buffers_per_thread);
println!("slask rest: {}", rest);
let worksize = distribute_thread_workload(n_rows, n_threads);

let arc_schema = Arc::new(schema);

let (sender, receiver) = channel::unbounded();

let threads: Vec<_> = (0..n_threads)
.map(|t| {
let arc_clone = Arc::clone(&arc_schema);
thread::spawn(move || generate_from_thread(t, arc_clone, n_rows_per_thread))
let sender_clone = sender.clone();
let rows = worksize[t];
thread::spawn(move || generate_from_thread(t, arc_clone, rows, sender_clone))
})
.collect();
drop(sender);
write_mock_file(&receiver);

for handle in threads {
handle.join().unwrap();
}
}

fn write_mock_file(receiver: &channel::Receiver<Vec<u8>>) {
let mut path = PathBuf::from(String::from("resources/mock_files/test_mock"));
path.set_extension("flf");

// TODO: generate rest of rows
println!("still have slask rest: {}", rest);
let mut file = OpenOptions::new()
.create(true)
.write(true)
.open(path)
.unwrap();

for buf in receiver {
file.write_all(&buf).expect("Bad buffer, write failed!");
}
}

///
Expand Down Expand Up @@ -256,4 +274,13 @@ mod tests_mock {
}
}
}

#[test]
fn test_workload_distribution_16_threads_1000_rows() {
let threads = 16;
let rows = 1000;
let workload = distribute_thread_workload(rows, threads);
let row_sum: usize = workload.iter().sum();
assert_eq!(rows, row_sum)
}
}