Feature Summary
I believe performance can be partially improved by providing faster UDFs. During the hackathon, an earlier prototype running a small matrix-multiply benchmark showed speedups over Python of 120x for C++ and 170x for Rust.
I am unsure whether this is of interest for apache/texera, but it is worth discussing.
Thanks
Proposed Solution or Design
C++ UDF
#include <chrono>
/**
 * Benchmark UDF that multiplies two synthetic n x n matrices and reports a
 * weighted checksum of the product together with the time the kernel took.
 *
 * The matrices are never materialized: each entry is derived on the fly from
 * the tuple's `seed` and `trial` fields and the entry's indices.
 */
class MatrixMultiplyOperator : public texera::UDFOperator {
public:
    /**
     * Runs one matrix-multiply trial for the given input tuple.
     *
     * Reads the fields "trial" (int), "matrix_size" (int) and "seed" (long)
     * from `tuple`. The port argument is not used.
     *
     * Returns a single output row holding, in order: the string "cpp", the
     * checksum as a double, and the elapsed kernel time in milliseconds.
     *
     * Note: C++ `%` truncates toward zero, so a negative `seed` can produce
     * negative matrix entries. A non-positive "matrix_size" skips the loops
     * and yields a checksum of 0.0.
     */
    texera::TupleOutput process_tuple(const texera::Tuple& tuple, int /*port*/) override {
        const int trial = tuple.get("trial").as_int();
        const int n = tuple.get("matrix_size").as_int();
        const long long seed = tuple.get("seed").as_long();

        // steady_clock is monotonic, so the measured interval cannot be skewed
        // (or go negative) if the wall clock is adjusted mid-run;
        // high_resolution_clock carries no such guarantee.
        const auto start = std::chrono::steady_clock::now();

        double checksum = 0.0;
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                // Dot product of row i of A with column j of B.
                double cell = 0.0;
                for (int k = 0; k < n; k++) {
                    // The LL literals force 64-bit arithmetic before the modulo.
                    double a = ((seed + trial * 97LL + i * 31LL + k * 17LL) % 1000LL) / 1000.0;
                    double b = ((seed + trial * 53LL + k * 13LL + j * 29LL) % 1000LL) / 1000.0;
                    cell += a * b;
                }
                // Position-dependent weight so the checksum is sensitive to
                // where each product entry lands, not just to their sum.
                checksum += cell * ((i + 1) * 0.001 + (j + 1) * 0.0001);
            }
        }

        const auto end = std::chrono::steady_clock::now();
        const double elapsed_ms = std::chrono::duration<double, std::milli>(end - start).count();

        return { texera::TupleLike{
            texera::Value::string_value("cpp"),
            texera::Value::double_value(checksum),
            texera::Value::double_value(elapsed_ms)
        }};
    }
};

// NOTE(review): presumably the fixed name the host runtime looks up to find
// the operator class — confirm against the UDF loader.
using TexeraUDFOperator = MatrixMultiplyOperator;
Rust UDF
use std::time::Instant;
/// Stateless benchmark operator: multiplies two synthetic `n x n` matrices
/// and emits a weighted checksum of the product plus the kernel's run time.
#[derive(Default)]
struct MatrixMultiplyOperator;

impl texera::UDFOperator for MatrixMultiplyOperator {
    /// Runs one matrix-multiply trial for `tuple`.
    ///
    /// Reads the fields `"trial"`, `"matrix_size"` and `"seed"` from the
    /// tuple. Matrix entries are generated on the fly from the seed, the
    /// trial number and the entry's indices; nothing is allocated for the
    /// matrices themselves.
    ///
    /// On success the output holds a single row with, in order: the string
    /// `"rust"`, the checksum, and the elapsed kernel time in milliseconds.
    ///
    /// # Errors
    ///
    /// Propagates any error from looking up or converting the three input
    /// fields.
    fn process_tuple(
        &mut self,
        tuple: &texera::Tuple,
        _port: i32,
    ) -> Result<texera::TupleOutput, String> {
        let trial = tuple.get_by_name("trial")?.as_int()?;
        let n = tuple.get_by_name("matrix_size")?.as_int()?;
        let seed = tuple.get_by_name("seed")?.as_long()?;

        // Widen once; every use below is in 64-bit arithmetic.
        let trial = trial as i64;

        let timer = Instant::now();
        let mut checksum = 0.0_f64;
        for row in 0..n {
            let row_weight = ((row + 1) as f64) * 0.001;
            for col in 0..n {
                // Dot product of row `row` of A with column `col` of B,
                // accumulated left to right starting from 0.0.
                let dot = (0..n).fold(0.0_f64, |acc, k| {
                    let lhs = ((seed + trial * 97 + row as i64 * 31 + k as i64 * 17) % 1000) as f64
                        / 1000.0;
                    let rhs = ((seed + trial * 53 + k as i64 * 13 + col as i64 * 29) % 1000) as f64
                        / 1000.0;
                    acc + lhs * rhs
                });
                // Position-dependent weight keeps the checksum sensitive to
                // where each product entry sits.
                checksum += dot * (row_weight + ((col + 1) as f64) * 0.0001);
            }
        }
        let elapsed_ms = timer.elapsed().as_secs_f64() * 1000.0;

        let output_row = vec![
            texera::Value::string_value("rust"),
            texera::Value::double_value(checksum),
            texera::Value::double_value(elapsed_ms),
        ];
        Ok(vec![output_row])
    }
}

// NOTE(review): presumably the fixed name the host runtime looks up to find
// the operator type — confirm against the UDF loader.
type TexeraUDFOperator = MatrixMultiplyOperator;
Affected Area
No response
Feature Summary
I believe performance can be partially improved by providing faster UDFs. During the hackathon, an earlier prototype running a small matrix-multiply benchmark showed speedups over Python of 120x for C++ and 170x for Rust.
I am unsure whether this is of interest for apache/texera, but it is worth discussing.
Thanks
Proposed Solution or Design
C++ UDF
Rust UDF
Affected Area
No response