Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions benchpress/config/jobs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,36 @@
- 'benchmarks/feedsim/feedsim-multi-inst-*.log'
- 'benchmarks/feedsim/src/perf.data'

# Fixed-QPS "mini" variant of the feedsim_autoscale benchmark.
- name: feedsim_autoscale_mini
benchmark: feedsim_autoscale
description: >
Aggregator like workload. Latency sensitive.
The feedsim_autoscale mini benchmark jobs
are configured with a fixed QPS.
# Argument template; every {placeholder} has a same-named default under `vars`.
args:
- '-n {num_instances}'
- '-q {fixed_qps}'
- '-d {fixed_qps_duration}'
- '-w {warmup_time}'
- '-S {graph_store_path}'
- '-L {graph_load_path}'
- '{extra_args}'
vars:
- 'num_instances=-1'
- 'fixed_qps=100'
- 'fixed_qps_duration=300'
- 'warmup_time=120'
# Sentinel values: packages/feedsim/run.sh skips --store_graph / --load_graph
# when it receives exactly these strings.
- 'graph_store_path=default_do_not_store'
- 'graph_load_path=default_do_not_load'
- 'extra_args='
hooks:
# copymove hook with is_move enabled, applied to the result, log and perf
# files listed under `after` (presumably once the job has finished - confirm
# against the hook implementation).
- hook: copymove
options:
is_move: true
after:
- 'benchmarks/feedsim/feedsim_results*.txt'
- 'benchmarks/feedsim/feedsim-multi-inst-*.log'
- 'benchmarks/feedsim/src/perf.data'

- name: feedsim_autoscale_arm
benchmark: feedsim_autoscale
Expand Down Expand Up @@ -492,6 +522,38 @@
- 'benchmarks/feedsim/feedsim-multi-inst-*.log'
- 'benchmarks/feedsim/src/perf.data'

# Fixed-QPS "mini" variant of the feedsim_autoscale benchmark with parameters
# tuned for arm (explicit icache iteration count).
- name: feedsim_autoscale_arm_mini
  benchmark: feedsim_autoscale
  description: >
    Aggregator like workload. Latency sensitive.
    The feedsim_autoscale mini benchmark jobs
    are configured with a fixed QPS.
    Parameters tuned for arm.
  # Argument template; every {placeholder} has a same-named default under `vars`.
  args:
    - '-n {num_instances}'
    - '-i {icache_iterations}'
    - '-q {fixed_qps}'
    - '-d {fixed_qps_duration}'
    - '-w {warmup_time}'
    # Forward the graph store/load paths declared under `vars`, matching
    # feedsim_autoscale_mini. Without these two entries the variables were
    # declared but never reached the benchmark.
    - '-S {graph_store_path}'
    - '-L {graph_load_path}'
    - '{extra_args}'
  vars:
    - 'num_instances=-1'
    - 'icache_iterations=400000'
    - 'fixed_qps=100'
    - 'fixed_qps_duration=300'
    - 'warmup_time=120'
    # Sentinel values: packages/feedsim/run.sh skips --store_graph /
    # --load_graph when it receives exactly these strings, so the defaults
    # keep the previous behaviour.
    - 'graph_store_path=default_do_not_store'
    - 'graph_load_path=default_do_not_load'
    - 'extra_args='
  hooks:
    - hook: copymove
      options:
        is_move: true
        after:
          - 'benchmarks/feedsim/feedsim_results*.txt'
          - 'benchmarks/feedsim/feedsim-multi-inst-*.log'
          - 'benchmarks/feedsim/src/perf.data'


- benchmark: spark_standalone
name: spark_standalone_local
Expand Down
36 changes: 33 additions & 3 deletions packages/feedsim/run-feedsim-multi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,30 @@ NUM_INSTANCES="$(( ( NCPU + 99 ) / 100 ))"

NUM_ICACHE_ITERATIONS="1600000"

# Print the usage text of this wrapper script to stdout.
# Globals:   NCPU (read) - used to show the default instance count.
# Arguments: none
# Outputs:   the usage text, followed by one blank line.
show_help() {
  local default_instances=$(( ( NCPU + 99 ) / 100 ))

  printf '%s\n' \
    "Usage: ${0##*/} [OPTION]..." \
    "" \
    "-h Display this help and exit" \
    "-n Number of parallel instances to run. Default: ${default_instances}" \
    "-i Number of icache iterations to use. Default: 1600000" \
    "-S Store the generated graph to a file (requires a file path)" \
    "-L Load a graph from a file instead of generating one (requires a file path)" \
    "-I Enable timing instrumentation for graph operations (build, store, load)" \
    "" \
    "Any remaining arguments are passed to run.sh" \
    ""
}

# File name of this script without its directory part; used as a log prefix.
SCRIPT_NAME="$(basename "$0")"
# Log the DCPERF_PERF_RECORD setting taken from the environment.
printf '%s\n' "${SCRIPT_NAME}: DCPERF_PERF_RECORD=${DCPERF_PERF_RECORD}"

# Extra flags forwarded to run.sh; they stay empty unless -S, -L or -I is
# given on the command line.
STORE_GRAPH=
LOAD_GRAPH=
INSTRUMENT_GRAPH=

while [ $# -ne 0 ]; do
case $1 in
-n)
Expand All @@ -34,6 +55,15 @@ while [ $# -ne 0 ]; do
-i)
NUM_ICACHE_ITERATIONS="$2"
;;
-S)
STORE_GRAPH="-S $2"
;;
-L)
LOAD_GRAPH="-L $2"
;;
-I)
INSTRUMENT_GRAPH="-I"
;;
-h|--help)
show_help >&2
exit 1
Expand All @@ -43,7 +73,7 @@ while [ $# -ne 0 ]; do
esac

case $1 in
-n|-i)
-n|-i|-S|-L)
if [ -z "$2" ]; then
echo "Invalid option: '$1' requires an argument" 1>&2
exit 1
Expand Down Expand Up @@ -99,10 +129,10 @@ echo > $BREPS_LFILE
# shellcheck disable=SC2086
for i in $(seq 1 ${NUM_INSTANCES}); do
CORE_RANGE="$(get_cpu_range "${NUM_INSTANCES}" "$((i - 1))")"
CMD="IS_AUTOSCALE_RUN=${NUM_INSTANCES} taskset --cpu-list ${CORE_RANGE} ${FEEDSIM_ROOT}/run.sh -p ${PORT} -i ${NUM_ICACHE_ITERATIONS} -o feedsim_results_${FIXQPS_SUFFIX}${i}.txt $*"
CMD="IS_AUTOSCALE_RUN=${NUM_INSTANCES} taskset --cpu-list ${CORE_RANGE} ${FEEDSIM_ROOT}/run.sh -p ${PORT} -i ${NUM_ICACHE_ITERATIONS} -o feedsim_results_${FIXQPS_SUFFIX}${i}.txt ${STORE_GRAPH} ${LOAD_GRAPH} ${INSTRUMENT_GRAPH} $*"
echo "$CMD" > "${FEEDSIM_LOG_PREFIX}${i}.log"
# shellcheck disable=SC2068,SC2069
IS_AUTOSCALE_RUN=${NUM_INSTANCES} stdbuf -i0 -o0 -e0 taskset --cpu-list "${CORE_RANGE}" "${FEEDSIM_ROOT}"/run.sh -p "${PORT}" -i "${NUM_ICACHE_ITERATIONS}" -o "feedsim_results_${FIXQPS_SUFFIX}${i}.txt" $@ 2>&1 > "${FEEDSIM_LOG_PREFIX}${i}.log" &
IS_AUTOSCALE_RUN=${NUM_INSTANCES} stdbuf -i0 -o0 -e0 taskset --cpu-list "${CORE_RANGE}" "${FEEDSIM_ROOT}"/run.sh -p "${PORT}" -i "${NUM_ICACHE_ITERATIONS}" -o "feedsim_results_${FIXQPS_SUFFIX}${i}.txt" ${STORE_GRAPH} ${LOAD_GRAPH} ${INSTRUMENT_GRAPH} $@ 2>&1 > "${FEEDSIM_LOG_PREFIX}${i}.log" &
PIDS+=("$!")
PHY_CORE_ID=$((PHY_CORE_ID + CORES_PER_INST))
SMT_ID=$((SMT_ID + CORES_PER_INST))
Expand Down
53 changes: 48 additions & 5 deletions packages/feedsim/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ Usage: ${0##*/} [OPTION]...
-d Duration of each load testing experiment, in seconds. Default: 300
-p Port to use by the LeafNodeRank server and the load drivers. Default: 11222
-o Result output file name. Default: "feedsim_results.txt"
-S Store the generated graph to a file (requires a file path)
-L Load a graph from a file instead of generating one (requires a file path)
-I Enable timing instrumentation for graph operations (build, store, load)
EOF
}

Expand Down Expand Up @@ -122,6 +125,17 @@ main() {
local icache_iterations
icache_iterations="1600000"

# Graph storage and loading options
local store_graph
store_graph=""

local load_graph
load_graph=""

local instrument_graph
instrument_graph=""


if [ -z "$IS_AUTOSCALE_RUN" ]; then
echo > $BREPS_LFILE
fi
Expand Down Expand Up @@ -162,6 +176,19 @@ main() {
-i)
icache_iterations="$2"
;;
-S)
if [ "$2" != "default_do_not_store" ]; then
store_graph="--store_graph=$2"
fi
;;
-L)
if [ "$2" != "default_do_not_load" ]; then
load_graph="--load_graph=$2"
fi
;;
-I)
instrument_graph="--instrument_graph"
;;
-h|--help)
show_help >&2
exit 1
Expand All @@ -172,7 +199,7 @@ main() {
esac

case $1 in
-t|-c|-s|-d|-p|-q|-o|-w|-i|-l)
-t|-c|-s|-d|-p|-q|-o|-w|-i|-l|-S|-L)
if [ -z "$2" ]; then
echo "Invalid option: '$1' requires an argument" 1>&2
exit 1
Expand Down Expand Up @@ -208,13 +235,29 @@ main() {
--num_objects=2000 \
--graph_max_iters=1 \
--noaffinity \
--min_icache_iterations="$icache_iterations" &
--min_icache_iterations="$icache_iterations" \
"$store_graph" \
"$load_graph" \
"$instrument_graph" >> $BREPS_LFILE 2>&1 &

LEAF_PID=$!

# FIXME(cltorres)
# Remove sleep, expose an endpoint or print a message to notify service is ready
sleep 30
# Wait for the LeafNodeRank server to become ready by polling its monitoring
# endpoint once per second. The loop ends when:
#   - the endpoint answers successfully (continue with the benchmark), or
#   - the server process (LEAF_PID) is no longer running (exit 1 at once), or
#   - max_attempts polls have failed (exit 1).
echo "Waiting for LeafNodeRank server to be ready on monitor port $monitor_port..."
max_attempts=30
attempt=0
while [ "$attempt" -lt "$max_attempts" ]; do
  # --max-time bounds each probe so that a server which accepts the
  # connection but never replies cannot stall this loop indefinitely.
  if curl -f -s --max-time 5 "http://localhost:${monitor_port}/topology" > /dev/null 2>&1; then
    echo "LeafNodeRank server is ready (monitor port responding)"
    break
  fi
  # Fail fast: there is no point in polling further once the background
  # server process has exited.
  if ! kill -0 "$LEAF_PID" 2> /dev/null; then
    echo "ERROR: LeafNodeRank server (pid $LEAF_PID) exited before becoming ready" >&2
    exit 1
  fi
  attempt=$((attempt + 1))
  if [ "$attempt" -eq "$max_attempts" ]; then
    echo "ERROR: Server failed to become ready within $max_attempts seconds" >&2
    exit 1
  fi
  sleep 1
done

# FIXME(cltorres)
# Skip ParentNode for now, and talk directly to LeafNode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ struct ThreadData {
std::string random_string;
};

// Process-wide source graph. main() fills it once (built, or loaded from a
// file) before the server object is created; ThreadStartup then gives every
// thread its own copy through params.makeGraphCopy(g_shared_graph).
CSRGraph<int32_t> g_shared_graph;

void ThreadStartup(
oldisim::NodeThread& thread,
std::vector<ThreadData>& thread_data,
Expand All @@ -85,7 +88,8 @@ void ThreadStartup(
const std::shared_ptr<folly::IOThreadPoolExecutor>& ioThreadPool,
const std::shared_ptr<ranking::TimekeeperPool>& timekeeperPool) {
auto& this_thread = thread_data[thread.get_thread_num()];
auto graph = params.buildGraph();
// auto graph = params.buildGraph();
auto graph = params.makeGraphCopy(g_shared_graph);
this_thread.cpuThreadPool = cpuThreadPool;
this_thread.srvCPUThreadPool = srvCPUThreadPool;
this_thread.srvIOThreadPool = srvIOThreadPool;
Expand Down Expand Up @@ -307,6 +311,42 @@ int main(int argc, char** argv) {
std::vector<ThreadData> thread_data(args.threads_arg);
ranking::dwarfs::PageRankParams params{
args.graph_scale_arg, args.graph_degree_arg};

// create or load a graph

if (args.load_graph_given) {
if (args.instrument_graph_given) {
auto start_load = std::chrono::steady_clock::now();
g_shared_graph = params.loadGraphFromFile(args.load_graph_arg);
auto end_load = std::chrono::steady_clock::now();
auto load_duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_load - start_load).count();
std::cout << "Graph loading time: " << load_duration << " ms" << std::endl;
} else {
g_shared_graph = params.loadGraphFromFile(args.load_graph_arg);
}
} else {
if (args.instrument_graph_given) {
auto start_build = std::chrono::steady_clock::now();
g_shared_graph = params.buildGraph();
auto end_build = std::chrono::steady_clock::now();
auto build_duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_build - start_build).count();
std::cout << "Graph building time: " << build_duration << " ms" << std::endl;

if (args.store_graph_given) {
auto start_store = std::chrono::steady_clock::now();
params.storeGraphToFile(g_shared_graph, args.store_graph_arg);
auto end_store = std::chrono::steady_clock::now();
auto store_duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_store - start_store).count();
std::cout << "Graph storing time: " << store_duration << " ms" << std::endl;
}
} else {
g_shared_graph = params.buildGraph();
if (args.store_graph_given) {
params.storeGraphToFile(g_shared_graph, args.store_graph_arg);
}
}
}

oldisim::LeafNodeServer server(args.port_arg);
server.SetThreadStartupCallback([&](auto&& thread) {
return ThreadStartup(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ option "graph_max_iters" - "Perform at most 'graph_max_iters' iterations during
option "graph_subset" - "Perform partial PageRank over these numbers of nodes. 0 indicates all nodes." int default="3145728"
option "num_objects" - "Number of objects to serialize." int default="40"
option "random_data_size" - "Number of bytes of string random data." int default="3145728"
option "store_graph" - "Enable storing the generated graph to a file." string typestr="filename" optional
option "load_graph" - "Enable loading a graph from a file instead of generating one." string typestr="filename" optional
# Declared optional explicitly: gengetopt treats an option as mandatory when
# neither `required` nor `optional` is given (unless --default-optional is
# used), and run.sh only passes --instrument_graph when -I is requested.
option "instrument_graph" - "Enable timing instrumentation for graph operations (build, store, load)." optional
option "max_response_size" - "Maximum response size in bytes returned by the leaf server." int default="131072"
option "compression_data_size" - "Number of bytes to compress per request." int default="131072"
option "rank_trials_per_thread" - "Number of iterations each CPU thread executes of rank work." int default="1"
Expand Down
Loading