Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue 48 #71

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hpc/LoadBalancer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,6 @@ int main(int argc, char *argv[])
std::transform(LB_vector.begin(), LB_vector.end(), LB_ptr_vector.begin(),
[](LoadBalancer& obj) { return &obj; });

std::cout << "Load balancer running port" << port << std::endl;
std::cout << "Load balancer running port " << port << std::endl;
umbridge::serveModels(LB_ptr_vector, "0.0.0.0", port, true, false);
}
14 changes: 14 additions & 0 deletions hpc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,17 @@ load-balancer-files = LoadBalancer.cpp LoadBalancer.hpp ../lib/httplib.h ../lib/

# Compile the sources listed in $(load-balancer-files) into the ./load-balancer
# executable with g++ (-O3, C++17, pthreads; unused-result warnings silenced).
# NOTE(review): the leading '-' looks like make's "ignore errors" recipe prefix,
# which would let a failed compile go unnoticed -- confirm it is intentional.
build-load-balancer:
- g++ -O3 -Wno-unused-result -std=c++17 $(load-balancer-files) -o load-balancer -pthread

# Start ./load-balancer on $PORT (default 4242) with HQ_SUBMIT_DELAY_MS=100.
# The stale retry marker from a previous run is removed first. While the chosen
# port is already accepting connections (checked with `nc -z`), the user is
# asked for another one; an empty answer keeps the current port.
#
# The prompt uses printf + read rather than `read -p`: make runs recipes with
# /bin/sh, and `read -p` is a bash extension that fails under POSIX shells such
# as dash. If no answer can be read (stdin closed), the recipe aborts instead
# of looping forever.
.PHONY: run-load-balancer
run-load-balancer:
	rm -f retry-respond-job_id.txt
	if ! printenv PORT > /dev/null; then \
		echo "PORT environment variable not set. Using default value 4242."; \
		export PORT=4242; \
	fi && \
	export HQ_SUBMIT_DELAY_MS=100 && \
	while nc -z localhost $$PORT; do \
		printf 'Port %s is already in use. Please enter a different port: ' "$$PORT"; \
		read -r NEW_PORT || { echo; echo "No port could be read from stdin; aborting." >&2; exit 1; }; \
		PORT=$${NEW_PORT:-$$PORT}; \
	done; \
	./load-balancer
21 changes: 15 additions & 6 deletions hpc/hq_scripts/job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
#HQ --stdout none
#HQ --stderr none

# Remove "#HQ --stdout none" and "#HQ --stderr none" if you want to see the output of the job.

# Launch model server, send back server URL
# and wait to ensure that HQ won't schedule any more jobs to this allocation.

function get_avaliable_port {
# Define the range of ports to select from
MIN_PORT=1024
MIN_PORT=49152
MAX_PORT=65535

# Generate a random port number
Expand All @@ -34,14 +36,21 @@ export PORT=$port

load_balancer_dir="/load/balancer/directory" # CHANGE ME!


host=$(hostname -I | awk '{print $1}')

timeout=60 # timeout in seconds, might need to be increased if the model server takes longer to start
echo "Waiting for model server to respond at $host:$port..."
while ! curl -s "http://$host:$port/Info" > /dev/null; do
sleep 1
done
echo "Model server responded"
if timeout $timeout sh -c 'while ! curl -s "http://'"$host"':'"$port"'/Info" > /dev/null ; do :; done'; then
echo "Model server responded within $timeout seconds"
else
echo "Timeout: Model server did not respond within $timeout seconds"
echo "$HQ_JOB_ID" > "$load_balancer_dir/retry-respond-job_id.txt"

# clear the server here if needed

# restart the job
$load_balancer_dir/hq_scripts/job.sh
fi

# Write server URL to file identified by HQ job ID.
mkdir -p "$load_balancer_dir/urls"
Expand Down
26 changes: 26 additions & 0 deletions hpc/test/MultiplyBy2/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Test harness for the MultiplyBy2 model:
#   build-server  compiles the minimal model server into ./server
#   build-lb      compiles the load balancer into ../../load-balancer
#   run           cleans old markers/logs and starts the load balancer
# None of these targets produce a file of the same name, so they are phony.
.PHONY: all build-server build-lb run

all: build-server build-lb run

load-balancer-files = ../../LoadBalancer.cpp ../../LoadBalancer.hpp ../../../lib/httplib.h ../../../lib/json.hpp ../../../lib/umbridge.h

build-server:
	g++ -O3 -w -std=c++11 minimal-server.cpp -o server -lssl -lcrypto -pthread

build-lb:
	g++ -O3 -Wno-unused-result -std=c++17 $(load-balancer-files) -o ../../load-balancer -pthread

# Start the load balancer on $PORT (default 4242) with HQ_SUBMIT_DELAY_MS=100.
# While the port is already accepting connections the user is asked for
# another one; an empty answer keeps the current port. The prompt uses
# printf + read because make runs recipes with /bin/sh, where bash's `read -p`
# is not available; a closed stdin aborts the recipe instead of looping forever.
run:
	rm -f retry-port-job_id.txt
	rm -f retry-respond-job_id.txt
	mkdir -p logs
	rm -f logs/*
	if ! printenv PORT > /dev/null; then \
		echo "PORT environment variable not set. Using default value 4242."; \
		export PORT=4242; \
	fi && \
	export HQ_SUBMIT_DELAY_MS=100 && \
	while nc -z localhost $$PORT; do \
		printf 'Port %s is already in use. Please enter a different port: ' "$$PORT"; \
		read -r NEW_PORT || { echo; echo "No port could be read from stdin; aborting." >&2; exit 1; }; \
		PORT=$${NEW_PORT:-$$PORT}; \
	done; \
	cd ../../ && ./load-balancer
31 changes: 31 additions & 0 deletions hpc/test/MultiplyBy2/client.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash

# Smoke test client for the MultiplyBy2 model behind the load balancer.
# Fires 300 concurrent Evaluate requests with input 100.0 and prints one
# character per request: "y" when the reply is exactly the expected JSON,
# otherwise the reply that was received followed by "n".

# export TEST_DELAY=1e4

# Fall back to the default port when PORT is unset or empty.
PORT="${PORT:-4242}"

echo "Using URL http://localhost:$PORT"

echo "Sending requests..."

request='{"name": "forward", "input": [[100.0]]}'
expected='{"output":[[200.0]]}'

for i in {1..300}
do
    {
        # Issue the request exactly once and keep the reply, so that the body
        # printed on failure is the one that actually failed the comparison.
        response="$(curl -s "http://localhost:$PORT/Evaluate" -X POST -d "$request")"

        if [ "$response" == "$expected" ]; then
            echo -n "y"
        else
            echo "$response"
            echo -n "n"
            #echo "Error: curl output does not equal expected output"
        fi
    } &

done

echo "Requests sent. Waiting for responses..."

wait
61 changes: 61 additions & 0 deletions hpc/test/MultiplyBy2/job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#! /bin/bash

#HQ --cpus=1
#HQ --time-request=1m
#HQ --time-limit=2m
#HQ --stdout %{CWD}/test/MultiplyBy2/logs/job-%{JOB_ID}.out
#HQ --stderr %{CWD}/test/MultiplyBy2/logs/job-%{JOB_ID}.err

# Launch model server, send back server URL
# and wait to ensure that HQ won't schedule any more jobs to this allocation.
#
# Steps: draw a random port that nothing is listening on, start the test
# server on it, poll its /Info endpoint until it answers (or a timeout hits),
# write the server URL to urls/url-$HQ_JOB_ID.txt and then sleep forever.
# All relative paths are resolved against the current working directory.

# Define the range of ports to select from
MIN_PORT=49152
MAX_PORT=65535

# Print the PIDs of processes listening on TCP port $1 (nothing if none).
listeners_on_port() {
    lsof -Pi ":$1" -sTCP:LISTEN -t
}

# Generate a random port number
port=$(shuf -i "$MIN_PORT-$MAX_PORT" -n 1)
try_count=0
# Log what is already listening on the first candidate (blank line if nothing).
echo "$(listeners_on_port "$port")"
# Check if the port is in use
while [ -n "$(listeners_on_port "$port")" ]
do
    echo "Port $port is in use, trying another port"
    # If the port is in use, generate a new port number
    port=$(shuf -i "$MIN_PORT-$MAX_PORT" -n 1)

    try_count=$((try_count+1))

    # Record the ID of a job that had to re-draw its port.
    echo "$HQ_JOB_ID" > "./test/MultiplyBy2/retry-port-job_id.txt"
done
echo "Selected port $port after $try_count tries"

echo "Starting server on port $port"
export PORT=$port

# Assume that server sets the port according to the environment variable 'PORT'.
./test/MultiplyBy2/server & # CHANGE ME!
server_pid=$! # remembered so an unresponsive server can be stopped below

load_balancer_dir="./" # CHANGE ME!

host=$(hostname -I | awk '{print $1}')

timeout=30 # timeout in seconds
echo "Waiting for model server to respond at $host:$port..."
# Poll once per second instead of spinning: a refused connection makes curl
# return immediately, so a loop without a pause would fork curl continuously.
if timeout "$timeout" sh -c 'until curl -s "$1" > /dev/null; do sleep 1; done' sh "http://$host:$port/Info"; then
    echo "Model server responded within $timeout seconds"
else
    echo "Timeout: Model server did not respond within $timeout seconds"
    echo "$HQ_JOB_ID" > "./test/MultiplyBy2/retry-respond-job_id.txt"

    # Stop the unresponsive server so it does not linger next to its replacement.
    kill "$server_pid" 2>/dev/null
    wait "$server_pid" 2>/dev/null

    # Restart this test job from scratch. exec replaces the current shell, so
    # the URL of the dead server is never published further down.
    exec ./test/MultiplyBy2/job.sh
fi

# Write server URL to file identified by HQ job ID.
mkdir -p "$load_balancer_dir/urls"
echo "http://$host:$port" > "$load_balancer_dir/urls/url-$HQ_JOB_ID.txt"

sleep infinity # keep the job occupied
37 changes: 36 additions & 1 deletion hpc/test/MultiplyBy2/minimal-server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,36 @@
#include <string>
#include <chrono>
#include <thread>

#include <cstdlib>
#include <cstring>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "../../../lib/umbridge.h"


/// Probe whether a TCP port can currently be bound on this host.
///
/// Opens a throw-away IPv4 stream socket and tries to bind it to `port` on
/// all interfaces (INADDR_ANY). The socket is always closed before returning.
///
/// @param port Port number to probe; it is passed through htons() unchanged.
/// @return true if bind() fails for any reason, false if it succeeds. If the
///         probe socket itself cannot be created, an error is written to
///         std::cerr and false is returned.
bool isPortInUse(int port) {
    const int probe = socket(AF_INET, SOCK_STREAM, 0);
    if (probe < 0) {
        std::cerr << "Failed to create socket." << std::endl;
        return false;
    }

    // Value-initialisation zeroes the whole structure before the fields are set.
    sockaddr_in address{};
    address.sin_family = AF_INET;
    address.sin_addr.s_addr = htonl(INADDR_ANY);
    address.sin_port = htons(port);

    const bool bindFailed =
        bind(probe, reinterpret_cast<sockaddr*>(&address), sizeof(address)) < 0;

    close(probe);
    return bindFailed;
}

class ExampleModel : public umbridge::Model
{
public:
Expand Down Expand Up @@ -79,6 +106,14 @@ int main(int argc, char *argv[])
else
{
port = atoi(port_cstr);
std::cout << "Using port [ " << port_cstr << " ] as specified by environment variable PORT." << std::endl;
if (isPortInUse(port))
{
std::cerr << "Port " << port << " is already in use. Exiting." << std::endl;

exit(-1);
}

}

char const *delay_cstr = std::getenv("TEST_DELAY");
Expand Down
Loading