From 37a6aadb0fb274aa859ed3c2f9868b6e840e1929 Mon Sep 17 00:00:00 2001 From: Varun Sharma Date: Fri, 25 Feb 2022 14:38:34 -0800 Subject: [PATCH] Add initial gRPC implementation --- CMakeLists.txt | 7 +- docs/dependencies.rst | 2 +- include/proteus/core/data_types.hpp | 2 + proteus | 15 +- src/proteus/CMakeLists.txt | 10 +- src/proteus/build_options.hpp.in | 2 + src/proteus/core/CMakeLists.txt | 54 ++-- src/proteus/core/data_types.cpp | 42 +++ src/proteus/core/manager.hpp | 18 +- src/proteus/core/predict_api.proto | 318 ++++++++++++++++++++++ src/proteus/core/predict_api_internal.cpp | 181 ++++++++++++ src/proteus/core/predict_api_internal.hpp | 46 ++-- src/proteus/main.cpp | 12 +- src/proteus/servers/CMakeLists.txt | 9 + src/proteus/servers/grpc_server.cpp | 317 +++++++++++++++++++++ src/proteus/servers/grpc_server.hpp | 144 ++++++++++ src/proteus/workers/CMakeLists.txt | 6 +- tests/cpp/CMakeLists.txt | 1 + tests/cpp/grpc/CMakeLists.txt | 24 ++ tools/autotest.sh | 27 -- 20 files changed, 1144 insertions(+), 93 deletions(-) create mode 100644 src/proteus/core/predict_api.proto create mode 100644 src/proteus/servers/grpc_server.cpp create mode 100644 src/proteus/servers/grpc_server.hpp create mode 100644 tests/cpp/grpc/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 36a6d31b5..75197cfb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,6 +78,9 @@ find_package(OpenCV) find_package(opentelemetry-cpp CONFIG) find_package(spdlog) find_package(Threads REQUIRED) +find_package(Protobuf) +find_package(absl CONFIG) +find_package(gRPC CONFIG) find_package(xir QUIET) find_package(vart QUIET) @@ -103,6 +106,7 @@ endif() configure_file(${PROJECT_SOURCE_DIR}/src/proteus/version.hpp.in ${PROJECT_SOURCE_DIR}/src/proteus/version.hpp) option(PROTEUS_ENABLE_REST "Enable the REST server" ON) +option(PROTEUS_ENABLE_GRPC "Enable the gRPC server" ${gRPC_FOUND}) option(PROTEUS_ENABLE_METRICS "Enable Prometheus metrics" ON) option(PROTEUS_ENABLE_LOGGING "Enable logging" ${spdlog_FOUND}) option(PROTEUS_ENABLE_TRACING "Enable Jaeger tracing" ${opentelemetry-cpp_FOUND}) @@ -111,7 +115,7 @@ option(PROTEUS_ENABLE_VITIS "Enable Vitis dependencies" ${PROTEUS_VITIS_FOUND}) option(PROTEUS_BUILD_EXAMPLES "Build examples" ON) option(PROTEUS_BUILD_SHARED "Build Proteus as a shared library" ON) option(PROTEUS_BUILD_TESTING "Build C++ tests" ${PROTEUS_BUILD_TESTING_DEFAULT}) -option(PROTEUS_ENABLE_IPO "Enable interprocedural optimizations" ON) +option(PROTEUS_ENABLE_IPO "Enable interprocedural optimizations" OFF) if(${PROTEUS_ENABLE_REST} OR ${PROTEUS_ENABLE_METRICS}) set(PROTEUS_ENABLE_HTTP ON) @@ -123,6 +127,7 @@ set(PROTEUS_DEFAULT_HTTP_PORT 8998) message(STATUS "Build Options:") message(STATUS " PROTEUS_ENABLE_REST: " ${PROTEUS_ENABLE_REST}) +message(STATUS " PROTEUS_ENABLE_GRPC: " ${PROTEUS_ENABLE_GRPC}) message(STATUS " PROTEUS_ENABLE_METRICS: " ${PROTEUS_ENABLE_METRICS}) message(STATUS " PROTEUS_ENABLE_LOGGING: " ${PROTEUS_ENABLE_LOGGING}) message(STATUS " PROTEUS_ENABLE_TRACING: " ${PROTEUS_ENABLE_TRACING}) diff --git a/docs/dependencies.rst b/docs/dependencies.rst index 8a8c23f90..d2df34817 100644 --- a/docs/dependencies.rst +++ b/docs/dependencies.rst @@ -172,7 +172,7 @@ The following files are included in the Xilinx Inference Server repository under CodeCoverage.cmake,:github:`bilke/cmake-modules`,`CodeCoverage.cmake `__,BSD-3,Cmake module for test coverage measurement ctpl.h,:github:`vit-vit/CTPL`,`ctpl.h `__,Apache 2.0,C++ Thread pool library dog-3619020_640.jpg,`Pixabay 
`__,`dog-3619020_640.jpg `__,`Pixabay License `_,Used for testing - proteusConfig.cmake,:github:`alexreinking/SharedStaticStarter`,`SomeLibConfig.cmake `__,MIT,Cmake module for installing libraries + proteusConfig.cmake,:github:`alexreinking/SharedStaticStarter`,`SomeLibConfig.cmake `__,MIT,CMake module for installing libraries Queue.js,`Kate Rose Morley `__,`Queue.src.js `__,`CC0 1.0 Universal `__,JavaScript class for a queue sport-1284275_640.jpg,`Pixabay `__,`sport-1284275_640.jpg `__,`Pixabay License `_,Used for testing diff --git a/include/proteus/core/data_types.hpp b/include/proteus/core/data_types.hpp index 04190571a..89eeb2b22 100644 --- a/include/proteus/core/data_types.hpp +++ b/include/proteus/core/data_types.hpp @@ -108,6 +108,8 @@ constexpr size_t getSize(DataType type) { */ std::string mapTypeToStr(DataType type); +DataType mapStrToType(const std::string& type); + std::ostream& operator<<(std::ostream& os, const DataType& bar); #ifdef PROTEUS_ENABLE_VITIS diff --git a/proteus b/proteus index bec7a95b5..c3541bf23 100755 --- a/proteus +++ b/proteus @@ -109,15 +109,20 @@ def run_tty_command(command: str, dry_run: bool): sys.exit(exit_code) -def run_command(command: str, dry_run: bool, exit_on_error=True): +def run_command(command: str, dry_run: bool, exit_on_error=True, quiet=False): if dry_run: print(command) return try: - subprocess.check_call( - shlex.split(command), stdout=sys.stdout, stderr=subprocess.STDOUT - ) + if quiet: + subprocess.check_call( + shlex.split(command), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL + ) + else: + subprocess.check_call( + shlex.split(command), stdout=sys.stdout, stderr=subprocess.STDOUT + ) except subprocess.CalledProcessError as error: if exit_on_error: sys.exit(error.returncode) @@ -397,6 +402,8 @@ class Build: shared_libs = " ".join(glob.glob(f"{str(build_dir)}/src/proteus/workers/*.so")) command = f"cp -fs {shared_libs} {path}/" run_command(command, args.dry_run) + command = f"symlinks -rc {path}" + run_command(command, args.dry_run, quiet=True) if not args.dry_run: print("") print("======================") diff --git a/src/proteus/CMakeLists.txt b/src/proteus/CMakeLists.txt index de0794dc5..662f2978f 100644 --- a/src/proteus/CMakeLists.txt +++ b/src/proteus/CMakeLists.txt @@ -14,14 +14,11 @@ add_executable(proteus-server main.cpp) target_include_directories(proteus-server PRIVATE ${PROTEUS_INCLUDE_DIRS}) +target_include_directories(proteus-server PRIVATE $) target_link_options(proteus-server PRIVATE "LINKER:-E") enable_ipo_on_target(proteus-server) -target_link_libraries(proteus-server PRIVATE batching buffers clients core) - -if(${PROTEUS_ENABLE_HTTP}) - target_link_libraries(proteus-server PRIVATE http_server websocket_server) -endif() +target_link_libraries(proteus-server PRIVATE batching buffers clients core servers) add_subdirectory(batching) add_subdirectory(buffers) @@ -66,6 +63,9 @@ endif() if(${PROTEUS_ENABLE_HTTP}) target_link_libraries(proteus PRIVATE drogon trantor) endif() +if(${PROTEUS_ENABLE_GRPC}) + target_link_libraries(proteus PRIVATE lib_grpc) +endif() if(${PROTEUS_ENABLE_TRACING}) get_target_property(TRACING_LINK_LIBS tracing INTERFACE_LINK_LIBRARIES) target_link_libraries(proteus PRIVATE ${TRACING_LINK_LIBS}) diff --git a/src/proteus/build_options.hpp.in b/src/proteus/build_options.hpp.in index f3b032ec6..b55fbf54c 100644 --- a/src/proteus/build_options.hpp.in +++ b/src/proteus/build_options.hpp.in @@ -27,6 +27,8 @@ #cmakedefine PROTEUS_ENABLE_METRICS /// Enables Proteus's HTTP server #cmakedefine 
PROTEUS_ENABLE_HTTP +/// Enables Proteus's gRPC server +#cmakedefine PROTEUS_ENABLE_GRPC /// Enables Proteus's tracing #cmakedefine PROTEUS_ENABLE_TRACING /// Enables Proteus's logging diff --git a/src/proteus/core/CMakeLists.txt b/src/proteus/core/CMakeLists.txt index fd915f46d..a9b655f9c 100644 --- a/src/proteus/core/CMakeLists.txt +++ b/src/proteus/core/CMakeLists.txt @@ -13,41 +13,59 @@ # limitations under the License. list(APPEND targets - predict_api - predict_api_internal - fake_predict_api - manager - worker_info - data_types - interface + predict_api + predict_api_internal + fake_predict_api + manager + worker_info + data_types + interface ) foreach(target ${targets}) - add_library(${target} OBJECT ${target}.cpp) - target_include_directories(${target} PRIVATE ${PROTEUS_INCLUDE_DIRS}) - enable_ipo_on_target(${target}) + add_library(${target} OBJECT ${target}.cpp) + target_include_directories(${target} PRIVATE ${PROTEUS_INCLUDE_DIRS}) + enable_ipo_on_target(${target}) - list(APPEND CORE_OBJS $) + list(APPEND CORE_OBJS $) endforeach() +if(${PROTEUS_ENABLE_GRPC}) + target_include_directories(predict_api_internal PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + target_link_libraries(predict_api_internal INTERFACE lib_grpc) +endif() target_link_libraries(predict_api INTERFACE jsoncpp) target_link_libraries(worker_info INTERFACE ${CMAKE_DL_LIBS}) if(${PROTEUS_ENABLE_VITIS}) - target_link_libraries(data_types INTERFACE xir) + target_link_libraries(data_types INTERFACE xir) endif() add_library(lib_predict_api INTERFACE) target_link_libraries(lib_predict_api INTERFACE - $ - predict_api - $ - predict_api_internal + $ + predict_api + $ + predict_api_internal ) +if(${PROTEUS_ENABLE_GRPC}) + add_library(lib_grpc predict_api.proto) + target_include_directories(lib_grpc PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + target_link_libraries(lib_grpc INTERFACE gRPC::grpc++) + + get_target_property(grpc_cpp_plugin_location gRPC::grpc_cpp_plugin LOCATION) + protobuf_generate(TARGET lib_grpc LANGUAGE cpp) + protobuf_generate(TARGET lib_grpc LANGUAGE grpc GENERATE_EXTENSIONS .grpc.pb.h .grpc.pb.cc PLUGIN "protoc-gen-grpc=${grpc_cpp_plugin_location}") +endif() + add_library(core INTERFACE) target_link_libraries(core INTERFACE - ${CORE_OBJS} - ${targets} + ${CORE_OBJS} + ${targets} ) +if(${PROTEUS_ENABLE_GRPC}) + target_link_libraries(core INTERFACE lib_grpc) +endif() + set(CORE_OBJS ${CORE_OBJS} PARENT_SCOPE) diff --git a/src/proteus/core/data_types.cpp b/src/proteus/core/data_types.cpp index 53bee2299..00a626d31 100644 --- a/src/proteus/core/data_types.cpp +++ b/src/proteus/core/data_types.cpp @@ -168,6 +168,48 @@ std::string mapTypeToStr(DataType type) { } } +// TODO(varunsh): fix long if-else tree +DataType mapStrToType(const std::string& type) { + if (type == "BOOL") { + return DataType::BOOL; + } + if (type == "UINT8") { + return DataType::UINT8; + } + if (type == "UINT16") { + return DataType::UINT16; + } + if (type == "UINT32") { + return DataType::UINT32; + } + if (type == "UINT64") { + return DataType::UINT64; + } + if (type == "INT8") { + return DataType::INT8; + } + if (type == "INT16") { + return DataType::INT16; + } + if (type == "INT32") { + return DataType::INT32; + } + if (type == "INT64") { + return DataType::INT64; + } + if (type == "FP16") { + return DataType::FP16; + } + if (type == "FP32") { + return DataType::FP32; + } + if (type == "FP64") { + return DataType::FP64; + } else { + return DataType::STRING; + } +} + std::ostream& operator<<(std::ostream& os, const DataType& bar) { return os << 
mapTypeToStr(bar); } diff --git a/src/proteus/core/manager.hpp b/src/proteus/core/manager.hpp index 0efffe6a3..7c7e61194 100644 --- a/src/proteus/core/manager.hpp +++ b/src/proteus/core/manager.hpp @@ -29,9 +29,8 @@ #include // for unordered_map #include // for move, pair -#include "proteus/build_options.hpp" // for PROTEUS_ENABLE_LOGGING -#include "proteus/core/predict_api.hpp" // for RequestParameters -// #include "proteus/core/worker_info.hpp" // for WorkerInfo +#include "proteus/build_options.hpp" // for PROTEUS_ENABLE_LOGGING +#include "proteus/core/predict_api.hpp" // for RequestParameters #include "proteus/helpers/queue.hpp" // for BlockingConcurrentQueue #include "proteus/observation/logging.hpp" // for LoggerPtr @@ -100,6 +99,12 @@ class Manager { return instance; } + Manager(Manager const&) = delete; ///< Copy constructor + Manager& operator=(const Manager&) = delete; ///< Copy assignment constructor + Manager(Manager&& other) = delete; ///< Move constructor + Manager& operator=(Manager&& other) = + delete; ///< Move assignment constructor + std::string loadWorker(std::string const& key, RequestParameters parameters); void unloadWorker(std::string const& key); @@ -183,13 +188,6 @@ class Manager { * @param input_queue queue where update requests will arrive */ void update_manager(UpdateCommandQueue* input_queue); - - public: - Manager(Manager const&) = delete; ///< Copy constructor - Manager& operator=(const Manager&) = delete; ///< Copy assignment constructor - Manager(Manager&& other) = delete; ///< Move constructor - Manager& operator=(Manager&& other) = - delete; ///< Move assignment constructor }; } // namespace proteus diff --git a/src/proteus/core/predict_api.proto b/src/proteus/core/predict_api.proto new file mode 100644 index 000000000..cab9bb7a1 --- /dev/null +++ b/src/proteus/core/predict_api.proto @@ -0,0 +1,318 @@ +// Copyright 2020 kubeflow.org. +// Copyright 2022 Xilinx Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package inference; + +// Inference Server GRPC endpoints. +service GRPCInferenceService +{ + // The ServerLive API indicates if the inference server is able to receive + // and respond to metadata and inference requests. + rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {} + + // The ServerReady API indicates if the server is ready for inferencing. + rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} + + // The ModelReady API indicates if a specific model is ready for inferencing. + rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {} + + // The ServerMetadata API provides information about the server. Errors are + // indicated by the google.rpc.Status returned for the request. The OK code + // indicates success and other codes indicate failure. + rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {} + + // The per-model metadata API provides information about a model. Errors are + // indicated by the google.rpc.Status returned for the request. 
The OK code
+  // indicates success and other codes indicate failure.
+  rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {}
+
+  // The ModelInfer API performs inference using the specified model. Errors are
+  // indicated by the google.rpc.Status returned for the request. The OK code
+  // indicates success and other codes indicate failure.
+  rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}
+}
+
+message ServerLiveRequest {}
+
+message ServerLiveResponse{
+  // True if the inference server is live, false if not live.
+  bool live = 1;
+}
+
+message ServerReadyRequest {}
+
+message ServerReadyResponse{
+  // True if the inference server is ready, false if not ready.
+  bool ready = 1;
+}
+
+message ModelReadyRequest{
+  // The name of the model to check for readiness.
+  string name = 1;
+
+  // The version of the model to check for readiness. If not given the
+  // server will choose a version based on the model and internal policy.
+  string version = 2;
+}
+
+message ModelReadyResponse{
+  // True if the model is ready, false if not ready.
+  bool ready = 1;
+}
+
+message ServerMetadataRequest {}
+
+message ServerMetadataResponse{
+  // The server name.
+  string name = 1;
+
+  // The server version.
+  string version = 2;
+
+  // The extensions supported by the server.
+  repeated string extensions = 3;
+}
+
+message ModelMetadataRequest{
+  // The name of the model.
+  string name = 1;
+
+  // The version of the model to check for readiness. If not given the
+  // server will choose a version based on the model and internal policy.
+  string version = 2;
+}
+
+message ModelMetadataResponse{
+  // Metadata for a tensor.
+  message TensorMetadata
+  {
+    // The tensor name.
+    string name = 1;
+
+    // The tensor data type.
+    string datatype = 2;
+
+    // The tensor shape. A variable-size dimension is represented
+    // by a -1 value.
+    repeated int64 shape = 3;
+  }
+
+  // The model name.
+  string name = 1;
+
+  // The versions of the model available on the server.
+  repeated string versions = 2;
+
+  // The model's platform. See Platforms.
+  string platform = 3;
+
+  // The model's inputs.
+  repeated TensorMetadata inputs = 4;
+
+  // The model's outputs.
+  repeated TensorMetadata outputs = 5;
+}
+
+message ModelInferRequest{
+  // An input tensor for an inference request.
+  message InferInputTensor
+  {
+    // The tensor name.
+    string name = 1;
+
+    // The tensor data type.
+    string datatype = 2;
+
+    // The tensor shape.
+    repeated int64 shape = 3;
+
+    // Optional inference input tensor parameters.
+    map<string, InferParameter> parameters = 4;
+
+    // The tensor contents using a data-type format. This field must
+    // not be specified if "raw" tensor contents are being used for
+    // the inference request.
+    InferTensorContents contents = 5;
+  }
+
+  // An output tensor requested for an inference request.
+  message InferRequestedOutputTensor
+  {
+    // The tensor name.
+    string name = 1;
+
+    // Optional requested output tensor parameters.
+    map<string, InferParameter> parameters = 2;
+  }
+
+  // The name of the model to use for inferencing.
+  string model_name = 1;
+
+  // The version of the model to use for inference. If not given the
+  // server will choose a version based on the model and internal policy.
+  string model_version = 2;
+
+  // Optional identifier for the request. If specified will be
+  // returned in the response.
+  string id = 3;
+
+  // Optional inference parameters.
+  map<string, InferParameter> parameters = 4;
+
+  // The input tensors for the inference.
+  repeated InferInputTensor inputs = 5;
+
+  // The requested output tensors for the inference. Optional, if not
+  // specified all outputs produced by the model will be returned.
+  repeated InferRequestedOutputTensor outputs = 6;
+
+  // The data contained in an input tensor can be represented in "raw"
+  // bytes form or in the repeated type that matches the tensor's data
+  // type. To use the raw representation 'raw_input_contents' must be
+  // initialized with data for each tensor in the same order as
+  // 'inputs'. For each tensor, the size of this content must match
+  // what is expected by the tensor's shape and data type. The raw
+  // data must be the flattened, one-dimensional, row-major order of
+  // the tensor elements without any stride or padding between the
+  // elements. Note that the FP16 data type must be represented as raw
+  // content as there is no specific data type for a 16-bit float
+  // type.
+  //
+  // If this field is specified then InferInputTensor::contents must
+  // not be specified for any input tensor.
+  repeated bytes raw_input_contents = 7;
+}
+
+message ModelInferResponse{
+  // An output tensor returned for an inference request.
+  message InferOutputTensor
+  {
+    // The tensor name.
+    string name = 1;
+
+    // The tensor data type.
+    string datatype = 2;
+
+    // The tensor shape.
+    repeated int64 shape = 3;
+
+    // Optional output tensor parameters.
+    map<string, InferParameter> parameters = 4;
+
+    // The tensor contents using a data-type format. This field must
+    // not be specified if "raw" tensor contents are being used for
+    // the inference response.
+    InferTensorContents contents = 5;
+  }
+
+  // The name of the model used for inference.
+  string model_name = 1;
+
+  // The version of the model used for inference.
+  string model_version = 2;
+
+  // The id of the inference request if one was specified.
+  string id = 3;
+
+  // Optional inference response parameters.
+  map<string, InferParameter> parameters = 4;
+
+  // The output tensors holding inference results.
+  repeated InferOutputTensor outputs = 5;
+
+  // The data contained in an output tensor can be represented in
+  // "raw" bytes form or in the repeated type that matches the
+  // tensor's data type. To use the raw representation 'raw_output_contents'
+  // must be initialized with data for each tensor in the same order as
+  // 'outputs'. For each tensor, the size of this content must match
+  // what is expected by the tensor's shape and data type. The raw
+  // data must be the flattened, one-dimensional, row-major order of
+  // the tensor elements without any stride or padding between the
+  // elements. Note that the FP16 data type must be represented as raw
+  // content as there is no specific data type for a 16-bit float
+  // type.
+  //
+  // If this field is specified then InferOutputTensor::contents must
+  // not be specified for any output tensor.
+  repeated bytes raw_output_contents = 6;
+}
+
+// An inference parameter value. The Parameters message describes a
+// "name"/"value" pair, where the "name" is the name of the parameter
+// and the "value" is a boolean, integer, or string corresponding to
+// the parameter.
+message InferParameter{
+  // The parameter value can be a string, an int64, a boolean
+  // or a message specific to a predefined parameter.
+  oneof parameter_choice
+  {
+    // A boolean parameter value.
+    bool bool_param = 1;
+
+    // An int64 parameter value.
+    int64 int64_param = 2;
+
+    // A string parameter value.
+    string string_param = 3;
+  }
+}
+
+// The data contained in a tensor represented by the repeated type
+// that matches the tensor's data type.
Protobuf oneof is not used +// because oneofs cannot contain repeated fields. +message InferTensorContents{ + // Representation for BOOL data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated bool bool_contents = 1; + + // Representation for INT8, INT16, and INT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated int32 int_contents = 2; + + // Representation for INT64 data types. The size must match what + // is expected by the tensor's shape. The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated int64 int64_contents = 3; + + // Representation for UINT8, UINT16, and UINT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated uint32 uint_contents = 4; + + // Representation for UINT64 data types. The size must match what + // is expected by the tensor's shape. The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated uint64 uint64_contents = 5; + + // Representation for FP32 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated float fp32_contents = 6; + + // Representation for FP64 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated double fp64_contents = 7; + + // Representation for BYTES data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated bytes bytes_contents = 8; +} diff --git a/src/proteus/core/predict_api_internal.cpp b/src/proteus/core/predict_api_internal.cpp index b75943e83..9d169b543 100644 --- a/src/proteus/core/predict_api_internal.cpp +++ b/src/proteus/core/predict_api_internal.cpp @@ -24,8 +24,10 @@ #include // for accumulate +#include "predict_api.grpc.pb.h" #include "proteus/buffers/buffer.hpp" #include "proteus/observation/logging.hpp" // for getLogger, SPDLOG_LOGGER_... 
+#include "proteus/servers/grpc_server.hpp" namespace proteus { @@ -110,6 +112,40 @@ DataType parseDatatypeStr(const std::string &data_type_str) { return data_type; } +class InferenceRequestInputBuilder { + public: + /** + * @brief Construct a new InferenceRequestInput object + * + * @param req the JSON request from the user + * @param input_buffer buffer to hold the incoming data + * @param offset offset for the buffer to store data at + */ + static InferenceRequestInput fromJson(std::shared_ptr const &req, + Buffer *input_buffer, size_t offset); + + /** + * @brief Construct a new InferenceRequestInput object + * + * @param req an existing InferenceRequestInput to copy + * @param input_buffer buffer to hold the incoming data + * @param offset offset for the buffer to store data at + */ + static InferenceRequestInput fromInput(InferenceRequestInput &req, + Buffer *input_buffer, size_t offset); + + /** + * @brief Construct a new InferenceRequestInput object + * + * @param req an existing InferenceRequestInput to copy + * @param input_buffer buffer to hold the incoming data + * @param offset offset for the buffer to store data at + */ + static InferenceRequestInput fromGrpc( + const inference::ModelInferRequest_InferInputTensor &req, + Buffer *input_buffer, size_t offset); +}; + InferenceRequestInput InferenceRequestInputBuilder::fromJson( std::shared_ptr const &req, Buffer *input_buffer, size_t offset) { @@ -232,6 +268,42 @@ InferenceRequestInput InferenceRequestInputBuilder::fromInput( return input; } +InferenceRequestInput InferenceRequestInputBuilder::fromGrpc( + const inference::ModelInferRequest_InferInputTensor &req, + Buffer *input_buffer, size_t offset) { + InferenceRequestInput input; + input.shared_data_ = nullptr; + input.name_ = req.name(); + input.shape_.reserve(req.shape_size()); + for (const auto &index : req.shape()) { + input.shape_.push_back(static_cast(index)); + } + input.dataType_ = types::mapStrToType(req.datatype()); + + input.parameters_ = std::make_shared(); + const auto ¶ms = req.parameters(); + for (const auto &[key, value] : params) { + if (value.has_bool_param()) { + input.parameters_->put(key, value.bool_param()); + } else if (value.has_int64_param()) { + // TODO(varunsh): parameters should switch to uint64? + input.parameters_->put(key, static_cast(value.int64_param())); + } else { + input.parameters_->put(key, value.string_param()); + } + } + + auto size = std::accumulate(input.shape_.begin(), input.shape_.end(), 1, + std::multiplies<>()) * + types::getSize(input.dataType_); + auto *dest = static_cast(input_buffer->data()) + offset; + // TODO(varunsh): is this legal or do we need to check the type? 
+ memcpy(dest, req.contents().bool_contents().data(), size); + + input.data_ = dest; + return input; +} + InferenceRequestOutput InferenceRequestOutputBuilder::fromJson( std::shared_ptr const &req) { InferenceRequestOutput output; @@ -450,6 +522,115 @@ InferenceRequestPtr InferenceRequestBuilder::fromJson( return request; } +InferenceRequestPtr InferenceRequestBuilder::fromGrpc( + CallDataModelInfer *calldata, size_t &buffer_index, + const std::vector &input_buffers, + std::vector &input_offsets, + const std::vector &output_buffers, + std::vector &output_offsets, const size_t &batch_size, + size_t &batch_offset) { + auto request = std::make_shared(); + auto &grpc_request = calldata->getRequest(); + + request->id_ = grpc_request.id(); + + request->parameters_ = std::make_shared(); + const auto ¶ms = grpc_request.parameters(); + for (const auto &[key, value] : params) { + if (value.has_bool_param()) { + request->parameters_->put(key, value.bool_param()); + } else if (value.has_int64_param()) { + // TODO(varunsh): parameters should switch to uint64? + request->parameters_->put(key, static_cast(value.int64_param())); + } else { + request->parameters_->put(key, value.string_param()); + } + } + + request->callback_ = nullptr; + + auto buffer_index_backup = buffer_index; + auto batch_offset_backup = batch_offset; + + for (const auto &input : grpc_request.inputs()) { + try { + auto buffers = input_buffers[buffer_index]; + for (size_t i = 0; i < buffers.size(); i++) { + auto &buffer = buffers[i]; + auto &offset = input_offsets[buffer_index]; + + request->inputs_.push_back(std::move( + InferenceRequestInputBuilder::fromGrpc(input, buffer, offset))); + offset += request->inputs_.back().getSize(); + } + } catch (const std::invalid_argument &e) { + throw; + } + batch_offset++; + if (batch_offset == batch_size) { + batch_offset = 0; + buffer_index++; + // std::fill(input_offsets.begin(), input_offsets.end(), 0); + } + } + + // TODO(varunsh): output_offset is currently ignored! The size of the output + // needs to come from the worker but we have no such information. 
+ buffer_index = buffer_index_backup; + batch_offset = batch_offset_backup; + + if (grpc_request.outputs_size() != 0) { + for (auto &output : grpc_request.outputs()) { + // TODO(varunsh): we're ignoring incoming output data + (void)output; + try { + auto buffers = output_buffers[buffer_index]; + for (size_t i = 0; i < buffers.size(); i++) { + auto &buffer = buffers[i]; + auto &offset = output_offsets[buffer_index]; + + request->outputs_.emplace_back(); + request->outputs_.back().setData( + static_cast(buffer->data()) + offset); + } + } catch (const std::invalid_argument &e) { + throw; + } + batch_offset++; + if (batch_offset == batch_size) { + batch_offset = 0; + buffer_index++; + std::fill(output_offsets.begin(), output_offsets.end(), 0); + } + } + } else { + for (const auto &input : grpc_request.inputs()) { + (void)input; // suppress unused variable warning + try { + auto buffers = output_buffers[buffer_index]; + for (size_t j = 0; j < buffers.size(); j++) { + auto &buffer = buffers[j]; + const auto &offset = output_offsets[buffer_index]; + + request->outputs_.emplace_back(); + request->outputs_.back().setData( + static_cast(buffer->data()) + offset); + } + } catch (const std::invalid_argument &e) { + throw; + } + batch_offset++; + if (batch_offset == batch_size) { + batch_offset = 0; + buffer_index++; + std::fill(output_offsets.begin(), output_offsets.end(), 0); + } + } + } + + return request; +} + Json::Value ModelMetadataTensorToJson(const ModelMetadataTensor &metadata) { Json::Value ret; ret["name"] = metadata.getName(); diff --git a/src/proteus/core/predict_api_internal.hpp b/src/proteus/core/predict_api_internal.hpp index 543f96d88..91cfaedc4 100644 --- a/src/proteus/core/predict_api_internal.hpp +++ b/src/proteus/core/predict_api_internal.hpp @@ -29,6 +29,8 @@ class Value; namespace proteus { +class CallDataModelInfer; + /** * @brief Convert JSON-styled parameters to Proteus's implementation * @@ -37,29 +39,6 @@ namespace proteus { */ RequestParametersPtr addParameters(Json::Value parameters); -class InferenceRequestInputBuilder { - public: - /** - * @brief Construct a new InferenceRequestInput object - * - * @param req the JSON request from the user - * @param input_buffer buffer to hold the incoming data - * @param offset offset for the buffer to store data at - */ - static InferenceRequestInput fromJson(std::shared_ptr const &req, - Buffer *input_buffer, size_t offset); - - /** - * @brief Construct a new InferenceRequestInput object - * - * @param req an existing InferenceRequestInput to copy - * @param input_buffer buffer to hold the incoming data - * @param offset offset for the buffer to store data at - */ - static InferenceRequestInput fromInput(InferenceRequestInput &req, - Buffer *input_buffer, size_t offset); -}; - class InferenceRequestOutputBuilder { public: static InferenceRequestOutput fromJson( @@ -109,6 +88,27 @@ class InferenceRequestBuilder { const std::vector &output_buffers, std::vector &output_offsets, const size_t &batch_size, size_t &batch_offset); + + /** + * @brief Construct a new InferenceRequest object + * + * @param req one inference request object + * @param buffer_index current buffer index to start with for the buffers + * @param input_buffers a vector of input buffers to store the inputs data + * @param input_offsets a vector of offsets for the input buffers to store + * data + * @param output_buffers a vector of output buffers + * @param output_offsets a vector of offsets for the output buffers + * @param batch_size batch size to use when creating 
the request + * @param batch_offset current batch offset to start with + */ + static InferenceRequestPtr fromGrpc( + CallDataModelInfer *calldata, size_t &buffer_index, + const std::vector &input_buffers, + std::vector &input_offsets, + const std::vector &output_buffers, + std::vector &output_offsets, const size_t &batch_size, + size_t &batch_offset); }; /// convert the metadata to a JSON representation compatible with the server diff --git a/src/proteus/main.cpp b/src/proteus/main.cpp index f9c06f7da..e3c6d630e 100644 --- a/src/proteus/main.cpp +++ b/src/proteus/main.cpp @@ -25,6 +25,7 @@ #include "proteus/build_options.hpp" // for PROTEUS_ENABLE_HTTP, PROT... #include "proteus/clients/native.hpp" // for initialize, terminate +#include "proteus/servers/grpc_server.hpp" // for start, stop #include "proteus/servers/http_server.hpp" // for start, stop /** @@ -77,9 +78,18 @@ int main(int argc, char* argv[]) { proteus::initialize(); +#ifdef PROTEUS_ENABLE_GRPC + std::cout << "gRPC server starting at port " << 50051 << "\n"; + proteus::grpc::start("0.0.0.0:50051"); +#endif + #ifdef PROTEUS_ENABLE_HTTP - std::cout << "Server starting at port " << http_port << "\n"; + std::cout << "HTTP server starting at port " << http_port << "\n"; proteus::http::start(http_port); +#else + while (1) { + std::this_thread::yield(); + } #endif proteus::terminate(); diff --git a/src/proteus/servers/CMakeLists.txt b/src/proteus/servers/CMakeLists.txt index d339e8827..17b5e57ad 100644 --- a/src/proteus/servers/CMakeLists.txt +++ b/src/proteus/servers/CMakeLists.txt @@ -18,6 +18,10 @@ if(${PROTEUS_ENABLE_REST}) set(targets ${targets} http_server websocket_server) endif() +if(${PROTEUS_ENABLE_GRPC}) + set(targets ${targets} grpc_server) +endif() + foreach(target ${targets}) add_library(${target} OBJECT ${target}.cpp) target_include_directories(${target} PRIVATE ${PROTEUS_INCLUDE_DIRS}) @@ -34,6 +38,11 @@ if(${PROTEUS_ENABLE_REST}) target_link_libraries(websocket_server INTERFACE observation drogon trantor) endif() +if(${PROTEUS_ENABLE_GRPC}) + target_include_directories(grpc_server PRIVATE $) + target_link_libraries(grpc_server INTERFACE lib_grpc) +endif() + add_library(servers INTERFACE) target_link_libraries(servers INTERFACE ${SERVER_OBJS} diff --git a/src/proteus/servers/grpc_server.cpp b/src/proteus/servers/grpc_server.cpp new file mode 100644 index 000000000..904a707d3 --- /dev/null +++ b/src/proteus/servers/grpc_server.cpp @@ -0,0 +1,317 @@ +// Copyright 2022 Xilinx Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+/**
+ * @file
+ * @brief Implements the gRPC server in Proteus
+ */
+
+#include "proteus/servers/grpc_server.hpp"
+
+#include "predict_api.grpc.pb.h"
+#include "proteus/batching/batcher.hpp"
+#include "proteus/core/interface.hpp"
+#include "proteus/core/manager.hpp"
+#include "proteus/core/predict_api_internal.hpp"
+#include "proteus/core/worker_info.hpp"
+#include "proteus/helpers/queue.hpp"
+#include "proteus/observation/logging.hpp"
+#include "proteus/version.hpp"
+
+// use aliases to prevent clashes between grpc:: and proteus::grpc::
+using ServerBuilder = grpc::ServerBuilder;
+using ServerCompletionQueue = grpc::ServerCompletionQueue;
+template <typename T>
+using ServerAsyncResponseWriter = grpc::ServerAsyncResponseWriter<T>;
+using ServerContext = grpc::ServerContext;
+using Server = grpc::Server;
+using Status = grpc::Status;
+using StatusCode = grpc::StatusCode;
+
+namespace proteus {
+
+#define CALLDATA_IMPL(endpoint)                                               \
+  class CallData##endpoint                                                    \
+    : public CallData<inference::endpoint##Request,                           \
+                      inference::endpoint##Response> {                        \
+   public:                                                                    \
+    CallData##endpoint(AsyncService* service, ServerCompletionQueue* cq)      \
+      : CallData(service, cq) {                                               \
+      proceed();                                                              \
+    };                                                                        \
+                                                                              \
+   protected:                                                                 \
+    void addNewCallData() override { new CallData##endpoint(service_, cq_); } \
+    void waitForRequest() override {                                          \
+      service_->Request##endpoint(&ctx_, &request_, &responder_, cq_, cq_,    \
+                                  this);                                      \
+    }                                                                         \
+    void handleRequest() override
+
+#define CALLDATA_IMPL_END \
+  }                       \
+  ;
+
+CALLDATA_IMPL(ServerLive) {
+  reply_.set_live(true);
+  finish();
+}
+CALLDATA_IMPL_END
+
+CALLDATA_IMPL(ServerReady) {
+  reply_.set_ready(true);
+  finish();
+}
+CALLDATA_IMPL_END
+
+CALLDATA_IMPL(ModelReady) {
+  auto& model = request_.name();
+  try {
+    reply_.set_ready(Manager::getInstance().workerReady(model));
+    finish();
+  } catch (const std::invalid_argument& e) {
+    reply_.set_ready(false);
+    finish(Status(StatusCode::NOT_FOUND, e.what()));
+  }
+}
+CALLDATA_IMPL_END
+
+void parseResponse(CallDataModelInfer* calldata, InferenceResponse response) {
+  auto& reply = calldata->getReply();
+  reply.set_model_name(response.getModel());
+  reply.set_id(response.getID());
+  auto outputs = response.getOutputs();
+  for (InferenceResponseOutput& output : outputs) {
+    auto* tensor = reply.add_outputs();
+    tensor->set_name(output.getName());
+    // auto* parameters = tensor->mutable_parameters();
+    tensor->set_datatype(types::mapTypeToStr(output.getDatatype()));
+    auto shape = output.getShape();
+    for (const size_t& index : shape) {
+      tensor->add_shape(index);
+    }
+    tensor->set_allocated_contents(
+      static_cast<inference::InferTensorContents*>(output.getData()));
+  }
+}
+
+void grpcCallback(CallDataModelInfer* calldata,
+                  const InferenceResponse& response) {
+  if (response.isError()) {
+    calldata->finish(
+      Status(StatusCode::UNKNOWN, std::string{response.getError()}));
+    return;
+  } else {
+    try {
+      parseResponse(calldata, response);
+    } catch (const std::invalid_argument& e) {
+      calldata->finish(Status(StatusCode::UNKNOWN, e.what()));
+      return;
+    }
+  }
+  // #ifdef PROTEUS_ENABLE_TRACING
+  // const auto &context = response.getContext();
+  // propagate(resp.get(), context);
+  // #endif
+  calldata->finish();
+}
+
+class GrpcApi : public Interface {
+ public:
+  /**
+   * @brief Construct a new GrpcApi object
+   *
+   * @param calldata the CallData object holding the gRPC request and reply
+   */
+  GrpcApi(CallDataModelInfer* calldata) : calldata_(calldata){};
+
+  std::shared_ptr<InferenceRequest> getRequest(
+    size_t& buffer_index, const std::vector& input_buffers,
+    std::vector& input_offsets,
+    const std::vector& output_buffers,
+    std::vector& output_offsets, const size_t& batch_size,
size_t& batch_offset) override { + try { + auto request = InferenceRequestBuilder::fromGrpc( + this->calldata_, buffer_index, input_buffers, input_offsets, + output_buffers, output_offsets, batch_size, batch_offset); + Callback callback = + std::bind(grpcCallback, this->calldata_, std::placeholders::_1); + request->setCallback(std::move(callback)); + return request; + } catch (const std::invalid_argument& e) { + SPDLOG_LOGGER_INFO(this->logger_, e.what()); + errorHandler(e); + return nullptr; + } + } + + size_t getInputSize() override { + return calldata_->getRequest().inputs_size(); + } + + void errorHandler(const std::invalid_argument& e) override { + SPDLOG_INFO(e.what()); + calldata_->finish(Status(StatusCode::NOT_FOUND, e.what())); + } + + private: + CallDataModelInfer* calldata_; +}; + +CALLDATA_IMPL(ServerMetadata) { + reply_.set_name("proteus"); + reply_.set_version(kProteusVersion); +#ifdef PROTEUS_ENABLE_AKS + reply_.add_extensions("aks"); +#endif +#ifdef PROTEUS_ENABLE_VITIS + reply_.add_extensions("vitis"); +#endif + finish(); +} +CALLDATA_IMPL_END + +CallDataModelInfer::CallDataModelInfer(AsyncService* service, + ServerCompletionQueue* cq) + : CallData(service, cq) { + proceed(); +} + +void CallDataModelInfer::addNewCallData() { + new CallDataModelInfer(service_, cq_); +} + +void CallDataModelInfer::waitForRequest() { + service_->RequestModelInfer(&ctx_, &request_, &responder_, cq_, cq_, this); +} + +void CallDataModelInfer::handleRequest() { + auto& model = request_.model_name(); + + WorkerInfo* worker = nullptr; + try { + worker = Manager::getInstance().getWorker(model); + } catch (const std::invalid_argument& e) { + SPDLOG_INFO(e.what()); + finish(Status(StatusCode::NOT_FOUND, "Worker " + model + " not found")); + return; + } + + auto request = std::make_unique(this); + // #ifdef PROTEUS_ENABLE_METRICS + // request->set_time(now); + // #endif + auto* batcher = worker->getBatcher(); + // #ifdef PROTEUS_ENABLE_TRACING + // trace->endSpan(); + // request->setTrace(std::move(trace)); + // #endif + batcher->enqueue(std::move(request)); +} + +inference::ModelInferRequest& CallDataModelInfer::getRequest() { + return this->request_; +} + +inference::ModelInferResponse& CallDataModelInfer::getReply() { + return this->reply_; +} + +class GrpcServer final { + public: + /// Get the singleton GrpcServer instance + static GrpcServer& getInstance() { return create("", -1); }; + + static GrpcServer& create(const std::string& address, const int cq_count) { + static GrpcServer server(address, cq_count); + return server; + }; + + GrpcServer(GrpcServer const&) = delete; ///< Copy constructor + GrpcServer& operator=(const GrpcServer&) = + delete; ///< Copy assignment constructor + GrpcServer(GrpcServer&& other) = delete; ///< Move constructor + GrpcServer& operator=(GrpcServer&& other) = + delete; ///< Move assignment constructor + + ~GrpcServer() { + server_->Shutdown(); + // Always shutdown the completion queues after the server. + for (auto& cq : cq_) { + cq->Shutdown(); + } + }; + + private: + GrpcServer(const std::string& address, const int cq_count) { + ServerBuilder builder; + // Listen on the given address without any authentication mechanism. + builder.AddListeningPort(address, ::grpc::InsecureServerCredentials()); + // Register "service_" as the instance through which we'll communicate with + // clients. In this case it corresponds to an *asynchronous* service. 
+ builder.RegisterService(&service_); + // Get hold of the completion queue used for the asynchronous communication + // with the gRPC runtime. + for (auto i = 0; i < cq_count; i++) { + cq_.push_back(builder.AddCompletionQueue()); + } + // Finally assemble the server. + server_ = builder.BuildAndStart(); + + // Start threads to handle incoming RPCs + for (auto i = 0; i < cq_count; i++) { + threads_.emplace_back(&GrpcServer::handleRpcs, this, i); + // just detach threads for now to simplify shutdown + threads_.back().detach(); + } + }; + + // This can be run in multiple threads if needed. + void handleRpcs(int index) { + auto& my_cq = cq_.at(index); + + // Spawn a new CallData instance to serve new clients. + new CallDataServerLive(&service_, my_cq.get()); + void* tag; // uniquely identifies a request. + bool ok; + while (true) { + // Block waiting to read the next event from the completion queue. The + // event is uniquely identified by its tag, which in this case is the + // memory address of a CallDataBase instance. + // The return value of Next should always be checked. This return value + // tells us whether there is any kind of event or cq_ is shutting down. + GPR_ASSERT(my_cq->Next(&tag, &ok)); + GPR_ASSERT(ok); + static_cast(tag)->proceed(); + } + }; + + std::vector> cq_; + inference::GRPCInferenceService::AsyncService service_; + std::unique_ptr<::grpc::Server> server_; + std::vector threads_; +}; + +namespace grpc { + +void start(const std::string& address) { GrpcServer::create(address, 1); } + +void stop() { GrpcServer::getInstance().~GrpcServer(); } + +} // namespace grpc + +} // namespace proteus diff --git a/src/proteus/servers/grpc_server.hpp b/src/proteus/servers/grpc_server.hpp new file mode 100644 index 000000000..8ffe1e493 --- /dev/null +++ b/src/proteus/servers/grpc_server.hpp @@ -0,0 +1,144 @@ +// Copyright 2022 Xilinx Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * @file + * @brief Defines the gRPC server in Proteus + */ + +#ifndef GUARD_PROTEUS_SERVERS_GRPC_SERVER +#define GUARD_PROTEUS_SERVERS_GRPC_SERVER + +#include "proteus/build_options.hpp" + +#ifdef PROTEUS_ENABLE_GRPC + +#include + +#include +#include +#include +#include + +#include "predict_api.grpc.pb.h" + +namespace proteus { + +using AsyncService = inference::GRPCInferenceService::AsyncService; + +class CallDataBase { + public: + virtual void proceed() = 0; +}; + +using AsyncService = inference::GRPCInferenceService::AsyncService; + +template +class CallData : public CallDataBase { + public: + // Take in the "service" instance (in this case representing an asynchronous + // server) and the completion queue "cq" used for asynchronous communication + // with the gRPC runtime. + CallData(AsyncService* service, ::grpc::ServerCompletionQueue* cq) + : service_(service), cq_(cq), responder_(&ctx_), status_(CREATE) {} + + virtual ~CallData() = default; + + void proceed() override { + if (status_ == CREATE) { + // Make this instance progress to the PROCESS state. 
+ status_ = PROCESS; + + waitForRequest(); + } else if (status_ == PROCESS) { + addNewCallData(); + + // queue_->enqueue(this); + // status_ = WAIT; + handleRequest(); + status_ = WAIT; + } else if (status_ == WAIT) { + std::this_thread::yield(); + } else { + GPR_ASSERT(status_ == FINISH); + // Once in the FINISH state, deallocate ourselves (CallData). + delete this; + } + } + + void finish(const ::grpc::Status& status = ::grpc::Status::OK) { + // And we are done! Let the gRPC runtime know we've finished, using the + // memory address of this instance as the uniquely identifying tag for + // the event. + status_ = FINISH; + responder_.Finish(reply_, status, this); + } + + protected: + // When we handle a request of this type, we need to tell + // the completion queue to wait for new requests of the same type. + virtual void addNewCallData() = 0; + + virtual void waitForRequest() = 0; + virtual void handleRequest() = 0; + + // The means of communication with the gRPC runtime for an asynchronous + // server. + AsyncService* service_; + // The producer-consumer queue where for asynchronous server notifications. + ::grpc::ServerCompletionQueue* cq_; + // Context for the rpc, allowing to tweak aspects of it such as the use + // of compression, authentication, as well as to send metadata back to the + // client. + ::grpc::ServerContext ctx_; + + // What we get from the client. + RequestType request_; + // What we send back to the client. + ReplyType reply_; + + // The means to get back to the client. + ::grpc::ServerAsyncResponseWriter responder_; + + // Let's implement a tiny state machine with the following states. + enum CallStatus { CREATE, PROCESS, WAIT, FINISH }; + CallStatus status_; // The current serving state. +}; + +class CallDataModelInfer : public CallData { + public: + CallDataModelInfer(AsyncService* service, ::grpc::ServerCompletionQueue* cq); + + inference::ModelInferRequest& getRequest(); + inference::ModelInferResponse& getReply(); + + protected: + void addNewCallData() override; + void waitForRequest() override; + void handleRequest() override; +}; + +namespace grpc { + +void start(const std::string& address); +void stop(); + +} // namespace grpc + +} // namespace proteus + +#endif // PROTEUS_ENABLE_GRPC + +#endif // GUARD_PROTEUS_SERVERS_GRPC_SERVER diff --git a/src/proteus/workers/CMakeLists.txt b/src/proteus/workers/CMakeLists.txt index 4573f62f2..15750b95f 100644 --- a/src/proteus/workers/CMakeLists.txt +++ b/src/proteus/workers/CMakeLists.txt @@ -35,7 +35,7 @@ foreach( worker ${workers}) add_library(${worker_name} SHARED ${file_name}.cpp) target_link_options(${worker_name} PUBLIC "LINKER:-E") - target_link_libraries(${worker_name} PRIVATE batching buffers core) + target_link_libraries(${worker_name} PRIVATE batching buffers core $) target_include_directories(${worker_name} PRIVATE ${PROTEUS_INCLUDE_DIRS}) enable_ipo_on_target(${worker_name}) @@ -68,7 +68,7 @@ if(${PROTEUS_ENABLE_AKS}) add_library(${worker_name} SHARED ${file_name}.cpp) target_link_options(${worker_name} PUBLIC "LINKER:-E") target_link_libraries(${worker_name} PRIVATE - batching buffers core base64 parse_env observation + batching buffers core base64 parse_env observation $ aks vart-runner xir opencv_core opencv_imgcodecs ) @@ -85,7 +85,7 @@ if(${PROTEUS_ENABLE_AKS}) add_library(workerAks SHARED aks.cpp) target_link_options(workerAks PUBLIC "LINKER:-E") target_link_libraries(workerAks - PRIVATE batching buffers core parse_env observation + PRIVATE batching buffers core parse_env observation $ PRIVATE aks 
vart-runner xir ) target_include_directories(workerAks PRIVATE ${PROTEUS_INCLUDE_DIRS}) diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt index b000ea28c..bcd5b0b57 100644 --- a/tests/cpp/CMakeLists.txt +++ b/tests/cpp/CMakeLists.txt @@ -14,6 +14,7 @@ set(PROTEUS_TEST_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src) +add_subdirectory(grpc) add_subdirectory(native) add_subdirectory(performance) add_subdirectory(src/proteus) diff --git a/tests/cpp/grpc/CMakeLists.txt b/tests/cpp/grpc/CMakeLists.txt new file mode 100644 index 000000000..18c3ed035 --- /dev/null +++ b/tests/cpp/grpc/CMakeLists.txt @@ -0,0 +1,24 @@ +# Copyright 2022 Xilinx Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_executable(grpc_server_live server_live.cpp) + +get_target_property(grpc_include lib_grpc INCLUDE_DIRECTORIES) +target_include_directories(grpc_server_live PRIVATE ${grpc_include}) +target_link_libraries(grpc_server_live PRIVATE lib_grpc) + +add_executable(grpc_model_infer model_infer.cpp) + +target_include_directories(grpc_model_infer PRIVATE ${grpc_include}) +target_link_libraries(grpc_model_infer PRIVATE lib_grpc) diff --git a/tools/autotest.sh b/tools/autotest.sh index f551e3a9b..18aee4048 100755 --- a/tools/autotest.sh +++ b/tools/autotest.sh @@ -91,30 +91,3 @@ print_header "Testing the stable production docker image" ./proteus --dry-run up --profile autotest ./proteus up --profile autotest - -print_header "Building the nightly dev docker image" - -# use the --dry-run flag to log the "docker build" commands used -./proteus --dry-run dockerize --vitis-nightly -./proteus dockerize --vitis-nightly - -print_header "Building the nightly production docker image" - -./proteus --dry-run dockerize --production --vitis-nightly -./proteus dockerize --production --vitis-nightly - -# These commands add /proteus-dev:.nightly and -# /proteus:.nightly to the local machine. Append -# "--build-meta " to the commands above to change the image tags to -# /proteus[-dev]:.nightly+. They also update -# the latest tags on these images - -print_header "Testing the nightly dev docker image" - -./proteus --dry-run up --profile autotest-dev -./proteus up --profile autotest-dev - -print_header "Testing the nightly production docker image" - -./proteus --dry-run up --profile autotest -./proteus up --profile autotest
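Reviewer addendum (not part of the patch): a minimal client sketch. The snippet
below shows one way to exercise the new GRPCInferenceService endpoint added by
this change, using only the stub generated from predict_api.proto and the
standard gRPC C++ API. The file name, the build wiring, and the
localhost:50051 address (the default port hard-coded in main.cpp above) are
assumptions for illustration, not part of this commit.

// grpc_client_example.cpp -- illustrative only
#include <iostream>

#include <grpcpp/grpcpp.h>

#include "predict_api.grpc.pb.h"

int main() {
  // Connect to the server started by proteus-server (see main.cpp).
  auto channel =
    grpc::CreateChannel("localhost:50051", grpc::InsecureChannelCredentials());
  auto stub = inference::GRPCInferenceService::NewStub(channel);

  // Check liveness via the ServerLive RPC defined in predict_api.proto.
  grpc::ClientContext context;
  inference::ServerLiveRequest request;
  inference::ServerLiveResponse response;
  grpc::Status status = stub->ServerLive(&context, request, &response);

  if (status.ok()) {
    std::cout << "server live: " << std::boolalpha << response.live() << "\n";
  } else {
    std::cerr << "RPC failed: " << status.error_message() << "\n";
  }
  return status.ok() ? 0 : 1;
}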