diff --git a/README.md b/README.md index c580bec29..502273348 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,21 @@ curl http://localhost:3928/v1/chat/completions \ }' ``` +***OPTIONAL***: You can constrain the sampling using GBNF grammars by providing the path to a grammar file +```bash title="Nitro Inference With Grammar" +curl http://localhost:3928/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + { + "role": "user", + "content": "Who won the world series in 2020?" + } + ], + "grammar_file": "/path/to/grammarfile" + }' +``` + Table of parameters | Parameter | Type | Description | diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc index f770fd066..45a0e49ba 100644 --- a/controllers/llamaCPP.cc +++ b/controllers/llamaCPP.cc @@ -194,7 +194,15 @@ void llamaCPP::chatCompletion( (*jsonBody).get("frequency_penalty", 0).asFloat(); data["presence_penalty"] = (*jsonBody).get("presence_penalty", 0).asFloat(); const Json::Value &messages = (*jsonBody)["messages"]; - + std::string grammar_file = (*jsonBody).get("grammar_file", "").asString(); + std::ifstream file(grammar_file); + if (!file) { + LOG_ERROR << "Grammar file not found"; + } else { + std::stringstream grammarBuf; + grammarBuf << file.rdbuf(); + data["grammar"] = grammarBuf.str(); + } if (!llama.multimodal) { for (const auto &message : messages) { diff --git a/examples/grammars/json.gbnf b/examples/grammars/json.gbnf new file mode 100644 index 000000000..8edcf1a4e --- /dev/null +++ b/examples/grammars/json.gbnf @@ -0,0 +1,25 @@ +root ::= object +value ::= object | array | string | number | ("true" | "false" | "null") ws + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" ws + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" ws + +string ::= + "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + )* "\"" ws + +number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? 
([eE] [-+]? [0-9]+)? ws + +# Optional space: by convention, applied in this grammar after literal chars when allowed +ws ::= ([ \t\n] ws)? \ No newline at end of file