In [1]:
from generate_declarative_sentences import generated_text_from_prompt, load_options_from_config_file, generated_json_from_prompt

## Setting the model type and parameters

### The table below describes the parameters that can be set to control the output of the model.

Source: [Ollama Modelfile documentation](https://github.com/ollama/ollama/blob/main/docs/modelfile.md)

| Parameter      | Description                                                                                                                                                                                                                                                                                                                                                                | Value Type | Example Usage        |
|----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|----------------------|
| num_ctx        | Sets the size of the context window used to generate the next token. (Default: 2048)                                                                                                                                                                                                                                                                                       | int        | num_ctx 4096         |
| repeat_last_n  | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)                                                                                                                                                                                                                                                              | int        | repeat_last_n 64     |
| repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)                                                                                                                                                                                        | float      | repeat_penalty 1.1   |
| temperature    | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)                                                                                                                                                                                                                                                        | float      | temperature 0.7      |
| seed           | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0)                                                                                                                                                                                                          | int        | seed 42              |
| stop           | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate stop parameters in a modelfile.                                                                                                                                                           | string     | stop "AI assistant:" |
| num_predict    | Maximum number of tokens to predict when generating text. (Default: -1, infinite generation)                                                                                                                                                                                                                                                                               | int        | num_predict 42       |
| top_k          | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)                                                                                                                                                                                           | int        | top_k 40             |
| top_p          | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)                                                                                                                                                                                    | float      | top_p 0.9            |
| min_p          | Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with p=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0) | float      | min_p 0.05           |

In [2]:
# Generation options come from the project's config-file helper, called here
# with no arguments; the result is passed unchanged to every generation call
# in the cells below.
# NOTE(review): presumably these are Ollama model parameters (temperature,
# top_k, ...) -- confirm against generate_declarative_sentences and its config.
options = load_options_from_config_file()

# Candidate model tags to experiment with, one per line.
model_strings = (
    "llama3.2",
    "llama3",
    "deepseek-r1:8b",
    "gemma3:4b",
    "gemma3:1b",
)

# The model actually used by the cells below: the last entry of the tuple.
model_string = model_strings[-1]

In [3]:
# Send the free-form sandbox prompt to the selected model and print the
# structured response.
# The encoding is pinned to UTF-8 so the prompt is decoded the same way on
# every platform instead of depending on the locale's default codec.
with open("prompts/prompt_sandbox", "r", encoding="utf-8") as prompt_file:
    the_prompt = prompt_file.read()

print(generated_json_from_prompt(model_string, the_prompt, options))


question='What public figure defended New York in January 2016?' answer='Donald Trump' statement='Donald Trump defended New York in January 2016.'


In [4]:
# Build one prompt per SQuAD sample (shared prefix + question/answer suffix)
# and print the structured response returned for each.
# Files are read as UTF-8 so decoding does not depend on the platform locale.
with open("prompts/prompt_prefix_for_squad", "r", encoding="utf-8") as prompt_prefix_file:
    prompt_prefix = prompt_prefix_file.read()

# Each non-blank line holds tab-separated fields; the first field is used as
# the question and the last one as the answer. The line terminator is removed
# before splitting so the answer does not carry a stray trailing newline, and
# blank lines are skipped so no empty question/answer prompt is sent.
with open("prompts/sample_qas_from_squad", "r", encoding="utf-8") as prompt_qa_file:
    prompt_qas = [
        line.rstrip("\n").split("\t")
        for line in prompt_qa_file
        if line.strip()
    ]

for qa in prompt_qas:
    prompt_suffix = "question: " + qa[0] + "\nanswer: " + qa[-1]
    prompt = prompt_prefix + "\n" + prompt_suffix
    response = generated_json_from_prompt(model_string, prompt, options)
    print(response)

question='To whom did the Virgin Mary allegedly appear in 1858 in Lourdes, France?' answer='Saint Bernadette Soubirous' statement='The Virgin Mary allegedly appeared to Saint Bernadette Soubirous in 1858 in Lourdes, France.'
question='What is in front of the Notre Dame Main Building?' answer='a copper statue of Christ' statement='a copper statue of Christ is in front of the Notre Dame Main Building.'
question='The Basilica of the Sacred heart at Notre Dame is beside to which structure?' answer='the Main Building' statement='The Basilica of the Sacred heart at Notre Dame is beside the Main Building.'
question='What is the Grotto at Notre Dame?' answer='a Marian place of prayer and reflection' statement='The Grotto at Notre Dame is a Marian place of prayer and reflection.'
question='What sits on top of the Main Building at Notre Dame?' answer='a golden statue of the Virgin Mary' statement='a golden statue of the Virgin Mary sits on top of the Main Building at Notre Dame.'
question='When 

In [6]:
# Same SQuAD samples as the structured-output run, but requesting plain text
# through generated_text_from_prompt.
# NOTE(review): the saved output of this cell is
# "ResponseError: invalid JSON schema in format (status code: 500)". The cause
# is presumably inside generated_text_from_prompt (not visible here) -- confirm
# what it passes as ollama's `format` argument.
# Files are read as UTF-8 so decoding does not depend on the platform locale.
with open("prompts/prompt_prefix_for_squad", "r", encoding="utf-8") as prompt_prefix_file:
    prompt_prefix = prompt_prefix_file.read()

# Each non-blank line holds tab-separated fields; the first field is used as
# the question and the last one as the answer. The line terminator is removed
# before splitting so the answer does not carry a stray trailing newline, and
# blank lines are skipped so no empty question/answer prompt is sent.
with open("prompts/sample_qas_from_squad", "r", encoding="utf-8") as prompt_qa_file:
    prompt_qas = [
        line.rstrip("\n").split("\t")
        for line in prompt_qa_file
        if line.strip()
    ]

for qa in prompt_qas:
    prompt_suffix = "question: " + qa[0] + "\nanswer: " + qa[-1]
    prompt = prompt_prefix + "\n" + prompt_suffix
    response = generated_text_from_prompt(model_string, prompt, options)
    print(response)

ResponseError: invalid JSON schema in format (status code: 500)

In [None]:
# The ollama `generate` function: its signature is kept below as a bare string
# literal for reference only. Nothing is defined or called by this cell; as the
# cell's last expression, the string is simply echoed when the cell is run.

"""def generate(
    self,
    model: str = '',
    prompt: str = '',
    suffix: str = '',
    *,
    system: str = '',
    template: str = '',
    context: Optional[Sequence[int]] = None,
    stream: Literal[False] = False,
    think: Optional[bool] = None,
    raw: bool = False,
    format: Optional[Union[Literal['', 'json'], JsonSchemaValue]] = None,
    images: Optional[Sequence[Union[str, bytes, Image]]] = None,
    options: Optional[Union[Mapping[str, Any], Options]] = None,
    keep_alive: Optional[Union[float, str]] = None,
  )"""