In [1]:
from pathlib import Path 
import os, dotenv, yaml

# Parse the notebook settings from config.yaml in the current working directory.
config = yaml.safe_load(Path("config.yaml").read_text())

# Load environment variables via python-dotenv before the working directory changes.
dotenv.load_dotenv()

# Move to the configured project directory ("~" is expanded) so that the
# relative paths used further down resolve from there.
os.chdir(Path(config["pythonpath"]).expanduser())

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from dotenv import load_dotenv

In [3]:
# Dataset variant to load, and the dataset version taken from the loaded config.
VARIATION = "nocomments"
VERSION = config["version"]


def _filetype_label(extension):
    """Translate a raw extension into its label: 'cry' -> 'cryptol', 'saw' -> 'saw', anything else -> 'text'."""
    if extension == 'cry':
        return 'cryptol'
    if extension == 'saw':
        return 'saw'
    return 'text'


# The dataset is stored as JSON Lines (one record per line).
dataset_path = f"data/clean_datasets/{VARIATION}_{VERSION}.jsonl"
source_code_df = pd.read_json(dataset_path, lines=True)
source_code_df['filetype'] = source_code_df['filetype'].apply(_filetype_label)

source_code_df.head()


Unnamed: 0,filename,filetype,content,variant,n_imports_original,n_imports_final
0,AES-GCM-SIV-proof/proof/cryptol-specs/AES.cry,cryptol,module AES where\n\nimport `Common::AES\n\ntyp...,without_comments,,
1,AES-GCM-SIV-proof/proof/cryptol-specs/AES128.cry,cryptol,module AES128 where\n\nimport `Common::AES\nim...,without_comments,,
2,AES-GCM-SIV-proof/proof/cryptol-specs/AES256.cry,cryptol,module AES256 where\n\nimport `Common::AES\nim...,without_comments,,
3,AES-GCM-SIV-proof/proof/cryptol-specs/TBox.cry,cryptol,type Nb = 4\ntype State = [4][Nb]...,without_comments,,
4,AES-GCM-SIV-proof/proof/cryptol-specs/Common/A...,cryptol,module Common::AES where\n\nparameter\n type ...,without_comments,,


In [None]:
from src.agent.generate import ModelConfig, TemplateBundle, iter_call_pydantic_ai, build_prompt_call_pydantic_ai

# JSONL path handed to the generation helper as `file_cache_path`; the file name
# encodes the dataset variant and version.
file_cache_path = Path(f"cache/alpaca_instruct_cache/SFT_{VARIATION}_source_code_{VERSION}.jsonl")
file_cache_path.parent.mkdir(parents=True, exist_ok=True)  # create dirs if missing

# Small trial subset: 10 rows drawn with a fixed seed so the same files are
# selected on every run; the index is renumbered from 0.
test_df = source_code_df.sample(10, random_state=42).reset_index(drop=True)

modelConfig = ModelConfig(
    model="openai:gpt-5.1",
    retries=2
)

# Template file names for the system and user prompts.
template = TemplateBundle(
    spec_system="system_spec_simple_gen.j2",
    spec_user="user_spec_simple_gen.j2"
)

# NOTE(review): iter_call_pydantic_ai is defined in src.agent.generate; it presumably
# makes one model call per row and reuses entries from file_cache_path. The exact
# meaning of input_mode="none" is not visible here -- confirm against the helper.
result = iter_call_pydantic_ai(
    test_df,
    model_cfg=modelConfig,
    spec_templates=template,
    input_mode="none",
    file_cache_path=file_cache_path
)
result.head()

File path: cryptol_slices/aws-lc-verification/cryptol-specs-aes-gcm/Common/ntt/010_unit.cry

Relevant Cryptol knowledge (for your analysis only — do not quote verbatim and do not include any code in the instruction):
-----8<-----
SOURCE: /Users/josh/Automated_Reasoning_for_Cryptography/DataPreprocess/text/Cryptol-Reference-Manual-MD/Basic-Syntax.md
## Numeric Literals
Numeric literals may be written in binary, octal, decimal, or
hexadecimal notation. The base of a literal is determined by its prefix:
`0b` for binary, `0o` for octal, no special prefix for decimal, and `0x`
for hexadecimal.

``` cryptol
254                 // Decimal literal
0254                // Decimal literal
0b11111110          // Binary literal
0o376               // Octal literal
0xFE                // Hexadecimal literal
0xfe                // Hexadecimal literal
```

Numeric literals in binary, octal, or hexadecimal notation result in bit
sequences of a fixed length (i.e., they have type `[n]` for some
<span cla

Unnamed: 0,filename,filetype,set,instruction,input,output,content
0,cryptol_slices/cryptol/examples/funstuff/FoxCh...,cryptol,,Write a Cryptol module that defines the Fox-Ch...,,,type OneBank = [4]\ntype BankState = {left : O...
1,cryptol_slices/aws-lc-verification/cryptol-spe...,cryptol,,Write a Cryptol module that defines a finite f...,,,type q = 8380417\ntype Fld = Z q\ntype nn = 25...
2,cryptol-specs/Primitive/Asymmetric/Signature/E...,cryptol,,Write a Cryptol module that gives a constraine...,,,module Primitive::Asymmetric::Signature::ECDSA...
3,cryptol_slices/saw-script/examples/sequential_...,cryptol,,"Write a Cryptol module that defines a generic,...",,,"type SM i o s = (i, s) -> (o, s)\n\nrun : {n, ..."
4,cryptol-specs/Common/utils.cry,cryptol,,Write a Cryptol module that defines a collecti...,,,module Common::utils where\n\nwhile : {a} (a -...


In [None]:

import textwrap

WRAP = 80  # column width for the wrapped instruction text and the "=" separator rules


def _instruction_text(value):
    """Return an instruction cell as text, mapping missing or falsy values to "".

    NaN is truthy, so `value or ""` alone would render a missing instruction as
    the literal text "nan" (and pd.NA would raise on the `or`); missing scalars
    are therefore checked explicitly first.
    """
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    return str(value or "")


# Print each generated instruction next to the source code it was produced for.
for idx, row in result.iterrows():
    print(f"=== Example {idx}: {row['filename']} ===")
    instr = _instruction_text(row["instruction"])
    # Each line of the instruction is wrapped on its own and the wrapped pieces
    # are separated by a blank line; long words and hyphenated words are never split.
    wrapped_instr = "\n\n".join(
        textwrap.fill(p, width=WRAP, break_long_words=False, break_on_hyphens=False)
        for p in instr.splitlines()
    )
    print(f"Instruction:\n{wrapped_instr}\n")
    print(f"Source Code:\n{'=' * WRAP}\n{row['content']}\n{'=' * WRAP}")


=== Example 0: cryptol_slices/cryptol/examples/funstuff/FoxChickenCorn/010_stuffOnlyMovedWithFarmer.cry ===
Instruction:
Write a Cryptol module that defines the Fox-Chicken-Corn river crossing state
and a predicate named stuffOnlyMovedWithFarmer that checks whether a transition
between two bank states is valid. Represent each bank’s contents as a 4-bit word
(farmer plus three objects), and define a record type for the overall state with
left and right banks. The predicate should return true exactly when any objects
that change banks do so only in combination with the farmer, and at most one
non-farmer object moves per step. Include any helper predicates you need, such
as checking where the farmer is, counting moved bits, and constraining moves to
be legal.

Source Code:
type OneBank = [4]
type BankState = {left : OneBank, right : OneBank}

farmer = 0x1

stuffOnlyMovedWithFarmer : BankState -> BankState -> Bit
stuffOnlyMovedWithFarmer b b' =
  if farmerHere b.left
  then farmerHere b'.r