In [1]:
from pathlib import Path 
import os, dotenv, yaml

# Load the notebook configuration.
# NOTE: the relative path is resolved against the current working directory,
# which this cell changes on its last line; re-running the cell therefore
# looks for config.yaml in the new directory.
# The encoding is given explicitly so parsing does not depend on the
# platform's locale default.
with open("config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Pull variables from a .env file into the environment (python-dotenv).
dotenv.load_dotenv()
# Switch to the directory named by the "pythonpath" entry ("~" is expanded),
# so the relative paths used by later cells (e.g. "data/...") resolve there.
os.chdir(Path(config["pythonpath"]).expanduser())

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from dotenv import load_dotenv

In [3]:
# Dataset variant and version; together they select the files read below.
VARIATION = "nocomments"
VERSION = config["version"]


def _filetype_label(extension):
    """Translate a raw 'filetype' value into the label kept in the frame."""
    if extension == 'cry':
        return 'cryptol'
    if extension == 'saw':
        return 'saw'
    return 'text'


source_code_df = pd.read_json(
    f"data/clean_datasets/{VARIATION}_{VERSION}.jsonl",
    lines=True,
)
source_code_df['filetype'] = source_code_df['filetype'].apply(_filetype_label)

mcc_df = pd.read_json(
    f"data/clean_datasets/mcc_stats_{VARIATION}_{VERSION}.jsonl",
    lines=True,
)

# Only carry over the columns that the source frame does not already have.
add_columns = mcc_df.columns.difference(source_code_df.columns).tolist()
print(add_columns)

# Left join on "filename"; validate= makes pandas raise if the key is
# duplicated on either side.
mcc_subset = mcc_df[["filename", *add_columns]]
source_code_df = pd.merge(
    source_code_df,
    mcc_subset,
    how="left",
    on="filename",
    validate="one_to_one",
)


['avg_mcc', 'imports', 'imports_count', 'json_path', 'max_mcc', 'num_declarations', 'num_declarations_with_mcc', 'num_definitions', 'num_types', 'total_mcc']


In [4]:
from src.agent.generate import ModelConfig, TemplateBundle, iter_call_pydantic_ai, build_prompt_call_pydantic_ai

# JSONL cache file passed to the generation call; its folder is created on demand.
cache_filename = f"SFT_{VARIATION}_source_code_{VERSION}.jsonl"
file_cache_path = Path("cache/alpaca_instruct_cache") / cache_filename
file_cache_path.parent.mkdir(exist_ok=True, parents=True)

# Fixed-seed sample of 10 rows, re-indexed from zero.
test_df = source_code_df.sample(n=10, random_state=42).reset_index(drop=True)

modelConfig = ModelConfig(model="openai:gpt-5.1", retries=2)

template = TemplateBundle(
    spec_user="user_spec_simple_gen.j2",
    spec_system="system_spec_simple_gen.j2",
)

# Keyword arguments collected in one place so the call itself stays short.
generation_kwargs = {
    "model_cfg": modelConfig,
    "spec_templates": template,
    "input_mode": "none",
    "file_cache_path": file_cache_path,
}
result = iter_call_pydantic_ai(test_df, **generation_kwargs)
result.head()

File path: cryptol_slices/cryptol/examples/funstuff/FoxChickenCorn/010_stuffOnlyMovedWithFarmer.cry

Relevant Cryptol knowledge (for your analysis only — do not quote verbatim and do not include any code in the instruction):
-----8<-----
SOURCE: /Users/josh/Automated_Reasoning_for_Cryptography/DataPreprocess/text/Cryptol-Reference-Manual-MD/Overloaded-Operations.md
### Zero


``` cryptol
Zero
  zero        : {a} (Zero a) => a
```

|                    |                    |
|--------------------|--------------------|
| Type               | Condition          |
| `Bit`              |                    |
| `Integer`          |                    |
| `Rational`         |                    |
| `Z n`              | `fin n`, `n >= 1`  |
| `Float e p`        | `ValidFloat e p`   |
| `[n] a`            | `Zero a`           |
| `a -> b`           | `Zero b`           |
| `(a,b)`            | `Zero a`, `Zero b` |
| `{f1 : a, f2 : b}` | `Zero a`, `Zero b` |

Instances

### Logical Operations




Unnamed: 0,filename,filetype,set,instruction,input,output,content
0,cryptol_slices/cryptol/examples/funstuff/FoxCh...,cryptol,,Write a detailed Cryptol specification for a s...,,,type OneBank = [4]\ntype BankState = {left : O...
1,cryptol_slices/aws-lc-verification/cryptol-spe...,cryptol,,Write Cryptol code that defines a length-256 s...,,,type q = 8380417\ntype Fld = Z q\ntype nn = 25...
2,cryptol-specs/Primitive/Asymmetric/Signature/E...,cryptol,,Write a Cryptol module that specifies ECDSA si...,,,module Primitive::Asymmetric::Signature::ECDSA...
3,cryptol_slices/saw-script/examples/sequential_...,cryptol,,Write a Cryptol specification that defines a g...,,,"type SM i o s = (i, s) -> (o, s)\n\nrun : {n, ..."
4,cryptol-specs/Common/utils.cry,cryptol,,Write a Cryptol module named Common::utils tha...,,,module Common::utils where\n\nwhile : {a} (a -...


In [5]:

import textwrap

# Column width used both for wrapping the instruction text and for the
# "=" rules drawn around the source code.
WRAP = 80

# Print every generated example: header, wrapped instruction, raw source.
for idx, row in result.iterrows():
    print(f"=== Example {idx}: {row['filename']} ===")

    raw_instr = row["instruction"]
    # A missing instruction (None / NaN / pd.NA) must print as empty text.
    # `nan or ""` evaluates to nan because NaN is truthy, so without this
    # guard the literal string "nan" would be shown as the instruction.
    instr = "" if pd.isna(raw_instr) else str(raw_instr or "")

    # Wrap each original line on its own and separate the wrapped lines with
    # a blank line; long words and hyphenated words are never split.
    wrapped_instr = "\n\n".join(
        textwrap.fill(p, width=WRAP, break_long_words=False, break_on_hyphens=False)
        for p in instr.splitlines()
    )
    print(f"Instruction:\n{wrapped_instr}\n")
    print(f"Source Code:\n{'=' * WRAP}\n{row['content']}\n{'=' * WRAP}")


=== Example 0: cryptol_slices/cryptol/examples/funstuff/FoxChickenCorn/010_stuffOnlyMovedWithFarmer.cry ===
Instruction:
Write a detailed Cryptol specification for a state-transition predicate
stuffOnlyMovedWithFarmer in the classic fox–chicken–corn river crossing puzzle.
Represent one river bank as a 4-bit word where each bit indicates the presence
of farmer, fox, chicken, and corn. A BankState should be a record with left and
right banks. The predicate takes two BankState values (before and after a move)
and returns true exactly when: the farmer changes banks, at most one non-farmer
object moves with the farmer, and only items on the farmer’s bank may move.
Include any helper predicates (e.g., farmerHere, moveFollowsRules, popCount) and
appropriate polymorphic typing constraints.

Source Code:
type OneBank = [4]
type BankState = {left : OneBank, right : OneBank}

farmer = 0x1

stuffOnlyMovedWithFarmer : BankState -> BankState -> Bit
stuffOnlyMovedWithFarmer b b' =
  if farmerHere b.l