In [None]:
# Install dependencies
!sudo apt-get update
!sudo apt-get install -y curl build-essential

# Install Lean 4 via elan (`-y` runs the installer non-interactively)
!curl https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh -sSf | sh -s -- -y

# Add Lean to PATH for the current session.
# The directory is resolved from the current user's home rather than assuming
# /root, and it is only added when missing so that re-running this cell does
# not keep growing PATH.
import os
elan_bin = os.path.join(os.path.expanduser("~"), ".elan", "bin")
path_entries = [entry for entry in os.environ.get("PATH", "").split(os.pathsep) if entry]
if elan_bin not in path_entries:
    os.environ["PATH"] = os.pathsep.join(path_entries + [elan_bin])

# Check Lean version
!lean --version

0% [Working]            Get:1 https://cli.github.com/packages stable InRelease [3,917 B]
Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:3 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:4 https://cli.github.com/packages stable/main amd64 Packages [357 B]
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:6 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ Packages [85.0 kB]
Hit:7 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Get:9 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [9,776 kB]
Get:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease [24.6 kB]
Get:11 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:12 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy/main amd64 Packages [39.2 kB]
Get:13 https://r2u.stat.illinois.edu/ubuntu j

In [None]:
import os
import subprocess

# 1. Create the Lake configuration file (lakefile.lean)
# This tells Lean that "Main" and "Solution" are libraries in this project.
# The files below contain non-ASCII Lean syntax, so they are written as UTF-8
# explicitly instead of relying on the locale's default encoding.
with open("lakefile.lean", "w", encoding="utf-8") as f:
    f.write("""
import Lake
open Lake DSL

package my_proofs

@[default_target]
lean_lib Main

@[default_target]
lean_lib Solution

-- Define the extractor executable
lean_exe extract where
  root := `Extract
  supportInterpreter := true -- Required to access the Lean compiler internals
""")

# 2. Create Main.lean (Capitalized)
with open("Main.lean", "w", encoding="utf-8") as f:
    f.write("""
def Goal_1 : Prop :=
  ∀ n : Nat, 0 < n → 3 ∣ n ^ 2 → 3 ∣ n

-- proof left as sorry
theorem Goal_1_proof : Goal_1 := by
  sorry
""")

# 3. Create Solution.lean (Capitalized, imports Main)
# The theorem is named `Goal_1_proof_solution` because rebuild_proof maps
# "<target_goal>_solution" back to "<target_goal>"; any other name is never
# recognised as the proof of `Goal_1_proof`.
# The project has no Mathlib dependency, so the proof may only use core Lean:
# `Nat.prime.dvd_of_dvd_mul` and `norm_num` are not available here.
with open("Solution.lean", "w", encoding="utf-8") as f:
    f.write("""
import Main

theorem Goal_1_proof_solution : Goal_1 := by
  intro n _ hdiv
  -- Rewrite n ^ 2 as n * n so the hypothesis matches `Nat.mul_mod`
  rw [Nat.pow_two] at hdiv
  have hmul : n * n % 3 = n % 3 * (n % 3) % 3 := Nat.mul_mod n n 3
  -- Split on the remainder of n modulo 3; remainders 1 and 2 contradict hdiv
  have hrem : n % 3 = 0 ∨ n % 3 = 1 ∨ n % 3 = 2 := by omega
  rcases hrem with h | h | h <;> rw [h] at hmul <;> omega
""")

# 4. Build the project
# This compiles Main.lean first, then checks Solution.lean.
# Only the default targets (Main, Solution) are built here; the `extract`
# executable needs Extract.lean, which is created in a later cell.
print("Building project...")
result = subprocess.run(["lake", "build"], capture_output=True, text=True)

if result.returncode == 0:
    print("✅ Build Successful! Main was imported into Solution correctly.")
else:
    print("❌ Build Failed:")
    print(result.stderr)
    print(result.stdout)

Building project...
❌ Build Failed:
error: build failed

⚠ [2/5] Built Main (337ms)
✖ [4/5] Building Solution (310ms)
trace: .> LEAN_PATH=/content/.lake/build/lib/lean /root/.elan/toolchains/leanprover--lean4---v4.28.0/bin/lean /content/Solution.lean -o /content/.lake/build/lib/lean/Solution.olean -i /content/.lake/build/lib/lean/Solution.ilean -c /content/.lake/build/ir/Solution.c --setup /content/.lake/build/ir/Solution.setup.json --json
error: Solution.lean:7:38: unknown tactic
error: Solution.lean:7:8: Unknown constant `Nat.prime.dvd_of_dvd_mul`
error: Lean exited with code 1
Some required targets logged failures:
- Solution



In [None]:
# Elaborate Solution.lean on its own, inside the Lake project environment
!lake env lean Solution.lean

# Extract Function

In [None]:
import os

# Write Extract.lean, the Lean source of the `extract` executable.
#
# What the embedded Lean program does:
#   * expects exactly one argument (a .lean file); otherwise prints a usage
#     line and returns exit code 1
#   * reads that file, initialises the search path from LEAN_PATH, parses the
#     header and processes all commands to obtain the resulting environment
#   * walks `env.constants.map₂`, keeping constants that have no module index,
#     have a value, and whose names are not internal and do not contain
#     "match_" or "proof_"
#   * pretty-prints each kept constant's type and value and lists the constants
#     its value uses (again dropping "match_" / "proof_" names)
#   * skips any declaration whose processing fails (`catch _ => pure ()`)
#   * prints one JSON object with the keys "success", "module" and
#     "declarations", then returns 0 -- "success" is always true
with open("Extract.lean", "w") as f:
    f.write("""
import Lean
open Lean Elab Frontend Meta Json

structure DeclInfo where
  name : String
  type : String
  proofTerm : String
  dependencies : List String
  deriving ToJson

unsafe def main (args : List String) : IO UInt32 := do
  if args.length != 1 then
    IO.println "Usage: lake exe extract <filename.lean>"
    return 1

  let fileName := args.head!
  let input ← IO.FS.readFile fileName

  let sysroot ← Lean.findSysroot
  let leanPath ← IO.getEnv "LEAN_PATH"
  let sp : System.SearchPath := match leanPath with
    | some p => System.SearchPath.parse p
    | none => []
  Lean.initSearchPath sysroot sp

  let inputCtx := Parser.mkInputContext input fileName
  let (header, parserState, messages) ← Parser.parseHeader inputCtx
  let (env, messages) ← processHeader header {} messages inputCtx

  let cmdState := Command.mkState env messages {}
  let frontendState ← IO.processCommands inputCtx parserState cmdState
  let env := frontendState.commandState.env

  let mut decls : List DeclInfo := []
  let fileMap := FileMap.ofString input

  -- FIX: Aggressive Pretty Printing Options to stop "⋯"
  let opts : Options := Options.empty
    |>.insert `pp.maxDepth (DataValue.ofNat 20000)
    |>.insert `pp.width (DataValue.ofNat 180)
    |>.insert `pp.deepTerms (DataValue.ofBool false)  -- Disable deep term truncation
    |>.insert `pp.proofs (DataValue.ofBool true)      -- Print full proofs
    |>.insert `pp.minSteps (DataValue.ofNat 10000)    -- Increase stepping limits

  let coreCtx : Core.Context := { fileName := fileName, fileMap := fileMap, options := opts }
  let coreSt : Core.State := { env := env }

  let constants := env.constants.map₂

  for (name, cinfo) in constants do
    let nameStr := name.toString

    -- STRICT FILTER: Ignore internal match helpers completely.
    -- If 'lemma2' prints as 'match ...', we don't need 'lemma2.match_1'.
    let isInternal := nameStr.contains "match_" || nameStr.contains "proof_" || name.isInternal

    if (env.getModuleIdxFor? name).isNone && !isInternal then
      if cinfo.hasValue then
        try
          let (typeStr, valStr, deps) ← (MetaM.run' <| do
              let type ← ppExpr cinfo.type
              let valExpr := cinfo.value?.getD (Expr.sort Level.zero)
              let valPp ← ppExpr valExpr

              -- Clean Dependencies
              let used := valExpr.getUsedConstants
              let validDeps := used.toList.filterMap fun n =>
                let nStr := n.toString
                -- Don't list matchers as dependencies either
                if !nStr.contains "match_" && !nStr.contains "proof_" then
                  some nStr
                else
                  none

              return (type.pretty, valPp.pretty, validDeps)
          ).run' coreCtx coreSt |>.toIO (fun _ => IO.userError "Meta error")

          decls := decls.concat {
            name := nameStr,
            type := typeStr,
            proofTerm := valStr,
            dependencies := deps
          }
        catch _ => pure ()

  let json := Json.mkObj [
    ("success", true),
    ("module", env.mainModule.toString),
    ("declarations", toJson decls)
  ]

  IO.println json.pretty
  return 0
""")

# Rebuild and Extract
import json
import subprocess

print("Building Extractor...")
# Capture Lake's output and print it on failure, so the diagnostics appear in
# the cell output instead of only a bare CalledProcessError.
build = subprocess.run(["lake", "build", "extract"], capture_output=True, text=True)
if build.returncode != 0:
    print("❌ Extractor build failed:")
    print(build.stderr)
    print(build.stdout)
    build.check_returncode()  # still raises CalledProcessError, as check=True did

print("Running Extraction...")
res = subprocess.run(["lake", "exe", "extract", "Solution.lean"], capture_output=True, text=True)

if res.returncode == 0:
    # Exit code 0 alone is not enough: make sure stdout really is the JSON
    # document before saving it for the rebuild step.
    try:
        extracted = json.loads(res.stdout)
    except json.JSONDecodeError as err:
        extracted = None
        print(f"❌ Extraction output is not valid JSON: {err}")
        print(res.stdout)

    if extracted is not None:
        with open("extracted.json", "w", encoding="utf-8") as f:
            f.write(res.stdout)
        print("✅ JSON Generated.")

        # The extractor reports success even when it found nothing, e.g. when
        # Solution.lean does not elaborate, so flag an empty result explicitly.
        declarations = extracted.get("declarations", []) if isinstance(extracted, dict) else []
        if declarations:
            print(f"Extracted {len(declarations)} declaration(s): {[d.get('name') for d in declarations]}")
        else:
            print("⚠ No declarations were extracted; check that Solution.lean compiles.")
else:
    print("❌ Extraction Failed:")
    print(res.stderr)

Building Extractor...
Running Extraction...
✅ JSON Generated.


In [None]:
# Build only the `extract` executable target declared in lakefile.lean
!lake build extract

[1;31merror:[m unknown target `extract`


# Rebuild proof

In [None]:
import json
import networkx as nx
import sys
import matplotlib.pyplot as plt
import os

In [None]:
def rebuild_proof(original_main, json_path, target_goal, output_file):
    """Generate a Lean file in which ``target_goal`` is proved from extracted terms.

    Steps:
      1. Copy ``original_main`` up to (not including) the line that starts
         ``theorem <target_goal>``; this is the preamble.
      2. Load the extractor's JSON and turn every entry of ``declarations``
         into a ``theorem``/``def`` snippet. A declaration named
         ``<target_goal>_solution`` is renamed to ``<target_goal>``.
      3. Build a dependency graph (edge: dependency -> dependent), keep only
         the target and its ancestors, plot it, and topologically sort it.
      4. Write the preamble followed by the sorted snippets to ``output_file``.

    Args:
        original_main: Path of the Lean file containing the theorem to replace.
        json_path: Path of the JSON file with a ``declarations`` list whose
            items have ``name``, ``type``, ``proofTerm`` and ``dependencies``.
        target_goal: Name of the theorem to rebuild.
        output_file: Path of the Lean file to write.

    Returns:
        None. Problems (missing input file, target absent from the JSON,
        dependency cycle) are reported with ``print`` and end the function
        early without writing ``output_file``.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    print(f"--- Processing {json_path} ---")

    if not os.path.exists(original_main):
        print(f"Error: {original_main} not found.")
        return

    # 1. Parse Original Main.lean to get the 'Preamble'
    # We keep everything up to the definition of the target theorem.
    # The name must be matched as a whole identifier: a plain prefix test would
    # let target "Goal_1" stop at "theorem Goal_10" or "theorem Goal_1_proof".
    target_header = re.compile(rf"theorem\s+{re.escape(target_goal)}(?![\w.'!?])")
    preamble_lines = []
    found_target = False

    # Lean sources contain non-ASCII symbols, so read and write them as UTF-8.
    with open(original_main, 'r', encoding='utf-8') as f:
        for line in f:
            # Stop when we hit the theorem we are replacing
            if target_header.match(line.strip()):
                found_target = True
                break
            preamble_lines.append(line)

    if not found_target:
        print(f"Warning: Could not find 'theorem {target_goal}' in {original_main}.")
        # We proceed anyway, assuming the user might want to append to the end

    preamble = "".join(preamble_lines)

    # 2. Load JSON
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # A missing "declarations" key is treated like an empty list; the
    # "target not found" branch below then reports the problem.
    declarations = data.get('declarations', [])

    # 3. Build Graph
    G = nx.DiGraph()

    # Helper to map temporary solution name to final target name
    def get_final_name(n):
        return target_goal if n == f"{target_goal}_solution" else n

    # Names that have a generated code snippet (as opposed to external deps)
    defined_nodes = set()

    for decl in declarations:
        name = get_final_name(decl['name'])

        # Determine if theorem or def.
        # The declaration replacing a `theorem` header found in the original
        # file is itself a theorem; otherwise fall back to the simple
        # heuristic on the pretty-printed type.
        if found_target and name == target_goal:
            kind = "theorem"
        elif "Prop" in decl['type'] or "Even" in decl['type']:
            kind = "theorem"
        else:
            kind = "def"

        # Generate the Lean code block
        code = f"{kind} {name} : {decl['type']} := {decl['proofTerm']}"

        G.add_node(name, code=code)
        defined_nodes.add(name)

        # Add edges for dependencies
        for dep in decl['dependencies']:
            # Edge direction: Dependency -> Node
            # (Because Node depends on Dependency, so Dependency must come first)
            G.add_edge(get_final_name(dep), name)

    # 4. Prune the Graph
    # We only want nodes that 'target_goal' relies on.
    if target_goal not in G:
        print(f"Error: Target {target_goal} not found in generated proofs.")
        return

    # All ancestors (dependencies) of the target, plus the target itself
    relevant_nodes = nx.ancestors(G, target_goal)
    relevant_nodes.add(target_goal)

    G_pruned = G.subgraph(relevant_nodes).copy()
    print(f"Graph Pruned: {len(G)} nodes -> {len(G_pruned)} nodes.")

    # Check what was dropped
    dropped = defined_nodes - set(G_pruned.nodes)
    if dropped:
        print(f"Dropped unused lemmas: {dropped}")

    # 5. Visualization
    print("\n--- Dependency Tree ---")
    plot_knowledge_graph(G_pruned, target_goal)
    print("-----------------------\n")

    # 6. Topological Sort for compilation order
    try:
        sorted_nodes = list(nx.topological_sort(G_pruned))
    except nx.NetworkXUnfeasible:
        print("Error: Cycle detected in dependencies.")
        return

    # 7. Write Result
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("-- GENERATED PROOF FILE\n")
        # Ensure we have standard imports
        if "import Lean" not in preamble:
            f.write("import Lean\n")

        f.write(preamble.strip() + "\n\n")
        # Use a line comment here: "/-- ... -/" is a doc comment in Lean and
        # would attach itself to the first declaration that follows.
        f.write("-- Reconstructed Proofs (Topologically Sorted)\n\n")

        for node in sorted_nodes:
            # Only write code for nodes we actually defined in the JSON
            # (Ignore external nodes like 'Nat', 'Even' that are in the graph as deps)
            node_attrs = G_pruned.nodes[node]
            if "code" in node_attrs:
                f.write(node_attrs["code"] + "\n\n")

    print(f"✅ Successfully generated {output_file}")

In [None]:

def plot_knowledge_graph(G, target_goal):
    """
    Draw the proof dependency graph with Matplotlib.

    Only nodes that carry a 'code' attribute are drawn; every other node of
    ``G`` is left out of the picture. The node named ``target_goal`` is
    coloured differently from the remaining nodes. If no node has a 'code'
    attribute, a notice is printed and nothing is drawn.
    """
    # Restrict the view to nodes that have a generated code snippet
    own_nodes = []
    for node_name, attrs in G.nodes(data=True):
        if 'code' in attrs:
            own_nodes.append(node_name)
    view = G.subgraph(own_nodes)

    # Nothing to show: report it and stop
    if view.number_of_nodes() == 0:
        print("Plotting skipped: No new internal dependencies to visualize.")
        return

    # Main goal in sky blue, supporting lemmas in light green
    palette = [
        'skyblue' if node_name == target_goal else 'lightgreen'
        for node_name in view.nodes()
    ]

    # Seeded spring layout so the picture is the same on every run
    layout = nx.spring_layout(view, k=1.5, iterations=50, seed=42)

    plt.figure(figsize=(10, 7))

    marker_size = 3000
    nx.draw_networkx_nodes(
        view,
        layout,
        node_size=marker_size,
        node_color=palette,
        edgecolors='black',
    )
    # node_size is passed to the edge drawer as well, matching the node markers
    edge_options = dict(
        arrowstyle='->',
        arrowsize=20,
        node_size=marker_size,
        width=1.5,
        connectionstyle='arc3,rad=0.1',
    )
    nx.draw_networkx_edges(view, layout, **edge_options)
    nx.draw_networkx_labels(view, layout, font_size=10, font_weight='bold')

    # Title, no axes, then render
    plt.title("Proof Dependency Graph", size=15)
    plt.axis('off')
    plt.show()

# Run Function

In [None]:
# Rebuild `Goal_1_proof` from Main.lean using the declarations in extracted.json
rebuild_proof(
    original_main="Main.lean",
    json_path="extracted.json",
    target_goal="Goal_1_proof",
    output_file="generatedMain.lean",
)

--- Processing extracted.json ---
Error: Target Goal_1_proof not found in generated proofs.


In [None]:
# Elaborate the generated file inside the Lake project environment
!lake env lean generatedMain.lean