# Transformer






In [1]:
import sys
from importlib import reload

import torch

import figure_names
from eigenestimation.evaluation.top_logits import compute_jacobian

# Directory that holds the saved eigenmodel artifacts for this notebook.
checkpoints_path = "/root/eigenestimation/outputs/eigenmodels/"

# The checkpoint is a dict; its 'model' entry is the eigenmodel object itself.
eigenmodel_path = f"{checkpoints_path}tinystories-8M-eigenmodel.pt"
eigenmodel = torch.load(eigenmodel_path)['model']

In [2]:
import sys
from importlib import reload

# Put the project checkout on the import path before importing from it.
sys.path.append('/root/eigenestimation')

# Bind both the module object (needed by reload) and the function name.
import eigenestimation.evaluation.top_logits
from eigenestimation.evaluation.top_logits import compute_jacobian

# Re-execute the module so on-disk edits are picked up ...
reload(eigenestimation.evaluation.top_logits)

# ... then rebind the name, since the earlier import still points at the
# function object created before the reload.
from eigenestimation.evaluation.top_logits import compute_jacobian

In [3]:
# Paths of the three artifacts this notebook reads (all live under checkpoints_path).
X_data_path = checkpoints_path + "tinystories-8M-X_data.pt"
attributions_path = checkpoints_path + "tinystories-8M-circuit_attributions.pt"
eigenmodel_path = checkpoints_path + "tinystories-8M-eigenmodel.pt"

# NOTE(review): torch.load unpickles arbitrary Python objects, so only point
# these paths at checkpoints you trust. On recent PyTorch releases the default
# of `weights_only` changed to True, which may reject a pickled model object;
# if loading fails there, pass weights_only=False explicitly -- confirm against
# the installed version.
X_data = torch.load(X_data_path)
attributions = torch.load(attributions_path)

# Read the eigenmodel checkpoint from disk once and pull both entries from the
# same dict instead of deserialising the whole file a second time.
eigenmodel_checkpoint = torch.load(eigenmodel_path)
eigenmodel = eigenmodel_checkpoint['model']
frac_activated = eigenmodel_checkpoint['frac_activated']

# The tokenizer is the one attached to the wrapped language model.
tokenizer = eigenmodel.model.tokenizer

In [19]:
import os
import subprocess
import re

def sanitize_for_latex(text):
    """Make arbitrary decoded text safe to place in a LaTeX table cell.

    Steps, in order:
      1. Drop anything that cannot be encoded as UTF-8 and remove the
         U+FFFD replacement character.
      2. Escape every LaTeX-reserved character in a single pass.
      3. Replace newlines with spaces and strip surrounding whitespace.

    Args:
        text: The string to sanitize.

    Returns:
        The sanitized string.
    """
    # Characters that cannot be escaped by simply prefixing a backslash:
    #   "\\" would become a line break, "\^" and "\~" are accent commands
    #   that would swallow the following character.
    named_escapes = {
        '\\': r'\textbackslash{}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    }

    def _escape(match):
        char = match.group(0)
        # Everything else in the character class is valid as "\<char>".
        return named_escapes.get(char, '\\' + char)

    text = text.encode('utf-8', 'ignore').decode('utf-8')  # Remove invalid Unicode
    text = text.replace("\ufffd", "")  # Explicitly remove replacement character
    # Single pass, so the braces inserted by the named escapes above are not
    # themselves escaped again.
    text = re.sub(r'[\\~^%_&#${}]', _escape, text)
    text = text.replace('\n', ' ')  # Keep each example on one table row
    return text.strip()

def generate_latex_from_examples(attributions, X_data, tokenizer, circuit_idxs,
                                 output_filename="circuit_examples", model=None, device="cuda"):
    """Write a LaTeX longtable of top-activating examples per circuit and compile it.

    For every circuit index, each of its top examples becomes one table row
    holding the decoded input text (up to and including the example's token)
    and the five tokens with the largest value in the result of
    ``compute_jacobian`` for that circuit.

    Args:
        attributions: Indexable by circuit index; each entry provides
            ``'activation'`` (formatted with ``:.3f``) and ``'top_examples'``,
            a sequence of dicts with ``'sample_id'`` and ``'token_id'``.
        X_data: Indexable by ``sample_id``; each item is sliced up to
            ``token_id + 1`` and must support ``.long()`` and ``.to(device)``.
        tokenizer: Object with a ``decode`` method, used for both the input
            text and the top tokens.
        circuit_idxs: Iterable of circuit indices to include, in table order.
        output_filename: Output path without extension; ``.tex`` and ``.pdf``
            are appended. May include a directory, which is created if missing.
        model: Model passed to ``compute_jacobian``. When ``None`` the
            notebook-level ``eigenmodel`` is used.
        device: Device for the tokens and ``compute_jacobian``.

    Returns:
        None. The ``.tex`` file is always written; compilation problems are
        reported with a printed message instead of raising.
    """
    latex_header = r"""
\documentclass{article}
\usepackage[utf8]{inputenc}
\usepackage{booktabs}
\usepackage{array}
\usepackage{longtable}  % Multi-page tables
\usepackage[a4paper, margin=1in]{geometry}
\usepackage{multirow}  % Allows merging cells
\usepackage{xcolor}
\begin{document}

\section*{Circuit Analysis Examples}

\noindent Below is a unified table of all circuits.

\small
\begin{longtable}{|p{0.15\textwidth}|p{0.45\textwidth}|p{0.35\textwidth}|}
\hline
\textbf{Id ($P_{act}$)} & \textbf{Input Text} & \textbf{Top Logits} \\
\hline
\endfirsthead  % Header repeated on new pages
\hline
\textbf{Id ($P_{act}$)} & \textbf{Input Text} & \textbf{Top Logits} \\
\hline
\endhead

\hline
\multicolumn{3}{r}{\textit{Continued on next page}} \\
\hline
\endfoot

\hline
\endlastfoot
"""
    latex_footer = r"""
\end{longtable}

\end{document}
"""

    # Collect rows in a list and join once instead of growing a string.
    rows = []
    for circuit_idx in circuit_idxs:
        if model is None:
            # Backward compatibility: earlier callers relied on the
            # notebook-level `eigenmodel`. Resolved lazily so an empty
            # `circuit_idxs` never touches the global.
            model = eigenmodel

        circuit = attributions[circuit_idx]
        circuit_label = f"{circuit_idx} ({circuit['activation']:.3f})"
        examples = circuit['top_examples']
        num_rows = len(examples)  # Row span of the circuit-id cell

        for i, example in enumerate(examples):
            # Context up to and including the token the example points at.
            tokens = X_data[example['sample_id']][:example['token_id'] + 1]
            text = sanitize_for_latex(tokenizer.decode(tokens.long()))

            jac = compute_jacobian(model, tokens.to(device), circuit_idx, device=device)
            top_token_idxs = jac.argsort(descending=True)[:5]
            top_logits = sanitize_for_latex(
                ', '.join(tokenizer.decode(token_idx) for token_idx in top_token_idxs)
            )

            if i == 0:
                # Blank spacer row, then the first example carrying the
                # circuit label that spans the whole group.
                rows.append("& & \\\\\n")
                rows.append(
                    f"\\multirow{{{num_rows}}}{{*}}{{\\textbf{{{circuit_label}}}}}"
                    f" & {text} & {top_logits} \\\\\n"
                )
            else:
                rows.append(f"& {text} & {top_logits} \\\\\n")

    latex_content = latex_header + "".join(rows) + latex_footer

    # Save LaTeX content to a file, creating the target directory if needed.
    tex_filename = f"{output_filename}.tex"
    output_dir = os.path.dirname(os.path.abspath(tex_filename))
    os.makedirs(output_dir, exist_ok=True)
    with open(tex_filename, "w", encoding="utf-8") as f:
        f.write(latex_content)

    print(f"LaTeX file '{tex_filename}' created successfully.")

    # Compile the LaTeX file into a PDF. pdflatex writes its outputs to the
    # current working directory unless told otherwise, so pin the output
    # directory to where the .tex file lives; this keeps the PDF and the
    # auxiliary files next to `output_filename`.
    job_name = os.path.splitext(os.path.basename(tex_filename))[0]
    try:
        subprocess.run(
            [
                "pdflatex",
                "-interaction=nonstopmode",
                f"-output-directory={output_dir}",
                tex_filename,
            ],
            check=True,
        )
        print(f"PDF '{output_filename}.pdf' generated successfully.")

        # Cleanup auxiliary files (only after success, so the .log survives
        # a failed run for debugging).
        for ext in (".aux", ".log"):
            aux_file = os.path.join(output_dir, job_name + ext)
            if os.path.exists(aux_file):
                os.remove(aux_file)

    except FileNotFoundError:
        print("Error: 'pdflatex' was not found; the LaTeX file was written but not compiled.")
    except subprocess.CalledProcessError:
        print("Error: Failed to compile the LaTeX file.")

# Smoke run: render circuits 0-9 into the default 'circuit_examples' output.
generate_latex_from_examples(attributions, X_data, tokenizer, circuit_idxs=[*range(10)])


LaTeX file 'circuit_examples.tex' created successfully.
This is pdfTeX, Version 3.141592653-2.6-1.40.22 (TeX Live 2022/dev/Debian) (preloaded format=pdflatex)
 restricted \write18 enabled.
entering extended mode
(./circuit_examples.tex
LaTeX2e <2021-11-15> patch level 1
L3 programming layer <2022-01-21>
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2021/10/04 v1.4n Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty)
(/usr/share/texlive/texmf-dist/tex/latex/booktabs/booktabs.sty)
(/usr/share/texlive/texmf-dist/tex/latex/tools/array.sty)
(/usr/share/texlive/texmf-dist/tex/latex/tools/longtable.sty)
(/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty)
(/usr/share/texlive/texmf-dist/tex/generic/iftex/ifvtex.sty
(/usr/share/texlive/texmf-dist/tex/generic/iftex/iftex.sty)))
(/usr/share/texl

In [23]:
# Hand-picked circuits for the "favorites" table.
circuit_idxs = [0, 5, 16, 18, 21, 30, 59, 71, 76, 86]
generate_latex_from_examples(
    attributions,
    X_data,
    tokenizer,
    circuit_idxs=circuit_idxs,
    output_filename=figure_names.fav_circuits_transformer,
)

# Full table covering circuits 0 through 99.
generate_latex_from_examples(
    attributions,
    X_data,
    tokenizer,
    circuit_idxs=list(range(100)),
    output_filename=figure_names.all_circuits_transformer,
)

LaTeX file '/root/eigenestimation/figures/favorite_transformer_circuits.tex' created successfully.
This is pdfTeX, Version 3.141592653-2.6-1.40.22 (TeX Live 2022/dev/Debian) (preloaded format=pdflatex)
 restricted \write18 enabled.
entering extended mode
(/root/eigenestimation/figures/favorite_transformer_circuits.tex
LaTeX2e <2021-11-15> patch level 1
L3 programming layer <2022-01-21>
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2021/10/04 v1.4n Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/base/inputenc.sty)
(/usr/share/texlive/texmf-dist/tex/latex/booktabs/booktabs.sty)
(/usr/share/texlive/texmf-dist/tex/latex/tools/array.sty)
(/usr/share/texlive/texmf-dist/tex/latex/tools/longtable.sty)
(/usr/share/texlive/texmf-dist/tex/latex/geometry/geometry.sty
(/usr/share/texlive/texmf-dist/tex/latex/graphics/keyval.sty)
(/usr/share/texlive/texmf-dist/tex/generic/iftex/ifvte