In [56]:
# Reload imported modules automatically before each cell runs, so edits to
# local .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


To parse the log files produced by the ADAPT script, we use a finite state machine. For each ADAPT iteration, our objective is to collect: which operator was added, whether that operator was an MVP-CEO or an OVP-CEO, which operators were added as part of the Tetris protocol, and the final coefficients of those operators.

In [57]:
import re
from dataclasses import dataclass, field
from typing import List


@dataclass
class AdaptData:
    """Everything extracted from one ADAPT log.

    Holds the operators added in each iteration and the flat list of final
    coefficients, and converts them into a flat token sequence.
    """
    # One entry per committed iteration, in the order they were parsed.
    iterations: List["StateData"] = field(default_factory=list)
    # Final coefficients, in the order they were read from the log.
    coefficients: List[float] = field(default_factory=list)

    def explode_token(self, token: str, explode_qe=True) -> List[str]:
        """Expand one ``OVP(...)`` / ``MVP(...)`` operator string into tokens.

        The operator text is evaluated as a Python call expression in which
        ``OVP``, ``MVP`` and ``QE`` are bound to small token builders.

        Args:
            token: Operator text of the form ``OVP(QE(...), ..., sign)`` or
                ``MVP(QE(...), ...)``.
            explode_qe: If true, each ``QE(a, b, ...)`` becomes
                ``'<qe>', 'a', 'b', ..., '</qe>'``; otherwise it stays a
                single ``'QE(a, b, ...)'`` token.

        Returns:
            The token list, wrapped in ``<ovp +>``/``<ovp ->`` ... ``</ovp>``
            or ``<mvp>`` ... ``</mvp>``.
        """
        def OVP(*args):
            # The last positional argument is the sign; everything before it
            # is an already-expanded QE token list.
            *ops, sign = args
            sign = '+' if sign == 1 else '-'
            tokens = [f'<ovp {sign}>']
            for op_tokens in ops:
                tokens.extend(op_tokens)
            tokens.append('</ovp>')
            return tokens

        def MVP(*ops):
            tokens = ['<mvp>']
            for op_tokens in ops:
                tokens.extend(op_tokens)
            tokens.append('</mvp>')
            return tokens

        def QE(*args):
            if explode_qe:
                return ['<qe>', *map(str, args), '</qe>']
            return [f"QE({', '.join(map(str, args))})"]

        # NOTE(security): eval() executes arbitrary Python. The token comes
        # straight from the log text, so only parse log files you trust.
        # The builders are passed explicitly so the expression does not depend
        # on this method's other local variables.
        handlers = {'OVP': OVP, 'MVP': MVP, 'QE': QE}
        return eval(token, globals(), handlers)

    def to_tokens(self, explode_qe=True) -> List:
        """Flatten all iterations into one token list.

        For every iteration the main operator's tokens are emitted first,
        followed by a ``<tetris>`` ... ``</tetris>`` section containing the
        tokens of that iteration's Tetris operators (the section is emitted
        even when it is empty).
        """
        tokens = []

        for data in self.iterations:
            tokens.extend(self.explode_token(data.main_operator, explode_qe=explode_qe))
            tokens.append('<tetris>')
            for operator in data.tetris_operators:
                tokens.extend(self.explode_token(operator, explode_qe=explode_qe))
            tokens.append('</tetris>')

        return tokens

    def token_coeffs(self, tokens: List[str]) -> List[float]:
        """Pair every token with its coefficient.

        Tokens starting with ``"QE"`` receive a coefficient; every other token
        receives ``0``. All QE tokens inside one ``<ovp ...>`` block share a
        single coefficient, which is consumed when the block closes. QE tokens
        anywhere else (inside ``<mvp>`` or outside any block) consume one
        coefficient each.

        Only unexploded ``QE(...)`` tokens are recognised, i.e. token lists
        built with ``explode_qe=False``; exploded ``<qe>`` groups all map to 0.

        Args:
            tokens: Token list as produced by ``to_tokens``.

        Returns:
            A list of the same length as ``tokens``.

        Raises:
            IndexError: If the tokens need more coefficients than are stored.
        """
        results = []

        index = 0        # position of the next unconsumed coefficient
        in_ovp = False   # True between an '<ovp ...>' tag and its '</ovp>'
        for token in tokens:
            if token in {'<ovp +>', '<ovp ->'}:
                in_ovp = True
            elif token == '<mvp>':
                in_ovp = False

            if token.startswith("QE"):
                if index >= len(self.coefficients):
                    raise IndexError(
                        f"token {token!r} needs coefficient #{index}, but only "
                        f"{len(self.coefficients)} coefficients are available"
                    )
                results.append(self.coefficients[index])
                if not in_ovp:
                    index += 1
            else:
                if token == '</ovp>' and in_ovp:
                    # An OVP block uses exactly one coefficient, however many
                    # QE tokens it contains.
                    index += 1
                    in_ovp = False
                results.append(0)

        return results

@dataclass
class StateData:
    """Operators collected while parsing a single ADAPT iteration."""
    # Raw operator text of the iteration's main operator; None until one is set.
    main_operator: str = None
    # Raw operator texts recorded as Tetris additions, in the order they were appended.
    tetris_operators: List[str] = field(default_factory=list)

    def __len__(self):
        # The main operator always counts as one, whether or not it is set yet.
        return len(self.tetris_operators) + 1

class State:
    """Integer identifiers for the states of the log-parsing state machine."""
    (
        Misc,             # initial state, before any iteration header is seen
        NewIteration,     # an iteration header was just matched
        AwaitingCeoType,  # waiting for an "Adding ...-CEO." or "viable candidates" line
        ParsingOperator,  # waiting for the OVP(...)/MVP(...) operator line
        Finalizing,       # "Final Energy" was seen; reading final coefficients
    ) = range(5)

class Patterns:
    """Compiled regular expressions for the log lines the parser looks at."""

    # --- Section markers -------------------------------------------------
    # Header line that opens an iteration.
    NewIteration = re.compile(r"^\*\*\* ADAPT-VQE Iteration")
    # Line that opens the final summary.
    FinalEnergy = re.compile(r"^Final Energy")

    # --- Lines inside an iteration ---------------------------------------
    # Announces the CEO flavour about to be added; group 1 is "OVP" or "MVP".
    AddingCeo = re.compile(r"Adding (OVP|MVP)-CEO\.")
    # An operator expression starting with OVP( or MVP( and running to the last ")".
    Operator = re.compile(r"(OVP|MVP)\(.+\)")
    # "<count> viable candidates: [...]"; group 1 is the bracketed list.
    TetrisOperators = re.compile(r"^\d+ viable candidates: (\[.+\])")
    # "Operator(s) added to ansatz: [...]"; group 1 is the bracketed list.
    AddedOperators = re.compile(r"^Operator\(s\) added to ansatz: (\[.+\])")

    # --- Lines inside the final summary ----------------------------------
    # "Coefficients: [...]"; group 1 is the bracketed list.
    FinalCoefficients = re.compile(r"^Coefficients: (\[.+\])")
    FinalOperators = re.compile(r"^Final operators in the ansatz")

    # --- Helpers -----------------------------------------------------------
    # Separator between list items: any run of whitespace and/or commas.
    Sep = re.compile(r"[\s,]+")

def parse_log(text: str):
    """Parse the text of an ADAPT log into an ``AdaptData``.

    Each line is stripped and fed through a small state machine:

    * An iteration header or a "Final Energy" line, seen in any state except
      ``Finalizing``, commits the iteration collected so far (only if it has
      a main operator) and starts a fresh one.
    * The line directly after an iteration header is consumed without being
      inspected.
    * Inside an iteration, an "Adding OVP/MVP-CEO." line arms the parser to
      read the next line that starts with ``OVP(...)`` or ``MVP(...)``.
      Operators read before a "viable candidates" line are stored as the
      main operator; operators read after it are stored as Tetris operators.
    * After "Final Energy", only "Coefficients: [...]" lines are read.

    An iteration still in progress when the text ends without a
    "Final Energy" line is not committed.

    Args:
        text: Full contents of the log file.

    Returns:
        The populated ``AdaptData``.

    Raises:
        ValueError: If an entry of the coefficient list is not a valid float.
    """
    adapt_data = AdaptData()
    state = State.Misc
    data = StateData()
    tetris = False

    # State transition function with some bookkeeping about
    # when to save our Adapt iteration data
    def transition_state(new_state):
        nonlocal state, data, tetris
        if new_state in {State.NewIteration, State.Finalizing}:
            # Leaving an iteration: keep it only if an operator was parsed.
            if data.main_operator is not None:
                adapt_data.iterations.append(data)

            data = StateData()
            tetris = False

        state = new_state

    for line in text.splitlines():
        line = line.strip()

        # Generic control flow: section markers take priority over the
        # per-state handling until the final summary has started.
        if state != State.Finalizing:
            if Patterns.NewIteration.match(line):
                transition_state(State.NewIteration)
                continue
            if Patterns.FinalEnergy.match(line):
                transition_state(State.Finalizing)
                continue

        # Specific states and transitions
        if state == State.NewIteration:
            # The line following the header is skipped unconditionally.
            transition_state(State.AwaitingCeoType)

        elif state == State.AwaitingCeoType:
            if Patterns.AddingCeo.match(line):
                transition_state(State.ParsingOperator)
            elif Patterns.TetrisOperators.match(line):
                # Every operator from here to the end of the iteration is
                # recorded as a Tetris operator.
                tetris = True

        elif state == State.ParsingOperator:
            operator_match = Patterns.Operator.match(line)
            if operator_match:
                if tetris:
                    data.tetris_operators.append(operator_match.group())
                else:
                    data.main_operator = operator_match.group()

                transition_state(State.AwaitingCeoType)

        elif state == State.Finalizing:
            coeff_match = Patterns.FinalCoefficients.match(line)
            if coeff_match:
                # Drop the surrounding brackets, then split on commas/whitespace.
                inner = coeff_match.group(1)[1:-1]
                # re.split yields '' when the list starts or ends with a
                # separator (e.g. "[ 0.1 -0.2]" or "[0.1, 0.2, ]"); skip those
                # instead of letting float('') raise.
                adapt_data.coefficients.extend(
                    float(part) for part in Patterns.Sep.split(inner) if part
                )

    # NOTE(review): the original left the following consistency check disabled;
    # it is kept here for reference and is not enforced.
    # assert len(adapt_data.coefficients) == sum(len(x) for x in adapt_data.iterations)
    return adapt_data

In [58]:
from pathlib import Path

# Read the whole log; "log.txt" is a relative path, so it is resolved against
# the kernel's current working directory.
text = Path("log.txt").read_text()
adapt_data = parse_log(text)
# Keep every QE(...) as a single token: token_coeffs recognises QE entries by
# their "QE" prefix.
tokens = adapt_data.to_tokens(explode_qe=False)
tokens

['<ovp +>',
 'QE(4, 5, 2, 3)',
 'QE(2, 5, 3, 4)',
 '</ovp>',
 '<tetris>',
 '<ovp +>',
 'QE(6, 7, 0, 1)',
 'QE(0, 7, 1, 6)',
 '</ovp>',
 '</tetris>',
 '<ovp +>',
 'QE(4, 7, 0, 3)',
 'QE(0, 7, 3, 4)',
 '</ovp>',
 '<tetris>',
 '<ovp +>',
 'QE(5, 6, 1, 2)',
 'QE(1, 6, 2, 5)',
 '</ovp>',
 '</tetris>',
 '<ovp +>',
 'QE(4, 5, 0, 1)',
 'QE(0, 5, 1, 4)',
 '</ovp>',
 '<tetris>',
 '<ovp +>',
 'QE(6, 7, 2, 3)',
 'QE(2, 7, 3, 6)',
 '</ovp>',
 '</tetris>',
 '<mvp>',
 'QE(5, 6, 0, 3)',
 'QE(3, 6, 0, 5)',
 '</mvp>',
 '<tetris>',
 '<mvp>',
 'QE(4, 7, 1, 2)',
 'QE(2, 7, 1, 4)',
 '</mvp>',
 '</tetris>']

In [61]:
# Print each token next to its coefficient; tokens that are not QE(...) entries show 0.
for token, coeff in zip(tokens, adapt_data.token_coeffs(tokens)):
    print(token.ljust(15), coeff)

<ovp +>         0
QE(4, 5, 2, 3)  -0.3568552637516248
QE(2, 5, 3, 4)  -0.3568552637516248
</ovp>          0
<tetris>        0
<ovp +>         0
QE(6, 7, 0, 1)  -0.13377244824954648
QE(0, 7, 1, 6)  -0.13377244824954648
</ovp>          0
</tetris>       0
<ovp +>         0
QE(4, 7, 0, 3)  0.2025085183463875
QE(0, 7, 3, 4)  0.2025085183463875
</ovp>          0
<tetris>        0
<ovp +>         0
QE(5, 6, 1, 2)  0.20250851782832882
QE(1, 6, 2, 5)  0.20250851782832882
</ovp>          0
</tetris>       0
<ovp +>         0
QE(4, 5, 0, 1)  -0.10712702697340878
QE(0, 5, 1, 4)  -0.10712702697340878
</ovp>          0
<tetris>        0
<ovp +>         0
QE(6, 7, 2, 3)  -0.0911882366544645
QE(2, 7, 3, 6)  -0.0911882366544645
</ovp>          0
</tetris>       0
<mvp>           0
QE(5, 6, 0, 3)  -0.15001282593312065
QE(3, 6, 0, 5)  0.34642006949632015
</mvp>          0
<tetris>        0
<mvp>           0
QE(4, 7, 1, 2)  -0.15001282658777504
QE(2, 7, 1, 4)  0.20210538293472965
</mvp>          0
</tetr