In [1]:
import pandas as pd
import re
from typing import *

In [2]:
zebraLogicBench = pd.read_parquet("hf://datasets/allenai/ZebraLogicBench/grid_mode/test-00000-of-00001.parquet")

In [3]:
zebraLogicBench[0:2]

Unnamed: 0,id,size,puzzle,solution,created_at
0,lgp-test-5x6-16,5*6,"There are 5 houses, numbered 1 to 5 from left ...","{'header': ['House', 'Name', 'Nationality', 'B...",2024-07-11T01:22:10.734298
1,lgp-test-4x4-27,4*4,"There are 4 houses, numbered 1 to 4 from left ...","{'header': ['House', 'Name', 'Occupation', 'Bo...",2024-07-11T01:22:10.732670


In [4]:
puzzle1 = zebraLogicBench['puzzle'][0]
print(puzzle1)

There are 5 houses, numbered 1 to 5 from left to right, as seen from across the street. Each house is occupied by a different person. Each house has a unique attribute for each of the following characteristics:
 - Each person has a unique name: `Peter`, `Alice`, `Bob`, `Eric`, `Arnold`
 - The people are of nationalities: `norwegian`, `german`, `dane`, `brit`, `swede`
 - People have unique favorite book genres: `fantasy`, `biography`, `romance`, `mystery`, `science fiction`
 - Everyone has something unique for lunch: `stir fry`, `grilled cheese`, `pizza`, `spaghetti`, `stew`
 - Each person has a favorite color: `red`, `green`, `blue`, `yellow`, `white`
 - The people keep unique animals: `bird`, `dog`, `cat`, `horse`, `fish`

## Clues:
1. The person who loves fantasy books is the Norwegian.
2. The cat lover and the person who loves biography books are next to each other.
3. The German is Bob.
4. The person who loves yellow is Bob.
5. The person whose favorite color is green is Peter.
6. 

In [5]:
# result = re.findall(r'There are (\d+) houses, numbered 1 to \d+ from left to right, as seen from across the street. Each house is occupied by a different person. Each house has a unique attribute for each of the following characteristics:', puzzle1)
result = re.findall(r'There are (\d+) houses, numbered 1 to \d+ from left to right', puzzle1)

if len(result) != 1:
    print("Invlaid count")

house_count = int(result[0])

house_count

5

In [6]:

# result = re.findall(r' - (.*?): (`(.*?)`)(?:, (`(.*?)`))*', puzzle1)

x = []
for i in range(0, house_count):
    x.append('`(.*?)`')

var_parser = ", ".join(x)

variables = re.findall(r' - (.*?): ' + var_parser, puzzle1)
variables


[('Each person has a unique name', 'Peter', 'Alice', 'Bob', 'Eric', 'Arnold'),
 ('The people are of nationalities',
  'norwegian',
  'german',
  'dane',
  'brit',
  'swede'),
 ('People have unique favorite book genres',
  'fantasy',
  'biography',
  'romance',
  'mystery',
  'science fiction'),
 ('Everyone has something unique for lunch',
  'stir fry',
  'grilled cheese',
  'pizza',
  'spaghetti',
  'stew'),
 ('Each person has a favorite color',
  'red',
  'green',
  'blue',
  'yellow',
  'white'),
 ('The people keep unique animals', 'bird', 'dog', 'cat', 'horse', 'fish')]

In [7]:
# 1. The person who loves fantasy books is the Norwegian.
clues = re.findall(r'\d+. (.*?)\.', puzzle1)


clues

['The person who loves fantasy books is the Norwegian',
 'The cat lover and the person who loves biography books are next to each other',
 'The German is Bob',
 'The person who loves yellow is Bob',
 'The person whose favorite color is green is Peter',
 'There is one house between the Dane and the person who is a pizza lover',
 'The person who loves blue is somewhere to the left of the Dane',
 'The person who loves eating grilled cheese is somewhere to the left of the Norwegian',
 'The person who loves the spaghetti eater is Peter',
 'The person who keeps horses is Alice',
 'The fish enthusiast is directly left of the person who loves science fiction books',
 'There is one house between the Norwegian and Arnold',
 'The person who loves romance books is the British person',
 'There are two houses between the Norwegian and Alice',
 'The bird keeper is the person whose favorite color is red',
 'The dog owner is directly left of the fish enthusiast',
 'The person who loves the stew is the 

In [18]:

class PzVariableGroup:
    name : str
    variables : List[str]
    
    def __init__(self, name : str, variables: List[str]):
        self.name = name
        self.variables = variables
        
    def __str__(self):
        return f'[{", ".join(self.variables)}] {self.name}'
    
    def __repr__(self):
        return self.__str__()


def parse_puzzle_variable(text : str, house_count : int) -> List[PzVariableGroup]:
    
    # build variables matcher
    x = []
    for i in range(0, house_count):
        x.append('`(.*?)`')
    var_parser = ", ".join(x)
    
    
    raw_variables = re.findall(r' - (.*?): ' + var_parser, text)
    
    variables = []
    for group in raw_variables:
        group_name = group[0]
        
        # clean-up / transform the variables that they match with their usage in clues
        cleaned_variables = []
        for v in group[1:]:
            if ("child" in group_name):
                cleaned_variables.append(f'child is named {v}')
                cleaned_variables.append(f'mother of {v}')
                
            elif "month" in group_name:
                if v == "jan": 
                    cleaned_variables.append("january")
                else:
                    cleaned_variables.append(v)
            
            # elif "keep unique animals" in group_name:
            #     l.append(f'{v} kepper')
                
            elif "hip hop" == v:
                cleaned_variables.append("hip-hop")
                
            elif "swede" == v:
                cleaned_variables.append("swedish")
            
            elif "ford f150" == v:
                cleaned_variables.append("Ford F-150")
            
            elif "cat" == v: 
                cleaned_variables.append(" cat") # prevent match with vacation
                
            else:
                if v.endswith('ing'):
                    v = v[:-3]
                elif v.endswith('s'):
                    v = v[:-1]
                cleaned_variables.append(v)
        
        variables.append(PzVariableGroup(group_name, cleaned_variables))

    return variables

In [27]:
class PzClue:
    clue : str
    variables : List[str]
    function : str | None
    
    def __init__(self, clue : str, vars : List[str], func : str | None):
        self.clue = clue
        self.variables = vars
        self.function = func

    def is_valid(self):
        return self.function is not None
    
    def __str__(self):
        return f'{self.variables} -> {self.function}; {self.clue}'
    
    def __repr__(self):
        return self.__str__()
    

def check_for_clue(var1, var2, regex, text):
    
    sa = regex.replace("%1", var1).replace("%2", var2)
    if re.search(sa, text, re.IGNORECASE):
        return True
    
    sb = regex.replace("%1", var2).replace("%2", var1)
    if re.search(sb, text, re.IGNORECASE):
        return True
    
    return False

def check_for_single_clue(var1, regex, text):
    
    sa = regex.replace("%1", var1)
    if re.search(sa, text, re.IGNORECASE):
        return True
    
    return False

def analyze_clue(vars, clue):
    
    if len(vars) == 1:
        if check_for_single_clue(vars[0], "%1(.*?) first house", clue):
            return "is1"
        if check_for_single_clue(vars[0], "%1(.*?) second house", clue):
            return "is2"
        if check_for_single_clue(vars[0], "%1(.*?) third house", clue):
            return "is3"
        if check_for_single_clue(vars[0], "%1(.*?) fourth house", clue):
            return "is4"
        if check_for_single_clue(vars[0], "%1(.*?) fifth house", clue):
            return "is5"
        if check_for_single_clue(vars[0], "%1(.*?) sixth house", clue):
            return "is6"
    
    if len(vars) == 2:
        if check_for_clue(vars[0], vars[1], "one house between(.*?)%1(.*?)%2", clue):
            return "oneBetween"
        if check_for_clue(vars[0], vars[1], "two houses between(.*?)%1(.*?)%2", clue):
            return "twoBetween"
        if check_for_clue(vars[0], vars[1], "%1(.*?)%2(.*?)next to each other", clue):
            return "nextTo"
        if check_for_clue(vars[0], vars[1], "%1(.*?)directly left of(.*?)%2", clue):
            return "dLeftOf"
        if check_for_clue(vars[0], vars[1], "%1(.*?)left of(.*?)%2", clue):
            return "leftOf"
        if check_for_clue(vars[0], vars[1], "%1(.*?)directly right of(.*?)%2", clue):
            return "dRightOf"
        if check_for_clue(vars[0], vars[1], "%1(.*?)right of(.*?)%2", clue):
            return "rightOf"
        # if check_for_clue(vars[0], vars[1], "%1(.*?)is (the |a )?%2", clue):
        if check_for_clue(vars[0], vars[1], "%1(.*?)is(.*?)%2", clue):
            return "equal"

    return None


def analyze_clues(variables: List[PzVariableGroup], raw_clues : List[str]) -> List[PzClue]:

    # sometimes a variable contains another variables value (eg: 'child of alice' and 'alice')
    # all variables are sorted by length, each match is the removed from the clue text.
    # This ensures that none of the shorter variables can match a part from a longer variable
    all_variables = []
    for var_group in variables:
        for var in var_group.variables:
            all_variables.append(var)
    all_variables = sorted(all_variables, key=len, reverse=True)
    
    clues = []
    for c in raw_clues:
                           
        # extract all variables used in clue
        vars = []
        test_clue = c
        for var in all_variables:
            if re.search(var, test_clue, re.IGNORECASE):
                vars.append(var)
                # test_clue = test_clue.replace(var, "")
                test_clue = re.sub(re.escape(var), '', test_clue, flags=re.IGNORECASE)
        
        lower_clue = c.lower()    
        vars = sorted(vars, key=lambda s: lower_clue.find(s.lower()))
                
        # find the function the clue implies
        func = analyze_clue(vars, c)
        
        clues.append( PzClue(c, vars, func) )

    return clues
    

In [23]:
class PzPuzzleDefinition:
    house_count : int
    variables : List[PzVariableGroup]
    clues : List[PzClue]
    
    def __init__(self, house_count : int, variables : List[PzVariableGroup], clues):
        self.house_count = house_count
        self.variables = variables
        self.clues = clues

    def is_valid(self):
        for c in self.clues:
            if not c.is_valid():
                return False
        return True
    
    def __repr__(self):
        s = f'Houses: {self.house_count}\n'
        s += 'Vars:\n'
        for v in self.variables:
            s += f' {v}\n'
        
        s += 'Clues:\n'
        for c in self.clues:
            s += f' {c}\n'
        
        return s


def analyze_puzzle_text(text):
    # House count
    result = re.findall(r'There are (\d+) houses, numbered 1 to \d+ from left to right', text)

    if len(result) != 1:
        raise Exception("Invalid house count")

    house_count = int(result[0])
        
    # variables
    variables = parse_puzzle_variable(text, house_count)
    
    # clues
    raw_clues = re.findall(r'\d+. (.*?)\.', text)
    clues = analyze_clues(variables, raw_clues)
    
    return PzPuzzleDefinition(house_count, variables, clues)

In [28]:
puzzle = zebraLogicBench['puzzle'][1]
p1 = analyze_puzzle_text(puzzle)

if not p1.is_valid():
    print("Invalid")
    
    for c in p1.clues:
        if not c.is_valid():
            print(c.variables, c.clue)    

    # display((c[2], c[0]))
    # print(c[2], c[0])

print(p1)


Houses: 4
Vars:
 [Alice, Eric, Arnold, Peter] Each person has a unique name
 [artist, engineer, teacher, doctor] Each person has an occupation
 [fantasy, science fiction, mystery, romance] People have unique favorite book genres
 [google pixel 6, iphone 13, oneplus 9, samsung galaxy s21] People use unique phone models
Clues:
 ['engineer', 'samsung galaxy s21'] -> dLeftOf; The person who is an engineer is directly left of the person who uses a Samsung Galaxy S21
 ['fantasy'] -> is2; The person who loves fantasy books is in the second house
 ['Alice'] -> is2; Alice is not in the second house
 ['Eric', 'teacher'] -> equal; Eric is the person who is a teacher
 ['samsung galaxy s21', 'fantasy'] -> equal; The person who uses a Samsung Galaxy S21 is the person who loves fantasy books
 ['iphone 13', 'science fiction'] -> equal; The person who uses an iPhone 13 is the person who loves science fiction books
 ['science fiction', 'oneplus 9'] -> leftOf; The person who loves science fiction books i

In [12]:
re.search("tea", "The person who is a teacher is directly left of the tea drinker")

<re.Match object; span=(20, 23), match='tea'>

In [13]:

count_total = 0
count_broken = 0

index = 0
first_broken = -1
broken = []

for puzzle in zebraLogicBench['puzzle']:
    p1 = analyze_puzzle_text(puzzle)
    
    unsuccessfull = False
    for c in p1.clues:
        if c[2] is None:
            unsuccessfull = True
    
    if unsuccessfull:
        count_broken = count_broken + 1 
        
        if first_broken == -1:
            first_broken = index
        broken.append(index)
        
    count_total = count_total + 1
    
    index = index + 1
    
display(broken)
print(count_broken, count_total, round(count_broken / count_total * 100), '%')

TypeError: 'PzClue' object is not subscriptable