In [15]:
from omnibelt import pformat as original_pformat
from string import Formatter

In [10]:

class PowerFormatter(Formatter):
	# TODO: partial formatting - only format fields that are specified, and leave others as is
	def get_field(self, field_name, args, kwargs):
		try:
			return super().get_field(field_name, args, kwargs)
		except: # TODO: find the right exception
			return eval(self.vformat(field_name, args, kwargs), kwargs), field_name
			# return f'{{{field_name}}}', field_name


	def parse(self, s):
		start_idx = -1
		escaped = ''
		pre_idx = 0
		counter = 0
		idx = 0
		
		parity = sum((-1)**(c=='{') for c in s if c in '{}')
		assert parity % 2 == 0, f'Unbalanced brackets in {s!r} (remember to escape with double brackets)'
		
		tree = []
		
		

		while idx < len(s):
			open_idx = s.find("{", idx)
			close_idx = s.find("}", idx)

			if open_idx == -1 and close_idx == -1:
				# no more fields - yield the rest of the string
				if counter == 0:
					# raise StopIteration
					# print(f'ending with: {escaped + s[idx:]!r}')
					yield escaped + s[idx:], None, '', None
				else:
					raise ValueError("Mismatched '{' at index {}".format(start_idx))
				break

			if open_idx != -1 and (open_idx < close_idx or close_idx == -1):
				# found an open bracket - add everything up to it to the escaped string, and increment the (depth) counter
				if counter == 0:
					# yield (s[idx:open_idx], None)
					start_idx = open_idx
					pre_idx = idx
				idx = open_idx + 1
				counter += 1

			if close_idx != -1 and (close_idx < open_idx or open_idx == -1):
				# found a close bracket - decrement the (depth) counter, and if it's 0, yield the field
				if counter == 0:
					raise ValueError(f"Mismatched '}}' at index {close_idx}")
				counter -= 1
				if counter == 0:
					pre = s[pre_idx:start_idx] # everything so far that needs to be escaped
					field = s[start_idx + 1:close_idx] # the field itself (everything between the brackets)
					if field.startswith("{") and close_idx+1 < len(s) and s[close_idx+1] == "}": # double brackets - escape the inner ones
						escaped = pre + '{'
						for lit, fd, spec, conv in self.parse(field[1:]):
							if escaped is not None:
								lit = escaped + lit + '}'
								escaped = ''
							yield lit, fd, spec, conv

						close_idx += 1
						# escaped = '}'

					else:
						# spec = None
						lim = field.rfind('}')
						conv_idx = field[lim+1:].find('!')
						if conv_idx != -1:
							conv = field[lim+2+conv_idx:]
							field = field[:lim+1+conv_idx]
						else:
							conv = None

						if conv is None:
							lim = field.rfind(']')
							spec_idx = field[lim+1:].find(':')
							if spec_idx != -1:
								spec = field[lim+2+spec_idx:]
								field = field[:lim+1+spec_idx]
							else:
								spec = ''
						else:
							spec_idx = conv.find(':')
							if spec_idx != -1:
								spec = conv[spec_idx+1:]
								conv = conv[:spec_idx]
							else:
								spec = ''

						# print(f'yielding: {escaped + pre!r}, {field!r}, {spec!r}, {conv!r}')
						# field = eval(self._format(field), self._world)
						yield escaped + pre, field, spec, conv
						escaped = ''
					start_idx = -1
				idx = close_idx + 1



def pformat(s, **vars):
	"""
	Evaluates the keys in the given string as expressions using the given variables (recursively)
	"""
	fmt = PowerFormatter()
	return fmt.format(s, **vars)

raw = '''```python
verbalizations = {{
  "Smoking Rate": {{
    1: {{"variable": ["the smoking rate", "proportion of smokers"]}},
    2: {{"subject": "people", "value": {{0: ["do not smoke", "are non-smokers"], 1: ["smoke", "are smokers"]}}}},
    3: {{"value": {{0: ["people do not smoke", "people are non-smokers"], 1: ["people smoke", "people are smokers"]}}}},
    4: {{"domain": "people", "value": {{0: ["do not smoke", "are non-smokers"], 1: ["smoke", "are smokers"]}}}},
    5: {{"value": {{0: ["the smoking rate decreases", "people stop smoking"], 1: ["the smoking rate increases", "people start smoking"]}}}},
    6: {{"value": {{0: ["For people that do not smoke", "Among non-smokers"], 1: ["For people that smoke", "Among smokers"]}}}},
    7: {{"value": {{0: ["people quite smoking", "people become non-smokers"], 1: ["people become smokers", "people take up smoking"]}}}},
  }},
  ...
}}
```'''

ls = [1]

raw = '''{ls[0]} is equivalent to {{ls}[0]}''' # '''1 is equivalent to 1'''
raw = '''{{f}({a})}'''
raw = '''l1 = {ls[{key1}]} vs l2 = {{ls}[0]}'''

raw = '''{{ {{content}}, }}''' # '''{ {content}, }'''

print(pformat(raw))

KeyboardInterrupt: 

In [16]:
original_pformat('hello {\'test\'}')

'hello test'

In [18]:
original_pformat('hello {"test"}', **{'"test"': 'world'})

'hello world'

In [20]:
import ast

In [24]:
def find_variables_in_expression(expression):
	"""
	Find all variables used in a given Python expression.
	
	Args:
	expression (str): A string representing a Python expression.
	
	Returns:
	list: A list of variable names used in the expression.
	"""
	try:
		tree = ast.parse(expression, mode='eval')
	except SyntaxError:
		return "The input is not a valid expression."

	class VariableVisitor(ast.NodeVisitor):
		def __init__(self):
			self.variables = []

		def visit_Name(self, node):
			if node.id not in self.variables:
				self.variables.append(node.id)

	visitor = VariableVisitor()
	visitor.visit(tree)
	return list(visitor.variables)
 

In [31]:
find_variables_in_expression('ls[key1.endswith(key2)]')

['ls', 'key1', 'key2']

In [32]:
find_variables_in_expression('{-1:"no", 1:"yes"}[int(answer)]')

['int', 'answer']

In [35]:
a, b = 'a', 'b'
a + b, 'a' 'b'

('ab', 'ab')

In [36]:
' {{treatment}{int(treated)}_wheresentence}'

' {{treatment}{int(treated)}_wheresentence}'

In [37]:
find_variables_in_expression('treatment+treated+"_wheresentence"')

['treatment', 'treated']

In [39]:
find_variables_in_expression('f\'{treatment}{treated}_wheresentence\'')

['treatment', 'treated']

In [55]:
s1 = 'look at {me!r} and not at {{treatment}{treated}_noun!r}, {{and also {info[{f\'{treatment}{treated}\'}]}'
s2 = '{{treatment}{treated}_noun!r} == {f\'{treatment}{treated}_noun\'!r} {{'

In [48]:
s1.format(me='me', treatment='X', treated=1, info={'X1': 'cool', 'X0': 'really cool'})

ValueError: unexpected '{' in field name

In [78]:
fields, opens, closes = parse_bracket_tree(s1)
fields, opens, closes

([(8, 13, []),
  (26, 54, [(27, 37, []), (38, 46, [])]),
  (68, 100, [(74, 98, [(77, 87, []), (88, 96, [])])])],
 [57],
 [])

In [100]:


						# for lit, fd, spec, conv in self.parse(field[1:]):
						# 	if escaped is not None:
						# 		lit = escaped + lit + '}'
						# 		escaped = ''
						# 	yield lit, fd, spec, conv

import re

class PowerFormatter(Formatter):
	# TODO: partial formatting - only format fields that are specified, and leave others as is
	def get_field(self, field_name, args, kwargs):
		return field_name
		# try:
		# 	return super().get_field(field_name, args, kwargs)
		# except: # TODO: find the right exception
		# 	return eval(self.vformat(field_name, args, kwargs), kwargs), field_name
		# 	# return f'{{{field_name}}}', field_name
	
	@staticmethod
	def parse_bracket_tree(s):
		'''
		Parses a string with bracket escapes into a tree of bracketed fields
		
		Output: [(start, end, children), ...]
		where start and end are the indices of the brackets, and children is a list of children nodes
		if children is None, then it's an escaped bracket
		'''
		stack = []
		extra_closes = []
		pairs = []
		for i, c in enumerate(s):
			if c == '{':
				stack.append(i)
			elif c == '}':
				if len(stack):
					start = stack.pop()
					pairs.append((start, i))
				else:
					extra_closes.append(i)
		
		# remove unpaired double bracket escapes
		assert all(i+1 in stack or i-1 in stack for i in stack), f'Unbalanced opening brackets in {s!r} (remember to use double brackets to escape)'
		assert all(i+1 in extra_closes or i-1 in extra_closes for i in extra_closes), f'Unbalanced closing brackets in {s!r} (remember to use double brackets to escape)'
		
		escaped_nodes = ([(i, i+1, None) for i in stack if i+1 in stack] # escaped opening brackets
						 + [(i, i+1, None) for i in extra_closes if i+1 in extra_closes]) # escaped closing brackets
		escaped_pairs = [(start, end) for start, end in pairs if (start-1, end+1) in pairs]
		for start, end in escaped_pairs:
			pairs.remove((start-1, end+1))
			pairs.remove((start, end))
			escaped_nodes.append((start-1, start, None))
			escaped_nodes.append((end, end+1, None))
		
		if not len(pairs):
			return sorted(escaped_nodes)
		
		def build_interval_tree(remaining, lim=None):
			nodes = []
			while remaining and (lim is None or remaining[-1][0] < lim):
				start, end = remaining.pop()
				nodes.append((start, end, build_interval_tree(remaining, end)))
			return nodes
		tree = build_interval_tree(sorted(pairs, reverse=True))
		return sorted(escaped_nodes + tree)
		
	
	_style_suffix_pattern = r"([^:!]+)((!r|!s|!a)?(:[<>=^]?[\+\- ]?#?0?\d*,?\d*\.?\d*[bcdeEfFgGnoxX%])?)$"
	def parse_field_content(self, content):
		match = re.match(self._style_suffix_pattern, content)
		if match:
			field, style = match.group(1), match.group(2)
			if field.endswith('='):
				field = field[:-1]
			return field, style
		field = content[:-1] if content.endswith('=') else content
		style = '=' if content.endswith('=') else None
		return field, style
		
		
	def emit_fields(self, s, start, end, children, fn):
		for child in children:
			yield from self.emit_fields(s, *child, fn)
		expr, style = self.parse_field_content(s[start+1:end])
		yield fn(expr, style)
	
	
	def evaluator(self, expr, style):
		return (expr, style)
	
	
	def viewer(self, expr, style):
		return (expr, style)


	def full_parse(self, s, fn=None):
		if fn is None:
			fn = self.evaluator
		
		nodes = self.parse_bracket_tree(s)
		
		idx = 0
		for start, end, children in nodes:
			if children is None:
				if end > idx:
					literal = s[idx:end-1]
					yield literal, None, '', None
					idx = end+1
			else:
				yield from self.emit_fields(s, start, end, children, fn)
				idx = end+1
	

	def parse(self, s, fn=None):
		if fn is None:
			fn = self.evaluator
		
		nodes = self.parse_bracket_tree(s)
		
		idx = 0
		for start, end, children in nodes:
			if children is None:
				if end > idx:
					literal = s[idx:end-1]
					yield literal, None, '', None
					idx = end+1
			
			else:
				
				
				pass
			
			
			
			if idx < start:
				yield s[idx:start], None, '', None
			if children is None:
				yield s[start+1:end], None, '', None
			else:
				yield s[start+1:end], children, '', None
			idx = end+1
		
		
		pass

In [101]:
pf = PowerFormatter()

In [102]:
list(pf.full_parse('hello {\'test\'}', pf.viewer))

[("'test'", '')]

In [114]:
class PowerFormatter(Formatter):
	def evaluate_expr(self, expr, style):
		return (expr, style)
		
	
	def parse(self, s):
		nodes = self.parse_bracket_tree(s)
		
		self.idx = 0
		for start, end, children in nodes:
			if children is None:
				if end > self.idx:
					literal = s[self.idx:end-1]
					yield literal, None, '', None
					self.idx = end+1
			else:
				yield from self.emit_fields(s, start, end, children)
				self.idx = end+1
	
		if self.idx < len(s):
			yield s[self.idx:], None, '', None
	
	
	@staticmethod
	def parse_bracket_tree(s):
		'''
		Parses a string with bracket escapes into a tree of bracketed fields
		
		Output: [(start, end, children), ...]
		where start and end are the indices of the brackets, and children is a list of children nodes
		if children is None, then it's an escaped bracket
		'''
		stack = []
		extra_closes = []
		pairs = []
		for i, c in enumerate(s):
			if c == '{':
				stack.append(i)
			elif c == '}':
				if len(stack):
					start = stack.pop()
					pairs.append((start, i))
				else:
					extra_closes.append(i)
		
		# remove unpaired double bracket escapes
		assert all(i+1 in stack or i-1 in stack for i in stack), f'Unbalanced opening brackets in {s!r} (remember to use double brackets to escape)'
		assert all(i+1 in extra_closes or i-1 in extra_closes for i in extra_closes), f'Unbalanced closing brackets in {s!r} (remember to use double brackets to escape)'
		
		escaped_nodes = ([(i, i+1, None) for i in stack if i+1 in stack] # escaped opening brackets
						 + [(i, i+1, None) for i in extra_closes if i+1 in extra_closes]) # escaped closing brackets
		escaped_pairs = [(start, end) for start, end in pairs if (start-1, end+1) in pairs]
		for start, end in escaped_pairs:
			pairs.remove((start-1, end+1))
			pairs.remove((start, end))
			escaped_nodes.append((start-1, start, None))
			escaped_nodes.append((end, end+1, None))
		
		if not len(pairs):
			return sorted(escaped_nodes)
		
		def build_interval_tree(remaining, lim=None):
			nodes = []
			while remaining and (lim is None or remaining[-1][0] < lim):
				start, end = remaining.pop()
				nodes.append((start, end, build_interval_tree(remaining, end)))
			return nodes
		tree = build_interval_tree(sorted(pairs, reverse=True))
		return sorted(escaped_nodes + tree)
		
	
	_style_suffix_pattern = r"([^:!]+)((!r|!s|!a)?(:[<>=^]?[\+\- ]?#?0?\d*,?\d*\.?\d*[bcdeEfFgGnoxX%])?)$"
	def parse_field(self, field):
		match = re.match(self._style_suffix_pattern, field)
		if match:
			content, style = match.group(1), match.group(2)
			if content.endswith('='):
				content = content[:-1]
			return content, style
		content = field[:-1] if field.endswith('=') else field
		style = '=' if field.endswith('=') else None
		return content, style
		
		
	def emit_fields(self, s, start, end, children):
		for child in children:
			yield from self.emit_fields(s, *child)
		content, style = self.parse_field(s[start+1:end])
		yield s[self.idx:start], content, style, None
	
	
	def format_field(self, value, format_spec):
		print(f'formatting {value!r} with {format_spec!r}')
		return value
		
	
	def get_field(self, field_name, args, kwargs):
		print(f'getting {field_name!r} from {args!r} and {kwargs!r}')
		return field_name, field_name
	
	
	def evaluator(self, expr, style):
		return (expr, style)
	
	
	def viewer(self, expr, style):
		return (expr, style)
	
pf = PowerFormatter()
list(pf.parse('hello {\'test\'} after'))

[('hello ', "'test'", '', None), (' after', None, '', None)]

In [115]:
pf.format(' }} hello {\'test\'} after')

getting "'test'" from () and {}
formatting "'test'" with ''


"  hello 'test' after"

In [85]:
pf.parse_field_content('test!r')

('test!r', None)

In [86]:
t = 'test!r'
pattern = r'^(=)?(.!(s|r|a))?:(.*)((<|>|=|\^)[-+ ]?(\d+)?(\.\d+)?([bcdeEfFgGnosxX%]))?$'
match = re.match(pattern, t)

In [91]:
def extract_formatting_info_strict_v3(s):
    # Further refined regular expression to handle cases like "test=" and "test!r"
    pattern = r"([^:!]+)((!r|!s|!a)?(:[<>=^]?[\+\- ]?#?0?\d*,?\d*\.?\d*[bcdeEfFgGnoxX%])?)$"
    match = re.match(pattern, s)
    if match:
        field_name, format_specifier = match.group(1), match.group(2)
		if field_name.endswith('='):
			field_name = field_name[:-1]
        return (field_name, format_specifier) if format_specifier else (field_name, None)
    else:
		if s.endswith('='):
			s = s[:-1]
			
        return (s, None)


# Example usage
t = 'test!r'
print(extract_formatting_info_strict_v3(t))

('test', '!r')


In [87]:
match

In [75]:

	
# Example usage
intervals = [(8, 13), (26, 54), (27, 37), (38, 46), (68, 100), (74, 98), (77, 87), (88, 96)]
interval_tree = build_interval_tree(sorted(intervals, reverse=True))
interval_tree


[(8, 13, []),
 (26, 54, [(27, 37, []), (38, 46, [])]),
 (68, 100, [(74, 98, [(77, 87, []), (88, 96, [])])])]

In [41]:
def parse_brackets_advanced_v3(s):
    stack = [[]]  # Stack to keep track of nesting levels
    current = []  # Current string buffer
    i = 0  # Index for manual traversal

    while i < len(s):
        char = s[i]

        if char == '{':
            # Check for escaped opening bracket
            if i + 1 < len(s) and s[i + 1] == '{':
                # It's an escaped opening bracket, treat as literal
                current.append('{')
                i += 2
            else:
                # Start of a new level, push current buffer to stack and reset it
                stack.append(current)
                current = []
                i += 1
        elif char == '}':
            # Check for escaped closing bracket
            if i + 1 < len(s) and s[i + 1] == '}':
                # It's an escaped closing bracket, treat as literal
                current.append('}')
                i += 2
            else:
                # End of current level, pop from stack and append current buffer to it
                if len(stack) > 1:
                    nested = ''.join(current)
                    current = stack.pop()
                    current.append(nested)
                    i += 1
                else:
                    # Unbalanced closing bracket, treat it as regular text
                    current.append('}')
                    i += 1
        else:
            # Regular text, add it to current buffer
            current.append(char)
            i += 1

    # Joining the remaining text, if any
    return ''.join(current)  # Return the last buffer, which is the root level

# Test the function again
test_string = "This is a {test string with {nested {brackets}} and some {{escaped ones}}}"
parsed_text_advanced_v3 = parse_brackets_advanced_v3(test_string)
parsed_text_advanced_v3



'nested brackets} and some {escaped ones}'