Skip to content

Commit

Permalink
Separate expand to its own library (#682)
Browse files Browse the repository at this point in the history
* Separate expand completely

* Make expansion a library
  • Loading branch information
angelhof committed May 31, 2023
1 parent ec99291 commit 7686c77
Show file tree
Hide file tree
Showing 31 changed files with 247 additions and 1,012 deletions.
6 changes: 4 additions & 2 deletions compiler/ast_to_ir.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import subprocess

from shasta.ast_node import *
from sh_expand.expand import expand_command, ExpansionState

from shell_ast.ast_util import *
from ir import *
from util import *
from parse import from_ast_objects_to_shell
from shell_ast.expand import expand_command, ExpansionState
import subprocess

## TODO: Separate the ir stuff to the bare minimum and
## try to move this to the shell_ast folder.
Expand Down
179 changes: 4 additions & 175 deletions compiler/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
import os
import subprocess
import math
import shlex

from datetime import datetime

from util import *

Expand Down Expand Up @@ -250,179 +247,11 @@ def init_log_file():
pass


def is_array_variable(token):
    """Report whether a `declare` type token marks an array variable.

    The token is treated as an array marker when it contains a lowercase
    'a' anywhere (e.g. ``-a`` or ``-ax``).
    """
    return any(flag == 'a' for flag in token)

## Based on the following:
## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting
def ansi_c_expand(string):
    """Expand the backslash escapes of an ANSI-C quoted string.

    Args:
        string: the text of the quoted string (without the leading `$`),
            possibly containing escapes such as ``\\n`` or ``\\x41``.

    Returns:
        The string with the escapes understood by Python's
        ``unicode_escape`` codec replaced by the characters they denote.
    """
    ## `unicode_escape` reads its input bytes as Latin-1, so encoding to UTF-8
    ## first would corrupt every non-ASCII character (e.g. "é" -> "Ã©").
    ## Encode as Latin-1 instead and let `backslashreplace` turn anything
    ## beyond U+00FF into a \uXXXX / \UXXXXXXXX escape, which the decoder
    ## then restores unchanged.
    escaped_bytes = string.encode("latin-1", errors="backslashreplace")
    return escaped_bytes.decode("unicode_escape")

## Locate the index at which the current variable/function entry ends.
def find_next_delimiter(tokens, i):
    """Return the index of the token that follows the entry starting at `i`.

    When `tokens[i]` is the `declare` keyword the entry is taken to span
    three tokens. Otherwise the result is the position of the next
    `declare` token after `i`, or `len(tokens)` when there is none.
    """
    if tokens[i] == "declare":
        return i + 3

    ## TODO: When is this case actually useful?
    for position in range(i + 1, len(tokens)):
        if tokens[position] == "declare":
            return position
    return len(tokens)

def parse_array_variable(tokens, i):
    """Parse one array declaration (`declare -a name=(...)`) from `tokens`.

    Args:
        tokens: list of string tokens; `tokens[i]` is the `declare` keyword,
            `tokens[i+1]` the type flags and `tokens[i+2]` the variable name
            together with the first array item.
        i: index of the `declare` keyword that opens the declaration.

    Returns:
        A tuple `(var_name, var_type, var_values, next_i)`:
        `var_type` is always None; `var_values` is a list of item values
        with None filling the gaps of sparse arrays (or "" when the
        declaration contains no `=`); `next_i` is the index of the first
        token after the declaration.

    Raises:
        IndexError: if the tokens run out before an item ending in `)`
            is found.
    """
    ## The type follows the `declare` keyword
    declare_type = tokens[i+1]
    assert(is_array_variable(declare_type))

    ## The variable name and first argument
    ## TODO: Test with empty array and single value array
    name_and_start = tokens[i+2]
    first_equal_index = name_and_start.find('=')

    ## If it doesn't contain any = then it is empty
    if first_equal_index == -1:
        ## Then the name is the whole token,
        ## the type is None (TODO)
        ## and the value is empty
        return name_and_start, None, "", i+3

    var_name = name_and_start[:first_equal_index]
    array_start = name_and_start[first_equal_index+1:]

    ## TODO: Michael?
    var_type = None

    var_values = []
    if array_start == "()":
        return var_name, var_type, var_values, i+3

    ## Remove the opening parenthesis
    array_item = array_start[1:]

    ## Set the index that points to array items
    curr_i = i+2

    while True:
        ## TODO: Is this check adequate? Or could it miss the end
        ##       (or be misled into an earlier end by the item value?)
        is_last_item = array_item.endswith(")")
        if is_last_item:
            array_item = array_item[:-1]

        ## Find the index and value of the array item
        first_equal_index = array_item.find('=')
        item_index_raw = array_item[:first_equal_index]
        item_value = array_item[first_equal_index+1:]

        ## Sometimes the value starts with a dollar mark, see Bash ANSI-C quoting:
        ## https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html#ANSI_002dC-Quoting
        if item_value.startswith("$"):
            ## TODO: Figure out if this is adequate
            item_value = ansi_c_expand(item_value[1:])

        ## The raw index looks like `[N]`; strip the brackets
        item_index = int(item_index_raw[1:-1])

        ## Add None values if the index is larger than the next item (see Bash sparse arrays)
        ## TODO: Keep bash array values as maps to avoid sparse costs
        var_values += [None] * (item_index - len(var_values))
        ## Set the next item
        var_values.append(item_value)

        ## Step past the item that was just consumed
        curr_i += 1
        if is_last_item:
            break

        ## Only fetch another token when more items are expected; reading
        ## unconditionally raised IndexError when the array declaration
        ## was the last thing in `tokens`.
        array_item = tokens[curr_i]

    return var_name, var_type, var_values, curr_i

##
## Read a shell variables file
## Set the shell variables
##

## NOTE(review): this appears to be the start of the removed `read_vars_file`;
## the page shows a diff whose +/- markers and indentation are lost, so the
## rest of its body seems to be interleaved with `set_vars_file` below --
## confirm against the repository before relying on this span.
def read_vars_file(var_file_path):
global config

## Log the path of the variables file.
log("Reading variables from:", var_file_path)


## Start with no shell variables recorded in the configuration.
config['shell_variables'] = None
## Store the given variable dictionary and the path of the variables file in
## the global `config`.
## NOTE(review): everything from the `if(not var_file_path is None)` check
## onwards builds a separate `vars_dict` by tokenizing the file and looks like
## the removed body of `read_vars_file` (diff markers are lost on this page)
## -- confirm which lines belong to which function.
def set_vars_file(var_file_path: str, var_dict: dict):
global config
config['shell_variables'] = var_dict
config['shell_variables_file_path'] = var_file_path
if(not var_file_path is None):
vars_dict = {}
# with open(var_file_path) as f:
# lines = [line.rstrip() for line in f.readlines()]

## Read the whole file at once and report how long the read took.
with open(var_file_path) as f:
variable_reading_start_time = datetime.now()
data = f.read()
variable_reading_end_time = datetime.now()
print_time_delta("Variable Reading", variable_reading_start_time, variable_reading_end_time)

variable_tokenizing_start_time = datetime.now()
## TODO: Can we replace this tokenizing process with our own code? This is very slow :'(
## It takes about 15ms on deathstar.
tokens = shlex.split(data)
variable_tokenizing_end_time = datetime.now()
print_time_delta("Variable Tokenizing", variable_tokenizing_start_time, variable_tokenizing_end_time)
# log("Tokens:", tokens)

# MMG 2021-03-09 definitively breaking on newlines (e.g., IFS) and function outputs (i.e., `declare -f`)
# KK 2021-10-26 no longer breaking on newlines (probably)

## At the start of each iteration token_i should point to a 'declare'
token_i = 0
while token_i < len(tokens):
# FIXME is this assignment needed?
export_or_typeset = tokens[token_i]

## Array variables require special parsing treatment
if (export_or_typeset == "declare" and is_array_variable(tokens[token_i+1])):
var_name, var_type, var_value, new_token_i = parse_array_variable(tokens, token_i)
vars_dict[var_name] = (var_type, var_value)
token_i = new_token_i
continue

## Join the tokens of this entry (everything after its first token) into
## one string of the form `[type ]name[=value]`.
new_token_i = find_next_delimiter(tokens, token_i)
rest = " ".join(tokens[(token_i+1):new_token_i])
token_i = new_token_i

space_index = rest.find(' ')
eq_index = rest.find('=')
var_type = None

## Declared but unset?
if eq_index == -1:
if space_index != -1:
var_name = rest[(space_index+1):]
var_type = rest[:space_index]
else:
var_name = rest
var_value = ""
## Set, with type
elif(space_index < eq_index and not space_index == -1):
var_type = rest[:space_index]

## A type of "--" is recorded as no type.
if var_type == "--":
var_type = None

var_name = rest[(space_index+1):eq_index]
var_value = rest[(eq_index+1):]
## Set, without type
else:
var_name = rest[:eq_index]
var_value = rest[(eq_index+1):]

## Strip quotes
if var_value is not None and len(var_value) >= 2 and \
var_value[0] == "\"" and var_value[-1] == "\"":
var_value = var_value[1:-1]

## Each variable maps to a (type, value) pair.
vars_dict[var_name] = (var_type, var_value)

## Publish the parsed variables in the global configuration.
config['shell_variables'] = vars_dict

0 comments on commit 7686c77

Please sign in to comment.