# Experimentation

## Parser for OData Queries

In [141]:
# Test cases
# Sample OData $filter expressions used to exercise the translator.
test_cases = [
    # Plain comparisons
    "ColA eq 3",
    "ColB eq 'abc'",
    # Logical combination of comparisons
    "ColA eq 3 and ColB eq 'abc'",
    # String function on its own
    "startswith(ColA, 'hello')",
    # Parenthesised groups (every comparison uses the OData `eq` operator)
    "(ColA eq 3 and ColB eq 'abc') or (ColC eq 55 and ColD eq 'lel')",
    # Two string functions in one filter
    "startswith(ColA, 'hello') and substringof('hello', ColB)",
]

In [145]:
import re

# OData comparison operators and their SQL spellings, applied in this order.
# The surrounding spaces are part of the match, so only space-delimited,
# lower-case operators are rewritten.
_ODATA_OPERATORS = (
    (' lt ', ' < '),
    (' le ', ' <= '),
    (' gt ', ' > '),
    (' ge ', ' >= '),
    (' eq ', ' = '),
    (' ne ', ' != '),
)

# Function calls are matched case-insensitively up to the first ')'.
# The word boundary keeps longer identifiers ending in the same text from matching.
_STARTSWITH_CALL = re.compile(r'\bstartswith\((.*?)\)', re.IGNORECASE)
_SUBSTRINGOF_CALL = re.compile(r'\bsubstringof\((.*?)\)', re.IGNORECASE)


def _split_call_args(match):
    """Return the comma-separated arguments of a matched call, whitespace-stripped."""
    return [arg.strip() for arg in match.group(1).split(',')]


def _clean_literal(text):
    """Drop every character that is not an ASCII letter or digit (removes the quotes too)."""
    return re.sub(r'[^a-zA-Z0-9]', '', text)


def _startswith_to_like(match):
    """Rewrite one ``startswith(column, string)`` call as ``column LIKE 'string%'``."""
    args = _split_call_args(match)
    if len(args) < 2:
        # Not a two-argument call: leave the text untouched instead of raising.
        return match.group(0)
    return f"{args[0]} LIKE '{_clean_literal(args[1])}%'"


def _substringof_to_like(match):
    """Rewrite one ``substringof(string, column)`` call as ``column LIKE '%string%'``."""
    args = _split_call_args(match)
    if len(args) < 2:
        # Not a two-argument call: leave the text untouched instead of raising.
        return match.group(0)
    return f"{args[1]} LIKE '%{_clean_literal(args[0])}%'"


def parse_odata_filter(query: str) -> str:
    """Translate an OData ``$filter`` expression into a SQL-style condition.

    Two rewrites are applied:

    * the comparison operators ``lt``, ``le``, ``gt``, ``ge``, ``eq`` and ``ne``
      (lower-case, with a single space on each side) become
      ``<``, ``<=``, ``>``, ``>=``, ``=`` and ``!=``;
    * every ``startswith(column, string)`` call becomes ``column LIKE 'string%'``
      and every ``substringof(string, column)`` call becomes
      ``column LIKE '%string%'``. Calls are recognised anywhere in the query,
      in any letter case, and all occurrences are rewritten.

    Everything else (``and``/``or``, parentheses, and functions such as
    ``day()``, ``month()``, ``year()``, ``hour()``, ``minute()`` and
    ``second()``) is passed through unchanged.

    Args:
        query: The OData filter expression.

    Returns:
        The rewritten expression.

    NOTE(review): the string argument of ``startswith``/``substringof`` is
    reduced to ASCII letters and digits, so spaces and punctuation inside the
    literal are lost; confirm that this is the intended sanitisation.
    NOTE(review): operator replacement is plain text substitution, so it also
    rewrites operator words that appear inside quoted literals.
    """
    for odata_op, sql_op in _ODATA_OPERATORS:
        query = query.replace(odata_op, sql_op)

    # A callable replacement rewrites each call in place: no pattern is built
    # from query text, and the replacement is not parsed for backslash escapes.
    query = _STARTSWITH_CALL.sub(_startswith_to_like, query)
    query = _SUBSTRINGOF_CALL.sub(_substringof_to_like, query)

    # TODO: day(), month(), year(), hour(), minute(), second() are not translated yet.
    return query

In [146]:
# Spot-check the last test case, which combines startswith() and substringof().
parse_odata_filter(test_cases[-1])

"ColA LIKE 'hello%' and ColB LIKE '%hello%'"

In [147]:
# Translate every sample filter and show the results, one per line.
for odata_filter in test_cases:
    translated = parse_odata_filter(odata_filter)
    print(translated)

ColA = 3
ColB = 'abc'
ColA = 3 and ColB = 'abc'
ColA LIKE 'hello%'
(ColA = 3 and ColB = 'abc') or (ColC = 55 and ColD = 'lel')
ColA LIKE 'hello%' and ColB LIKE '%hello%'
