In [None]:
import tatqa_utils
import pandas as pd

In [None]:
devdf = pd.read_json('dataset_raw/tatqa_dataset_dev.json')

In [None]:
import pandas as pd
import pyreadstat
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
import utils
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
import numpy as np
from progress.bar import Bar
%load_ext autoreload
%autoreload 2

In [None]:
import os
with open('dataset_raw/openai.api.key', 'r') as filek: 
    openai_key = filek.read()
os.environ["OPENAI_API_KEY"] =  openai_key 

In [None]:
from langchain.globals import set_llm_cache
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI
from langchain_community.cache import SQLiteCache

llm = ChatOpenAI(temperature=0)
#llm = OpenAI(temperature=0) 
set_llm_cache(SQLiteCache(database_path=".langchain.db"))

In [None]:
import re 
def remove_thousand_separators(text):
    # This regular expression matches numbers with commas as thousand separators
    return re.sub(r'(?<=\d),(?=\d{3})', '', text)
    
def transform_elements(list_of_lists, transform_func):    
    return [
        [transform_func(item) for item in sublist]
        for sublist in list_of_lists
    ]
    
def replace_bracketed_numbers(text):
    return re.sub(r"\((\d+)\)", lambda x: f"-{x.group(1)}", text)

In [None]:
def gen_code(llm, question, table, paragraphs):    
        prompt = f"Generate a Python code that can answer the following question using the following paragraphs and a table! The code must contain only one function called 'run', and no wrapping class. The function must return numeric float results with accuracy to two decimal places and format (value, scale). Scale is usually described in paragraphs, and usually is 'thousand','million' or empty string, but sometimes it is percent. Do not write explanation, just code.\n\nQuestion: {question}  \n\nTable: {table}\n\n Paragraphs: {paragraphs} "
        prompt = remove_thousand_separators(prompt)
        #prompt = replace_bracketed_numbers(prompt)
        prompt = prompt.replace('$','') 
        res = llm.invoke(prompt)
        code = res.content.replace('```python','').replace('```','')
        return (prompt, code)
    
def exec_code(code, table):  
        table = remove_thousand_separators(f"{table}")
        #table = replace_bracketed_numbers(table)
        table = table.replace('$','')
        #code = code.replace(\"* 1000000\",\"\").replace(\"*1000000\",\"\").replace(\"/ 1000000\",\"\").replace(\"/1000000\",\"\").replace(\"/ 1000\",\"\").replace(\"/1000\",\"\")
        #code = code.replace('abs(','(')
        try: 
            loc = locals()   
            if not "run()" in code:
                exec(code + f"\nr = run({table})\n", globals(), loc)
            else: 
                exec(code + "\nr = run()\n", globals(), loc)
            return loc['r']
        except Exception as e:
                s = '[Error]'+ str(e)
                print(s)
                return (s,'')

In [None]:
from tqdm.notebook import tqdm as log_progress

res = []
for i, item in log_progress(devdf.iterrows()):
    table = item['table']['table']
    #table = transform_elements(table, remove_thousand_separators)
    #table = [remove_thousand_separators(cell) for row in table for cell in row]
    #table = [cell.replace('$', '') for row in table for cell in row]
    #print (table)
    for q in item['questions']:        
        if q['answer_type'] == 'arithmetic' and 'table' in q['answer_from']:             
            ( prompt, code)  = gen_code(llm, q['question'], table)
            r = exec_code(code, table)
            err =  None
            if r is None:
                (pred_value, pred_unit) = ("", "")
            else:                
                if len(r) == 2:
                    (pred_value, pred_scale) = r    
                    if isinstance(pred_value, tuple) and len(pred_value) == 2:
                        print('$$$$')
                        (pred_value, pred_scale) = pred_value 
                    
                    if pred_unit == "%" or pred_unit == "percentage"  :
                        pred_unit = 'percent'
                    if pred_unit not in ["", 'thousand', 'million', 'billion', 'percent']:
                        print('Invalid ', pred_unit)
                        pred_unit = ""    
                    if isinstance(pred_value, str):
                        print("string")
                        if  pred_value.startswith('[Error]'):
                            (pred_value, pred_unit) = ("", "")
                            err,_ = r                                                        
                        
                elif len(r) == 1:
                    (pred_value, pred_unit) = (r, "")        
                else:
                    (pred_value, pred_unit) = ("", "")        
                
            #res.append({"table":table, "q":q, "pred":pred, "code": code})
            res.append(({"answer_type":q["answer_type"], "answer": q["answer"], 'scale': q["scale"]}, pred_value, pred_unit, q, code, prompt, item['table'], err))


In [None]:
res

In [None]:
from tatqa_metric import TaTQAEmAndF1

metrics = TaTQAEmAndF1()

for ans, pred, pred_scale, _,_, _,_,_ in res:
    metrics(ans, pred, pred_scale)
pred_em, pred_f1, scale_score, op_score = metrics.get_overall_metric(reset=False)
print( pred_em, pred_f1, scale_score)

In [None]:
[m for m in metrics._details if m['f1']<0.2]

In [None]:
metrics._details

In [None]:
good = 0
for ans, pred, pred_scale, q,code, _,table,err in res:
    llimit = ans['answer']*0.99
    ulimit = ans['answer']*1.01
    
    if pred  is None or isinstance(pred, tuple) or isinstance(pred, str):
        #if ans['scale'] != pred_scale:
        #    print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' <> ' , ans['answer'], ans['scale'])
       
        #type error
        #if isinstance(pred, str):
        #    print (pred, pred_scale, ' -- ' , ans['answer'])
        #if isinstance(pred, tuple):
        #    print (pred, pred_scale, ' == ' , ans['answer'], ans['scale'])
        #print('===============================================================================')
        print('{',pred, '}', ans['answer'], ans['scale'], err)
        continue
    
    #pred = round(pred, 2)
    
    if (pred > 0 and llimit < pred and pred < ulimit) or (pred < 0 and llimit > pred and pred > ulimit) or pred==ulimit or pred == llimit:
        good = good + 1
        if ans['scale'] == pred_scale and  pred_scale == 'thousand':
            print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' == ' , ans['answer'], ans['scale'])
        
        #if ans['scale'] != pred_scale:
        #    print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' <> ' , ans['answer'], ans['scale'])
        #if  ans['scale'] == 'thousand':
        #    print (q['uid'], pred, pred_scale, ' == ' , ans['answer'], ans['scale'])
          
    else:
        #value error
        #  if ans['scale'] != pred_scale:
        #    print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' <> ' , ans['answer'], ans['scale'])
        #if ans['answer'] != pred and ans['scale'] == pred_scale and pred_scale != "":
        #    print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' <> ' , ans['answer'], ans['scale'])
       
        None
        #print (q['uid'], pred, pred_scale, ' <> ' , ans['answer'], ans['scale'])
        #print(q['question'])
        #print(table['table'])
        #print(code)
        
    #if isinstance(pred, str):
    #    print (pred, pred_scale, ' -- ' , ans['answer'])
    #if isinstance(pred, tuple):
    #    print (pred, pred_scale, ' == ' , ans['answer'], ans['scale'])
        
print (good/len(res))

In [None]:

def parse_scale(scale):
    if scale.lower() == "thousands" or "000" in scale:
        return "thousand"
    if scale.lower() == "millions" or scale.lower() == "£m" or scale.lower() == "$m" or scale.lower() == "m":
        return "million"
    return scale.lower()



In [None]:
res_scale_para = {}
for idx, item in log_progress(devdf.iterrows()): 
    #item = devdf.iloc[2]
    table = item['table']['table']
    pars = [p['text'] for p in item['paragraphs']]
    prompt = f"The context contains some paragraphs that describe a table. Search for text that explicitly states the scale of the values in the table. Scale is usally million, thousand, billion or none, if no specific scale found. Return just the scale, no other words.\n Paragraphs: {pars}"
    scale = llm.invoke(prompt).content
    scale = parse_scale(scale)
    res_scale_para[item['table']['uid']] = scale

In [None]:
res_scale_table = {}
for idx, item in log_progress(devdf.iterrows()): 
    #item = devdf.iloc[2]
    table = item['table']['table']
    pars = [p['text'] for p in item['paragraphs']]
    prompt = f"The context contains a table. Search for the scale of the values in table. Scale can be million, thousand, billion or None, if no specific scale found. Return just the scale, no other words.\n Table: {table}"
    scale = llm.invoke(prompt).content
    scale = parse_scale(scale)
    res_scale_table[item['table']['uid']] = scale

In [None]:
res_scale = {}
for idx, item in log_progress(devdf.iterrows()): 
    #item = devdf.iloc[2]
    table = item['table']['table']
    pars = [p['text'] for p in item['paragraphs']]
    prompt = f"The context contains some paragraphs that describe a table. Search for text that explicitly states the scale of the values in the table. Scale can be abbreviated in tables. in response use scale  million, thousand, billion or none, if no scale contains exactly in the text. Do NOT try to infer it from values.  \nParagraphs: {pars} \nTable: {table}"
    scale = llm.invoke(prompt).content
    scale = parse_scale(scale)
    res_scale[item['table']['uid']] = scale

In [None]:
res = []
good = 0
pct = 0
cnt = 0
for idx, item in devdf.iterrows():
    for q in item['questions']:        
        if q['answer_type'] == 'arithmetic' and 'table' in q['answer_from']: 
            if not item['table']['uid'] in res_scale:
                continue
            if q['scale'] in res_scale[item['table']['uid']]:
                good = good + 1
            elif q['scale'] != 'percent':
                print(q['uid'], q['scale'], res_scale[item['table']['uid']])
            else:
                pct = pct +1
            cnt = cnt+1
print(  (cnt -good-pct)/cnt, good, pct, cnt, cnt -good-pct)

In [None]:
res = []
good = 0
pct = 0
cnt = 0
for idx, item in devdf.iterrows():
    for q in item['questions']:        
        if q['answer_type'] == 'arithmetic' and 'table' in q['answer_from']: 
            if q['scale'] == res_scale_para[item['table']['uid']]:
                good = good + 1
            elif q['scale'] != 'percent':
                print(q['uid'], q['scale'], res_scale_para[item['table']['uid']])
            else:
                pct = pct +1
            cnt = cnt+1
print(  (cnt -good-pct)/cnt, good, pct, cnt, cnt -good-pct)

In [None]:
res_scale_pct = {}
for idx, item in log_progress(devdf.iterrows()): 
    #item = devdf.iloc[2]
    table = item['table']['table']
    pars = [p['text'] for p in item['paragraphs']]
    for q in item['questions']:        
        if q['answer_type'] == 'arithmetic' and 'table' in q['answer_from']: 
            prompt = f"Classify the following question. Is it about percentage calculation?  Please answer with one word, yes or no.  \nQuestion: {q['question']}"
            scale = llm.invoke(prompt).content
            res_scale_pct[q['uid']] = {'percent':scale, 'golden_scale': q['scale']}

In [None]:
res = []
good = 0
pct = 0
cnt = 0
for idx, item in devdf.iterrows():
    for q in item['questions']: 
        if not q['uid'] in res_scale_pct:
            continue
        if q['answer_type'] == 'arithmetic' and 'table' in q['answer_from']: 
            if( q['scale'] == 'percent' and res_scale_pct[q['uid']]['percent'] == 'Yes')or  (q['scale'] != 'percent' and res_scale_pct[q['uid']]['percent'] == 'No') :
                good = good + 1
            else:
                None
                #print(q['scale'], res_scale_pct[q['uid']] )
            cnt = cnt + 1
print(  (cnt -good-pct)/cnt, good, pct, cnt, cnt -good-pct)


In [None]:
res_scale_pct

In [None]:
res = []
good = 0
pct = 0
cnt = 0
for idx, item in devdf.iterrows():
    for q in item['questions']:        
        if q['answer_type'] == 'arithmetic' and 'table' in q['answer_from']: 
            if q['scale'] == res_scale_table[item['table']['uid']]:
                good = good + 1
            elif q['scale'] != 'percent':
                print(q['uid'],'\t',  q['scale'], res_scale_table[item['table']['uid']],'\t',  res_scale_para[item['table']['uid']])
            else:
                pct = pct +1
            cnt = cnt+1
print(  (cnt -good-pct)/cnt, good, pct, cnt, cnt -good-pct)

In [208]:
def gen_code(llm, question, table, paragraphs):    
        prompt = f"Generate a Python function 'run(table)' that can answer the following question using the following table! The function must use text search in row selection and return numeric float results with accuracy to two decimal places. If result value or calculation is percent calculation return format (value, 'percent'), every orher case return (value, '') . Do not write explanation, just code.\n\nQuestion: {question}  \n\nTable: {table}"
        prompt = remove_thousand_separators(prompt)
        #prompt = replace_bracketed_numbers(prompt)
        prompt = prompt.replace('$','') 
        res = llm.invoke(prompt)
        code = res.content.replace('```python','').replace('```','')
        return (prompt, code)
    
def exec_code(code, table):  
        table = remove_thousand_separators(f"{table}")
        #table = replace_bracketed_numbers(table)
        table = table.replace('$','')
        #code = code.replace(\"* 1000000\",\"\").replace(\"*1000000\",\"\").replace(\"/ 1000000\",\"\").replace(\"/1000000\",\"\").replace(\"/ 1000\",\"\").replace(\"/1000\",\"\")
        #code = code.replace('abs(','(')
        try: 
            loc = locals()   
            if not "run()" in code:
                exec(code + f"\nr = run({table})\n", globals(), loc)
            else: 
                exec(code + "\nr = run()\n", globals(), loc)
            return loc['r']
        except Exception as e:
                s = '[Error]'+ str(e)
                print(s)
                return (s,'')

In [206]:
table = [['', '', 'Fiscal', ''], ['', '2019', '2018', '2017'], ['', '', '(in millions)', ''], ['Transportation Solutions:', '', '', ''], ['Automotive', ' 5686', ' 6092', '  5228'], ['Commercial transportation', '1221', '1280', '997'], ['Sensors', '914', '918', '814'], ['Total Transportation Solutions', '7821', '8290', '7039'], ['Industrial Solutions:', '', '', ''], ['Industrial equipment', '1949', '1987', '1747'], ['Aerospace, defense, oil, and gas', '1306', '1157', '1075'], ['Energy', '699', '712', '685'], ['Total Industrial Solutions', '3954', '3856', '3507'], ['Communications Solutions:', '', '', ''], ['Data and devices', '993', '1068', '963'], ['Appliances', '680', '774', '676'], ['Total Communications Solutions', '1673', '1842', '1639'], ['Total', ' 13448', ' 13988', ' 12185']]
question = "Question: What was the change in the amount for Appliances in 2019 from 2018?"
( prompt, code) = gen_code(llm, question, table, pars)
res = exec_code(code, table)
[print(idx, r) for idx, r in enumerate(table)]
print(code, res)

0 ['', '', 'Fiscal', '']
1 ['', '2019', '2018', '2017']
2 ['', '', '(in millions)', '']
3 ['Transportation Solutions:', '', '', '']
4 ['Automotive', ' 5686', ' 6092', '  5228']
5 ['Commercial transportation', '1221', '1280', '997']
6 ['Sensors', '914', '918', '814']
7 ['Total Transportation Solutions', '7821', '8290', '7039']
8 ['Industrial Solutions:', '', '', '']
9 ['Industrial equipment', '1949', '1987', '1747']
10 ['Aerospace, defense, oil, and gas', '1306', '1157', '1075']
11 ['Energy', '699', '712', '685']
12 ['Total Industrial Solutions', '3954', '3856', '3507']
13 ['Communications Solutions:', '', '', '']
14 ['Data and devices', '993', '1068', '963']
15 ['Appliances', '680', '774', '676']
16 ['Total Communications Solutions', '1673', '1842', '1639']
17 ['Total', ' 13448', ' 13988', ' 12185']
def run(table):
    data = {'2018': 0, '2019': 0}
    row_names = [row[0] for row in table]
    col_names = table[1]
    
    for i in range(len(row_names)):
        if 'Appliances' in row_

In [207]:
item = devdf.iloc[0]
table = item['table']['table']
pars = [p['text'] for p in item['paragraphs']]
question = item['questions'][5]['question']
( prompt, code) = gen_code(llm, question, table, pars)
res = exec_code(code, table)
print(code, res)

[Error]local variable 'col_2018' referenced before assignment
def run(table):
    for i in range(len(table)):
        if 'Other' in table[i]:
            row_index = i
            break
    
    for j in range(len(table[0])):
        if '2018' in table[row_index][j]:
            col_2018 = j
        if '2019' in table[row_index][j]:
            col_2019 = j
    
    value_2018 = float(table[row_index][col_2018])
    value_2019 = float(table[row_index][col_2019])
    
    percentage_change = ((value_2019 - value_2018) / value_2018) * 100
    
    return (round(percentage_change, 2), 'percent') ("[Error]local variable 'col_2018' referenced before assignment", '')


In [None]:
df = pd.DataFrame(table[2:], columns=table[1])
df.loc[df[0] == 'Other', '2019']

In [131]:
from tqdm.notebook import tqdm as log_progress

res = []
for i, item in log_progress(devdf.iterrows()):
    table = item['table']['table']
    pars = [p['text'] for p in item['paragraphs']]
    #table = transform_elements(table, remove_thousand_separators)
    #table = [remove_thousand_separators(cell) for row in table for cell in row]
    #table = [cell.replace('$', '') for row in table for cell in row]
    #print (table)
    for q in item['questions']:        
        if q['answer_type'] == 'arithmetic' and 'table' in q['answer_from']:             
            ( prompt, code)  = gen_code(llm, q['question'], table, pars)
            r = exec_code(code, table)
            err =  None
            if r is None:
                (pred_value, pred_scale) = ("", "")
            else:   
                if len(r) == 2:                    
                    (pred_value, pred_scale) = r    
                    if isinstance(pred_value, tuple) and len(pred_value) == 2:
                        print('$$$$')
                        (pred_value, pred_scale) = pred_value                     
                    if pred_scale == "%" or pred_scale == "percentage"  :
                        pred_scale = 'percent'
                    if pred_scale not in ["", 'thousand', 'million', 'billion', 'percent']:
                        print('Invalid ', pred_scale)
                        pred_scale = ""    
                    if item['table']['uid'] in res_scale and pred_scale != 'percent':
                        pred_scale = res_scale[item['table']['uid']]
                    if isinstance(pred_value, str):
                        print("string")
                        if  pred_value.startswith('[Error]'):
                            (pred_value, pred_scale) = ("", "")
                            err,_ = r                                                        
                        
                elif len(r) == 1:
                    (pred_value, pred_scale) = (r, "")        
                else:
                    (pred_value, pred_scale) = ("", "")        
                
                
                
            #res.append({"table":table, "q":q, "pred":pred, "code": code})
            res.append(({"answer_type":q["answer_type"], "answer": q["answer"], 'scale': q["scale"]}, pred_value, pred_scale, q, code, prompt, item['table'], err))


0it [00:00, ?it/s]

(-12.6, '')
(-22.22, 'percent')
(-12.14, 'percent')
(2.1, 'percent')
(97.37, 'percent')
[Error]'2018' is not in list
string
(178.67, '')
(40.33, '')
[Error]unterminated string literal (detected at line 6) (<string>, line 6)
string
(25.1, 'percent')
(87.31, '')
(361.0, '')
(73.0, '')
(14.07, 'percent')
[Error]could not convert string to float: '(1040)'
string
[Error]'2018' is not in list
string
[Error]'(' was never closed (<string>, line 5)
string
(154.0, '')
(104.05, 'percent')
(399.33, '')
(88.06, 'percent')
(40.0, 'percent')
(7.19, 'percent')
(-67.6, 'percent')
(0.0, 'percent')
[Error]could not convert string to float: '—'
string
(2487.08, 'percent')
[Error]list index out of range
string
(-61.6, '')
(3.1, 'percent')
[Error]could not convert string to float: ''
string
(73.96, 'percent')
[Error]could not convert string to float: '(89.7)'
string
(38.23, 'percent')


KeyboardInterrupt: 

In [132]:
from tatqa_metric import TaTQAEmAndF1

metrics = TaTQAEmAndF1()

for ans, pred, pred_scale, _,_, _,_,_ in res:
    metrics(ans, pred, pred_scale)
pred_em, pred_f1, scale_score, op_score = metrics.get_overall_metric(reset=False)
print( pred_em, pred_f1, scale_score)

0.4067796610169492 0.4067796610169492 0.4406779661016949


In [None]:
metrics._details

In [133]:
[m for  m in metrics._details if m['f1']<0.2]

[{'answer_type': 'arithmetic',
  'answer': 6.67,
  'scale': 'percent',
  'pred': [97.37],
  'pred_scale': 'percent',
  'em': 0.0,
  'f1': 0.0,
  'pred_span': None,
  'gold_span': None,
  'span_em': 0,
  'span_f1': 0},
 {'answer_type': 'arithmetic',
  'answer': -8.11,
  'scale': 'percent',
  'pred': '',
  'pred_scale': '',
  'em': 0,
  'f1': 0,
  'pred_span': None,
  'gold_span': None,
  'span_em': 0,
  'span_f1': 0},
 {'answer_type': 'arithmetic',
  'answer': 172,
  'scale': 'million',
  'pred': [178.67],
  'pred_scale': 'scale: million',
  'em': 0.0,
  'f1': 0.0,
  'pred_span': None,
  'gold_span': None,
  'span_em': 0,
  'span_f1': 0},
 {'answer_type': 'arithmetic',
  'answer': 50.5,
  'scale': 'million',
  'pred': [40.33],
  'pred_scale': 'scale: million',
  'em': 0.0,
  'f1': 0.0,
  'pred_span': None,
  'gold_span': None,
  'span_em': 0,
  'span_f1': 0},
 {'answer_type': 'arithmetic',
  'answer': 121.5,
  'scale': 'million',
  'pred': [109.0],
  'pred_scale': 'scale: million',
  'e

In [134]:
wrong_indexes = [idx for idx, m in enumerate(metrics._details) if m['f1']<0.2]
good_indexes = [idx for idx, m in enumerate(metrics._details) if m['f1']>0.8]

In [135]:
resdf = pd.DataFrame(res)

In [136]:
resdf2 = resdf[resdf.index.isin(wrong_indexes)]
resdfg = resdf[resdf.index.isin(good_indexes)]

In [180]:
import json

resdf2["answer"] = resdf2[0].apply(lambda x: x['answer'])
resdf2["scale"] = resdf2[0].apply(lambda x: x['scale'])
resdf2["qid"] = resdf2[3].apply(lambda x: x['uid'])
resdf2["q"] = resdf2[3].apply(lambda x: x['question'])

resdfg["answer"] = resdfg[0].apply(lambda x: json.loads(str(x).replace("'", "\""))['answer'])
resdfg["scale"] = resdfg[0].apply(lambda x: json.loads(str(x).replace("'", "\""))['scale'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  resdf2["answer"] = resdf2[0].apply(lambda x: x['answer'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  resdf2["scale"] = resdf2[0].apply(lambda x: x['scale'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  resdf2["qid"] = resdf2[3].apply(lambda x: x['uid'])
A value is trying to be set on a copy o

In [181]:
resdf2


Unnamed: 0,0,1,2,3,4,5,6,7,answer,scale,qid,q
5,"{'answer_type': 'arithmetic', 'answer': 6.67, ...",97.37,percent,{'uid': '4dc8be43-d8d9-4b08-9ffd-9c19012361ce'...,def run(table):\n for row in table:\n ...,Generate a Python function 'run(table)' that c...,{'uid': '52164b70-6973-4844-af6a-76e8f1298d64'...,,6.67,percent,4dc8be43-d8d9-4b08-9ffd-9c19012361ce,What is the year on year percentage change in ...
6,"{'answer_type': 'arithmetic', 'answer': -8.11,...",,,{'uid': '6c44a1a8-0785-43a0-90ab-7e21df2c57d9'...,def run(table):\n for row in table:\n ...,Generate a Python function 'run(table)' that c...,{'uid': '52164b70-6973-4844-af6a-76e8f1298d64'...,[Error]'2018' is not in list,-8.11,percent,6c44a1a8-0785-43a0-90ab-7e21df2c57d9,What is the year on year percentage change in ...
7,"{'answer_type': 'arithmetic', 'answer': 172, '...",178.67,scale: million,{'uid': 'a0414f81-8dc2-44b2-a441-2c9d9c805c4d'...,def run(table):\n for row in table:\n ...,Generate a Python function 'run(table)' that c...,{'uid': '6bf238a5-0a3e-492d-91f8-7f62d3b37fba'...,,172.0,million,a0414f81-8dc2-44b2-a441-2c9d9c805c4d,What is the 2019 average defined contribution ...
8,"{'answer_type': 'arithmetic', 'answer': 50.5, ...",40.33,scale: million,{'uid': 'bf7abd62-d9cd-48d2-8826-1457684019a3'...,def run(table):\n for row in table:\n ...,Generate a Python function 'run(table)' that c...,{'uid': '6bf238a5-0a3e-492d-91f8-7f62d3b37fba'...,,50.5,million,bf7abd62-d9cd-48d2-8826-1457684019a3,What is the 2019 average defined benefit schemes?
9,"{'answer_type': 'arithmetic', 'answer': 121.5,...",109.0,scale: million,{'uid': '4d259081-6da6-44bd-8830-e4de0031744c'...,def run(table):\n dc_2019 = float(table[2][...,Generate a Python function 'run(table)' that c...,{'uid': '6bf238a5-0a3e-492d-91f8-7f62d3b37fba'...,,121.5,million,4d259081-6da6-44bd-8830-e4de0031744c,What is the difference between 2019 average de...
11,"{'answer_type': 'arithmetic', 'answer': 25.1, ...",,,{'uid': '348d031d-73ab-4b35-af46-998cfef25775'...,def run(table):\n for row in table:\n ...,Generate a Python function 'run(table)' that c...,{'uid': 'f8ac9ddd-9872-4681-902d-a0ee7c0ee83a'...,[Error]unterminated string literal (detected a...,25.1,percent,348d031d-73ab-4b35-af46-998cfef25775,What is the percentage change in the valuation...
12,"{'answer_type': 'arithmetic', 'answer': 2.98, ...",25.1,percent,{'uid': 'aea850af-68cb-4cc1-80b2-c9c2860b273e'...,def run(table):\n for row in table:\n ...,Generate a Python function 'run(table)' that c...,{'uid': 'f8ac9ddd-9872-4681-902d-a0ee7c0ee83a'...,,2.98,percent,aea850af-68cb-4cc1-80b2-c9c2860b273e,What is the percentage change in the net defer...
13,"{'answer_type': 'arithmetic', 'answer': 2.93, ...",532.8,none,{'uid': 'ba6783f3-8207-419a-b407-3f688682caef'...,def run(table):\n assets_2019 = float(table...,Generate a Python function 'run(table)' that c...,{'uid': 'e9a946ce-72a9-4b42-86d6-4d91fceb14db'...,,2.93,,ba6783f3-8207-419a-b407-3f688682caef,What is the ratio of IMFT’s total assets to to...
14,"{'answer_type': 'arithmetic', 'answer': 0.87, ...",87.31,none,{'uid': '263d03ec-83d2-48df-8376-1a72167798f7'...,def run(table):\n for row in table:\n ...,Generate a Python function 'run(table)' that c...,{'uid': 'e9a946ce-72a9-4b42-86d6-4d91fceb14db'...,,0.87,,263d03ec-83d2-48df-8376-1a72167798f7,"What is the proportion of IMFT’s property, pla..."
15,"{'answer_type': 'arithmetic', 'answer': -361, ...",361.0,none,{'uid': '78fc6d55-c20c-4f71-99fe-bc40a16e61d0'...,def run(table):\n for row in table:\n ...,Generate a Python function 'run(table)' that c...,{'uid': 'e9a946ce-72a9-4b42-86d6-4d91fceb14db'...,,-361.0,,78fc6d55-c20c-4f71-99fe-bc40a16e61d0,What is the change of IMFT’s total assets from...


In [None]:
import re
from typing import Optional, Tuple

def extract_number(input_str: str) -> Optional[Tuple[float, str]]:
    """
    Extracts a numeric value from a string and returns a tuple (number, other_characters).
    If no number is found, returns None.
    
    Handles integers, floats, percentages, currency, and thousand separators.
    
    Args:
        input_str (str): The input string.
    
    Returns:
        Optional[Tuple[float, str]]: A tuple with the number and the remaining string,
        or None if no number is found.
    """
    # Define a regex pattern for matching numbers
    pattern = r"""
        [-+]?\s*                   # Optional sign with optional space
        (?:
            (\d{1,3}(?:,\d{3})*|   # Match integer with thousands separators
            \d+)?                  # Or plain integer
            (\.\d+)?               # Match optional decimal part
            |(\d*\.\d+)            # Match decimal number without leading integer
        )
        \s*%?                      # Optional percentage symbol
        |\$\s*\d[\d,]*(?:\.\d+)?   # Optional currency
    """
    
    # Compile the regex with the verbose flag
    regex = re.compile(pattern, re.VERBOSE)
    
    match = regex.search(input_str)
    if not match:
        return None
    
    number_str = match.group(0)
    other_characters = input_str.replace(number_str, '', 1).strip()
    
    # Normalize the number by removing commas and currency symbols
    number_str = number_str.replace(',', '').replace('$', '').strip('%').strip()
    
    try:
        number = float(number_str)
    except ValueError:
        return None
    
    return number, other_characters

In [237]:
table = [['', 'Domestic', '', 'International', ''],
['', 'September 30,', '', 'September 30,', ''],
['', '2019', '2018', '2019', '2018'],
['Discount rate', '4.00%', '3.75%', '1.90%', '2.80%'],
['Expected return on plan assets', '', '', '3.40%', '3.70%'],
['Rate of compensation increase', '', '', '- - %', '- - %']]



In [266]:
def fill_column_headers(row):    
    if row[0] != '':
        return row
    col_num = len([c for c in row if c!=''])
    step_size = int((len(row) - 1) / col_num)    
    for i in range(col_num):
        col_name = None
        for j in range(step_size):
            if row[1+i*step_size] != '':
                col_name = row[1+i*step_size]
                break
        for j in range(step_size):
            row[1+i*step_size + j] = col_name
    return row

for idx, r in enumerate((table)):
    print(get_row_boundaries(r))


['', 'Domestic', 'Domestic', 'International', 'International']
['', 'September 30,', 'September 30,', 'September 30,', 'September 30,']
['', '2019', '2018', '2019', '2018']
['Discount rate', '4.00%', '3.75%', '1.90%', '2.80%']
['Expected return on plan assets', None, None, '3.40%', '3.70%']
['Rate of compensation increase', None, None, '- - %', '- - %']


In [288]:
def fill_table_headers(table):
    first_value_col_idx = 1
    
    for idx, r in enumerate(reversed(table)):
        if r[0] == '': ## header end
            first_value_row_idx = idx
            break        
    for idx, r in enumerate(reversed(table[0:first_value_row_idx])):
       table[idx] = fill_column_headers(table[idx])    
        
    return (table, first_value_col_idx, first_value_row_idx)
    
fill_table_headers(table)

([['', 'Domestic', 'Domestic', 'International', 'International'],
  ['', 'September 30,', 'September 30,', 'September 30,', 'September 30,'],
  ['', '2019', '2018', '2019', '2018'],
  ['Discount rate', '4.00%', '3.75%', '1.90%', '2.80%'],
  ['Expected return on plan assets', None, None, '3.40%', '3.70%'],
  ['Rate of compensation increase', None, None, '- - %', '- - %']],
 1,
 3)

In [289]:
def convert_table(table):
    (table, first_value_col_idx, first_value_row_idx) = fill_table_headers(table)
    
    for i in range(first_value_row_idx, len(table)):        
        for j in range(first_value_col_idx, len(table[0])):
            if (isn)
convert_table(table)

4.00%
3.75%
1.90%
2.80%
None
None
3.40%
3.70%
None
None
- - %
- - %


In [292]:
'400'.isnumeric()

True

In [262]:
def get_row_boundaries(row):    
    col_num = len([c for c in row if c!=''])
    step_size = int((len(row) - 1) / col_num)
    res = [{0: [0,1]}]
    for i in range(col_num):
        res.append({i+1:[1+i*step_size, i*step_size+ step_size ]})
    return res

for idx, r in enumerate((table)):
    print(get_row_boundaries(r))

[{0: [0, 1]}, {1: [1, 2]}, {2: [3, 4]}]
[{0: [0, 1]}, {1: [1, 2]}, {2: [3, 4]}]
[{0: [0, 1]}, {1: [1, 1]}, {2: [2, 2]}, {3: [3, 3]}, {4: [4, 4]}]
[{0: [0, 1]}, {1: [1, 0]}, {2: [1, 0]}, {3: [1, 0]}, {4: [1, 0]}, {5: [1, 0]}]
[{0: [0, 1]}, {1: [1, 1]}, {2: [2, 2]}, {3: [3, 3]}]
[{0: [0, 1]}, {1: [1, 1]}, {2: [2, 2]}, {3: [3, 3]}]


In [204]:
item = resdf2[resdf2['qid'] == '4dc8be43-d8d9-4b08-9ffd-9c19012361ce'].iloc[0]
[print(r) for r in item[6]['table']]
print(item['q'])
print(item[4])

['', 'Domestic', '', 'International', '']
['', 'September 30,', '', 'September 30,', '']
['', '2019', '2018', '2019', '2018']
['Discount rate', '4.00%', '3.75%', '1.90%', '2.80%']
['Expected return on plan assets', '', '', '3.40%', '3.70%']
['Rate of compensation increase', '', '', '- - %', '- - %']
What is the year on year percentage change in domestic discount rate between 2018 and 2019?
def run(table):
    for row in table:
        if 'Discount rate' in row:
            rate_2019 = float(row[2].replace('%', '')) / 100
            rate_2018 = float(row[3].replace('%', '')) / 100
            yoy_change = ((rate_2019 - rate_2018) / rate_2018) * 100
            return (round(yoy_change, 2), 'percent')

# Example usage
table = [['', 'Domestic', '', 'International', ''], ['', 'September 30,', '', 'September 30,', ''], ['', '2019', '2018', '2019', '2018'], ['Discount rate', '4.00%', '3.75%', '1.90%', '2.80%'], ['Expected return on plan assets', '', '', '3.40%', '3.70%'], ['Rate of compensa

In [225]:
table = str([
    {'dimensions': ['Domestic', 'September 30', '2019', 'Discount rate'], 'value':'4.00%'},
    {'dimensions': ['Domestic', 'September 30', '2018', 'Discount rate'], 'value':'3.75%'},
    {'dimensions': ['International', 'September 30', '2019', 'Discount rate'], 'value':'1.90%'},
    {'dimensions': ['International', 'September 30', '2018', 'Discount rate'], 'value':'2.80%'},
    {'dimensions': ['International', 'September 30', '2019', 'Expected return on plan assets'], 'value':'3.40%'},
    {'dimensions': ['International', 'September 30', '2018', 'Expected return on plan assets'], 'value':'3.70%'},    
])
question = 'What is the year on year percentage change in domestic discount rate between 2018 and 2019?'
code = llm.invoke(f"Generate a Python function 'run(table)' that can answer the following question using the following table! The function must return numeric float results with accuracy to two decimal places. If result value or calculation is percent calculation return format (value, 'percent'), every orher case return (value, '') . Do not write explanation, just code.\n\nQuestion: {question}  \n\nTable: {table}").content
print(code)
exec_code(code, table)

def run(table):
    value_2019 = float([entry['value'] for entry in table if entry['dimensions'] == ['Domestic', 'September 30', '2019', 'Discount rate']][0][:-1])
    value_2018 = float([entry['value'] for entry in table if entry['dimensions'] == ['Domestic', 'September 30', '2018', 'Discount rate']][0][:-1])
    
    percentage_change = ((value_2019 - value_2018) / value_2018) * 100
    
    return (round(percentage_change, 2), 'percent') if percentage_change != 0 else (round(percentage_change, 2), '')


(6.67, 'percent')

['Rate of compensation increase', '', '', '- - %', '- - %']

In [175]:
resdf2[["qid","answer","scale", 1,2,4,5,7]].to_csv('wrong_answers.csv')


In [139]:
print(resdfg.iloc[0][4])

def run(table):
    for row in table:
        if 'Other' in row:
            other_2019 = float(row[1])
            other_2018 = float(row[2])
            change = other_2019 - other_2018
            return (round(change, 2), '')

# Example usage
table = [['', '', 'Years Ended September 30,', ''], ['', '2019', '2018', '2017'], ['Fixed Price', '  1452.4', '  1146.2', '  1036.9'], ['Other', '44.1', '56.7', '70.8'], ['Total sales', '1496.5', '1202.9', '1107.7']]
print(run(table))


In [None]:
str(resdf2[0].values[0])

In [209]:
good = 0
for ans, pred, pred_scale, q,code, _,table,err in res:
    llimit = ans['answer']*0.99
    ulimit = ans['answer']*1.01
    
    if pred  is None or isinstance(pred, tuple) or isinstance(pred, str):
        #if ans['scale'] != pred_scale:
        #    print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' <> ' , ans['answer'], ans['scale'])
       
        #type error
        #if isinstance(pred, str):
        #    print (pred, pred_scale, ' -- ' , ans['answer'])
        #if isinstance(pred, tuple):
        #    print (pred, pred_scale, ' == ' , ans['answer'], ans['scale'])
        #print('===============================================================================')
        print('{',pred, '}', ans['answer'], ans['scale'], err)
        continue
    
    #pred = round(pred, 2)
    
    if (pred > 0 and llimit < pred and pred < ulimit) or (pred < 0 and llimit > pred and pred > ulimit) or pred==ulimit or pred == llimit:
        good = good + 1
        if ans['scale'] == pred_scale and  pred_scale == 'thousand':
            print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' == ' , ans['answer'], ans['scale'])
        
        #if ans['scale'] != pred_scale:
        #    print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' <> ' , ans['answer'], ans['scale'])
        #if  ans['scale'] == 'thousand':
        #    print (q['uid'], pred, pred_scale, ' == ' , ans['answer'], ans['scale'])
          
    else:
        #value error
        #  if ans['scale'] != pred_scale:
        #    print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' <> ' , ans['answer'], ans['scale'])
        #if ans['answer'] != pred and ans['scale'] == pred_scale and pred_scale != "":
        #    print ('***********', q['uid'], 'pred: ',pred, 'scale:',pred_scale, ' <> ' , ans['answer'], ans['scale'])
       
        None
        #print (q['uid'], pred, pred_scale, ' <> ' , ans['answer'], ans['scale'])
        #print(q['question'])
        #print(table['table'])
        #print(code)
        
    #if isinstance(pred, str):
    #    print (pred, pred_scale, ' -- ' , ans['answer'])
    #if isinstance(pred, tuple):
    #    print (pred, pred_scale, ' == ' , ans['answer'], ans['scale'])
        
print (good/len(res))

ValueError: too many values to unpack (expected 8)

In [211]:
res


("[Error]local variable 'col_2018' referenced before assignment", '')

In [None]:
good, len(res)

In [None]:
closing_cash_2019