In [1]:
from importlib import reload
import mrf
import AgentS1
import AgentS2
import utils
import evaluate

In [45]:
from pathlib import Path
import json

class RunAgents():
    """Run the two agent stages for a single question and persist the result.

    ``AgentS1`` is built from the question schema and compiled; its compiled
    output is then handed to ``AgentS2`` in the schema position, and the
    output of the second ``compile()`` is written to ``<log_dir>/agents.json``.
    """

    def __init__(self, question_schema, prompts, log_dir, city, model_dial, model_trans, chat_temp):
        """Store the run configuration and make sure the log directory exists.

        Args:
            question_schema: Question definition forwarded to ``AgentS1``.
            prompts: Prompts argument forwarded unchanged to both agents.
            log_dir: Directory for this run's logs; created (including
                parents) if it does not exist yet.
            city: City value forwarded to both agents.
            model_dial: Forwarded to both agents as ``model_dial``.
            model_trans: Forwarded to both agents as ``model_trans``.
            chat_temp: Forwarded to both agents as ``chat_temp``.
        """
        self.question_schema = question_schema
        self.prompts = prompts

        # Create log_dir up front so the agents and run_agents() can write into it.
        Path(log_dir).mkdir(parents=True, exist_ok=True)

        self.log_dir = log_dir
        self.city = city
        self.model_dial = model_dial
        self.model_trans = model_trans
        self.chat_temp = chat_temp

    def run_agents(self):
        """Run AgentS1 then AgentS2 and save the final output.

        The agent instances are kept on ``self.agents1`` / ``self.agents2``.

        Returns:
            Whatever ``AgentS2.compile()`` returns; the same object is dumped
            as JSON to ``<log_dir>/agents.json``.
        """
        self.agents1 = AgentS1.AgentS1(
            self.prompts,
            self.question_schema,
            city=self.city,
            log_dir=self.log_dir,
            log_name='agents1.json',
            model_dial=self.model_dial,
            model_trans=self.model_trans,
            chat_temp=self.chat_temp,
        )
        s1_schema = self.agents1.compile()

        # Stage two consumes the compiled output of stage one as its schema.
        self.agents2 = AgentS2.AgentS2(
            self.prompts,
            s1_schema,
            city=self.city,
            log_dir=self.log_dir,
            log_name='agents2.json',
            model_dial=self.model_dial,
            model_trans=self.model_trans,
            chat_temp=self.chat_temp,
        )
        s2_schema = self.agents2.compile()

        # Use a context manager so the file is flushed and closed before returning.
        with open(f'{self.log_dir}/agents.json', 'w') as out_file:
            json.dump(s2_schema, out_file, indent=4)

        return s2_schema




In [105]:
import datetime
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from tabulate import tabulate
import numpy as np
import torch
reload(mrf)
reload(AgentS1)
reload(AgentS2)
reload(utils)
reload(evaluate)

class evaluation_suite():
    """Drive an experiment over a folder of question files.

    Typical order of calls: ``run_agent()`` (or ``load_experiments()`` to
    reuse saved agent output), ``optimize_mrf()``, ``get_results()``, then
    ``evaluate()``.

    State kept on the instance:
        agent_records: maps a question key to the agent output for it.
        exp_records: maps a question key to a dict with the keys
            ``'mrf'``, ``'zero_shot'``, ``'cot'`` and ``'ground_truth'``.
    """

    def __init__(self, question_folder, identifier='', log_dir='./Experiments', prompts='./data/prompts_13.json', model_dial='gpt-4o', model_trans='gpt-3.5', chat_temp=0.3, data_folder='./data/'):
        """Store the configuration; nothing is read or written here.

        Args:
            question_folder: Folder scanned for ``*.json`` question files.
            identifier: Label stored on the instance; when empty, a
                ``'%m%d_%H%M'`` timestamp of the current time is used.
            log_dir: Root directory; each question gets ``<log_dir>/<key>/``.
            prompts: Prompts argument forwarded to ``RunAgents``.
            model_dial: Forwarded to ``RunAgents`` as ``model_dial``.
            model_trans: Forwarded to ``RunAgents`` as ``model_trans``.
            chat_temp: Forwarded to ``RunAgents`` as ``chat_temp``.
            data_folder: Folder holding ``<CityName>.csv`` ground-truth files.
        """
        self.question_folder = question_folder
        if identifier == '':
            self.identifier = datetime.datetime.now().strftime('%m%d_%H%M')
        else:
            self.identifier = identifier

        self.prompts = prompts
        self.log_dir = log_dir

        self.model_dial = model_dial
        self.model_trans = model_trans
        self.chat_temp = chat_temp
        self.data_folder = data_folder

        self.agent_records = {}
        self.exp_records = {}

    def run_agent(self, ):
        """Run the agent pipeline for every ``*.json`` file in ``question_folder``.

        Each file must contain a ``'City'`` key; it is removed from the schema
        and passed to ``RunAgents`` separately. Results are stored in
        ``agent_records`` keyed by the question file's stem.
        """
        question_files = list(Path(self.question_folder).glob('*.json'))
        for question in question_files:
            with open(question) as question_file:
                question_schema = json.load(question_file)

            # 'City' is passed on its own rather than as part of the schema.
            city = question_schema.pop('City')

            run_agents = RunAgents(
                question_schema=question_schema,
                prompts=self.prompts,
                log_dir=self.log_dir + f'/{question.stem}',
                city=city,
                model_dial=self.model_dial,
                model_trans=self.model_trans,
                chat_temp=self.chat_temp,
            )
            self.agent_records[question.stem] = run_agents.run_agents()

    def load_experiments(self, ):
        """Load ``agents2.json`` from every sub-directory of ``log_dir``.

        Sub-directories whose stem contains a ``'.'`` are skipped. Each loaded
        document is stored in ``agent_records`` under the directory's stem.
        """
        logdir = Path(self.log_dir)
        # Only directories are question folders; plain files are ignored.
        logdirs = [x for x in logdir.iterdir() if x.is_dir() and '.' not in x.stem]
        print(str(logdir))
        for question in logdirs:
            print("looking in ", str(question) + '/agents2.json')
            with open(f'{question}/agents2.json') as schema_file:
                schema = json.load(schema_file)
            self.agent_records[question.stem] = schema
            print(schema['schema']['Variables'])

    def optimize_mrf(self, w0=0, update_iter=50000):
        """Fit one ``mrf.Brute`` model per loaded question and save it.

        Args:
            w0: When non-zero, passed to the model's ``set_w0`` before the
                update; also part of the saved file name.
            update_iter: First argument of the model's ``update`` call.

        Each model is saved with ``torch.save`` to
        ``<log_dir>/<question>/ent<w0>.pt``.
        """
        for question, schema in self.agent_records.items():
            print(f'Evaluating {question}')
            print(schema['schema'])
            mrf_obj = mrf.Brute(verbose=True)
            mrf_obj.from_json(schema['schema'])
            if w0 != 0:
                mrf_obj.set_w0(w0)
            mrf_obj.update(update_iter, slack=False)

            # The whole model object is pickled next to the agent logs.
            torch.save(mrf_obj, f'{self.log_dir}/{question}/ent{str(w0)}.pt')

    def get_results(self, w0=0, ):
        """Collect MRF, zero-shot, CoT and ground-truth results per question.

        Args:
            w0: Selects which saved model (``ent<w0>.pt``) to load and which
                ``run_results<w0>.json`` file to write.

        For every entry of ``agent_records`` the combined record is stored in
        ``exp_records`` and dumped to ``<log_dir>/<question>/run_results<w0>.json``.
        If the saved model cannot be loaded or queried, the MRF result is
        recorded as ``0`` and the reason is printed.
        """
        for question, schema in self.agent_records.items():

            print(f'Evaluating {question}')
            model_path = f'{self.log_dir}/{question}/ent{str(w0)}.pt'
            try:
                # SECURITY: torch.load unpickles a whole Python object; only
                # load model files produced by optimize_mrf() of this project.
                # NOTE(review): recent PyTorch releases default torch.load to
                # weights_only=True, which rejects such objects - confirm the
                # installed version, otherwise this always falls back to 0.
                mrf_obj = torch.load(model_path)
                mrf_result = mrf_obj.query(schema['schema'])
            except Exception as exc:
                # Best effort: keep the other results, but say why MRF is missing
                # instead of silently swallowing every error (incl. Ctrl-C).
                print(f'Could not load or query MRF model {model_path}: {exc!r}; recording mrf result as 0')
                mrf_result = 0

            zero_result = schema['zero shot']['result']['Probability']
            # Runs without a chain-of-thought entry reuse the zero-shot result.
            if 'cot' not in schema:
                cot_result = zero_result
            else:
                cot_result = schema['cot']['result']['Probability']

            # Ground truth comes from <data_folder>/<City name without spaces>.csv,
            # using the part of 'city' before the first comma.
            dat_path = Path(self.data_folder + '/' + schema['city'].split(',')[0].replace(" ", "") + '.csv')
            data = utils.DatasetQ(dat_path)
            ground_truth = data.marg(schema['schema']['Question'])
            self.exp_records[question] = {'mrf': mrf_result, 'zero_shot': zero_result, 'cot': cot_result, 'ground_truth': ground_truth}

            with open(f'{self.log_dir}/{question}/run_results{str(w0)}.json', 'w') as result_file:
                json.dump(self.exp_records[question], result_file, indent=4)

    def evaluate(self,):
        """Print and return a comparison table built from ``exp_records``.

        Returns:
            A list of rows; the first row is the header, each following row
            holds the question key, the four result vectors rounded to three
            decimals, and the total variation distance of zero-shot, MRF and
            CoT against the ground truth (as computed by
            ``evaluate.total_variation_distance``), also rounded.
        """
        table = [["City", "Zero Shot", "Ground", "MRF", "CoT", "Zero TVD", "MRF TVD", "CoT TVD"]]

        for city, results in self.exp_records.items():
            mrf_ground = evaluate.total_variation_distance(results['ground_truth'], results['mrf'])
            zero_shot_ground = evaluate.total_variation_distance(results['ground_truth'], results['zero_shot'])
            cot_ground = evaluate.total_variation_distance(results['ground_truth'], results['cot'])
            table.append([
                city,
                np.array(results['zero_shot']).round(3),
                np.array(results['ground_truth']).round(3),
                np.array(results['mrf']).round(3),
                np.array(results['cot']).round(3),
                zero_shot_ground.round(3),
                mrf_ground.round(3),
                cot_ground.round(3),
            ])

        print(tabulate(table))
        return table


# Driver for a single experiment run: questions are read from ./Testing and
# every per-question artefact is written under ./Experiments4/<question key>/.
test = evaluation_suite(question_folder='./Testing', identifier='test', log_dir='./Experiments4', prompts='./data/prompts_13.json', model_dial='gpt-4o', model_trans='gpt-3.5-turbo', chat_temp=0)


"""
# How to use
question_folder: folder with json question files. If you run this script only to get one run on one city, separate the questions so question_folder is './Questions/BrowardCounty' and your question file like './Questions/BrowardCounty/question.json', no restriction on the file name


identifier: ignore this


log_dir: directory to save results. Similarly if one run one city, set log_dir to './Experiments/run_1'. Script will create and populate folder './Experiments/run_1/BrowardCounty'


prompts: fixed path to the prompts file, it contains all prompts needed



data_folder: fixed path to the data folder. if set to './data/', will look for csv './data/BrowardCounty.csv'


the rest are intuitive
"""



# Step 1 (disabled here): run the agents on every question file in
# question_folder and write their logs to log_dir/<question file stem>/.
# Uncomment to regenerate the agent output instead of reusing saved files.
#test.run_agent()


# Step 2: load the agents2.json file saved in each sub-folder of log_dir
# (the output of a previous run_agent() call) into test.agent_records.
test.load_experiments()


# Step 3: fit one MRF model per loaded question and save it as
# log_dir/<question>/ent<w0>.pt. Author's note: w0=0 is the faster setting.
test.optimize_mrf(w0=0, update_iter=10000)



# Step 4: collect results. The saved MRF model is loaded from disk; if loading
# or querying it fails, the MRF result is recorded as 0 while the other results
# are still written to log_dir/<question>/run_results<w0>.json.
test.get_results(w0=0)



# Step 5: print the comparison table (via tabulate) and keep the rows in `ret`.
ret = test.evaluate()


Experiments4
looking in  Experiments4/la2/agents2.json
[{'Name': 'Number of Bathrooms', 'Value': ['0-1', '2', '3 or more']}, {'Name': 'Number of Beds', 'Value': ['0-1', '2-3', '4 or more']}, {'Name': 'Location', 'Value': ['Downtown', 'Beach Area', 'Suburbs']}, {'Name': 'Price', 'Value': ['$0-$50', '$51-$100', '$101-$200', '$201-$500', '$501 and above']}]
looking in  Experiments4/la0/agents2.json
[{'Name': 'Number of Bedrooms', 'Value': ['1 Bedroom', '2 Bedrooms', '3+ Bedrooms']}, {'Name': 'Host Experience', 'Value': ['0-1 Years', '1-5 Years', '5+ Years']}, {'Name': 'Superhost Status', 'Value': ['Yes', 'No']}, {'Name': 'Price', 'Value': ['$0-$50', '$51-$100', '$101-$200', '$201-$500', '$501 and above']}]
looking in  Experiments4/la1/agents2.json
[{'Name': 'Property Type', 'Value': ['Entire home/apartment', 'Private room', 'Shared room']}, {'Name': 'Number of Guests', 'Value': ['1-2 guests', '3-4 guests', '5+ guests']}, {'Name': 'Location', 'Value': ['Downtown', 'Beachfront', 'Suburban']

Training Progress:   0%|          | 0/10000 [00:00<?, ?it/s]

Training Progress:   7%|▋         | 699/10000 [00:00<00:01, 6989.52it/s]

Loss:  tensor(4.6372, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.6846, 0.5706, 0.5675, 0.5372, 0.4319], grad_fn=<TopkBackward0>),
indices=tensor([32, 47, 22, 72, 25]))


Training Progress:  32%|███▏      | 3151/10000 [00:00<00:00, 8014.81it/s]

Loss:  tensor(0.0518, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0504, 0.0496, 0.0486, 0.0484, 0.0450], grad_fn=<TopkBackward0>),
indices=tensor([ 4, 13, 51,  6, 23]))


Training Progress:  56%|█████▌    | 5576/10000 [00:00<00:00, 8063.60it/s]

Loss:  tensor(0.0469, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0498, 0.0496, 0.0487, 0.0445, 0.0412], grad_fn=<TopkBackward0>),
indices=tensor([ 4, 13,  6, 23,  5]))


Training Progress:  64%|██████▍   | 6383/10000 [00:00<00:00, 7862.43it/s]

Loss:  tensor(0.0465, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0499, 0.0495, 0.0489, 0.0454, 0.0409], grad_fn=<TopkBackward0>),
indices=tensor([ 4, 13,  6, 23,  5]))


Training Progress:  96%|█████████▌| 9583/10000 [00:01<00:00, 7564.21it/s]

Loss:  tensor(0.0464, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0500, 0.0494, 0.0489, 0.0459, 0.0408], grad_fn=<TopkBackward0>),
indices=tensor([ 4, 13,  6, 23,  5]))


Training Progress: 100%|██████████| 10000/10000 [00:01<00:00, 7452.45it/s]


Loss:  tensor(0.0464, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0500, 0.0493, 0.0489, 0.0462, 0.0407], grad_fn=<TopkBackward0>),
indices=tensor([ 4, 13,  6, 23,  5]))
Evaluating la0
{'Question': {'Target': [{'Name': 'Price'}], 'Condition': [{'Name': 'Superhost', 'Value': ['Yes']}, {'Name': 'Number of Bedrooms', 'Value': ['2 bedrooms']}, {'Name': 'Host Experience', 'Value': ['1-5 years']}], 'Text': 'What is the price of an Airbnb listing in Los Angeles, CA with 2 bedrooms, hosted by a host with 1-5 years of experience, who is also a superhost?'}, 'Variables': [{'Name': 'Number of Bedrooms', 'Value': ['1 Bedroom', '2 Bedrooms', '3+ Bedrooms']}, {'Name': 'Host Experience', 'Value': ['0-1 Years', '1-5 Years', '5+ Years']}, {'Name': 'Superhost Status', 'Value': ['Yes', 'No']}, {'Name': 'Price', 'Value': ['$0-$50', '$51-$100', '$101-$200', '$201-$500', '$501 and above']}], 'Queries': [{'Target': [{'Name': 'Price'}], 'Condition': [{'Name': 'Number of Bedrooms', 'Value'

Training Progress:  17%|█▋        | 1737/10000 [00:00<00:00, 8728.84it/s]

Loss:  tensor(7.6916, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.7022, 0.6444, 0.6437, 0.6427, 0.6208], grad_fn=<TopkBackward0>),
indices=tensor([25, 27, 33,  0,  4]))


Training Progress:  35%|███▍      | 3480/10000 [00:00<00:00, 8664.22it/s]

Loss:  tensor(0.0966, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0963, 0.0833, 0.0794, 0.0753, 0.0641], grad_fn=<TopkBackward0>),
indices=tensor([10, 59,  7, 22, 58]))


Training Progress:  52%|█████▏    | 5224/10000 [00:00<00:00, 8584.22it/s]

Loss:  tensor(0.0946, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0960, 0.0820, 0.0794, 0.0743, 0.0642], grad_fn=<TopkBackward0>),
indices=tensor([10, 59,  7, 22, 58]))


Training Progress:  70%|██████▉   | 6973/10000 [00:00<00:00, 8673.71it/s]

Loss:  tensor(0.0940, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0958, 0.0817, 0.0794, 0.0742, 0.0644], grad_fn=<TopkBackward0>),
indices=tensor([10, 59,  7, 22, 58]))


Training Progress:  96%|█████████▌| 9623/10000 [00:01<00:00, 8664.93it/s]

Loss:  tensor(0.0937, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0957, 0.0816, 0.0794, 0.0740, 0.0644], grad_fn=<TopkBackward0>),
indices=tensor([10, 59,  7, 22, 58]))


Training Progress: 100%|██████████| 10000/10000 [00:01<00:00, 8624.43it/s]


Loss:  tensor(0.0936, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0957, 0.0814, 0.0794, 0.0739, 0.0645], grad_fn=<TopkBackward0>),
indices=tensor([10, 59,  7, 22, 58]))
Evaluating la1
{'Question': {'Target': [{'Name': 'Price'}], 'Condition': [{'Name': 'Max Accommodates', 'Value': ['3']}, {'Name': 'Room Type', 'Value': ['Entire home/apt']}], 'Text': 'What is the price of renting an entire home or apartment in Los Angeles, CA that can accommodate up to 3 people?'}, 'Variables': [{'Name': 'Property Type', 'Value': ['Entire home/apartment', 'Private room', 'Shared room']}, {'Name': 'Number of Guests', 'Value': ['1-2 guests', '3-4 guests', '5+ guests']}, {'Name': 'Location', 'Value': ['Downtown', 'Beachfront', 'Suburban']}, {'Name': 'Price', 'Value': ['$0-$50', '$51-$100', '$101-$200', '$201-$500', '$501 and above']}], 'Queries': [{'Target': [{'Name': 'Price'}], 'Condition': [{'Name': 'Property Type', 'Value': ['Entire home/apartment']}, {'Name': 'Number of Guests', 'V

Training Progress:   0%|          | 0/10000 [00:00<?, ?it/s]

Loss:  tensor(11.8551, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.9282, 0.9088, 0.8888, 0.8629, 0.8621], grad_fn=<TopkBackward0>),
indices=tensor([32, 67, 52, 47,  6]))


Training Progress:  32%|███▏      | 3156/10000 [00:00<00:00, 8000.80it/s]

Loss:  tensor(0.0873, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0892, 0.0660, 0.0623, 0.0618, 0.0608], grad_fn=<TopkBackward0>),
indices=tensor([49, 59, 66,  2, 53]))


Training Progress:  56%|█████▌    | 5566/10000 [00:00<00:00, 7937.82it/s]

Loss:  tensor(0.0451, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0564, 0.0549, 0.0410, 0.0407, 0.0391], grad_fn=<TopkBackward0>),
indices=tensor([49, 66, 59,  2, 48]))


Training Progress:  71%|███████▏  | 7149/10000 [00:00<00:00, 7857.40it/s]

Loss:  tensor(0.0442, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0553, 0.0547, 0.0410, 0.0404, 0.0392], grad_fn=<TopkBackward0>),
indices=tensor([49, 66,  2, 48, 59]))


Training Progress:  96%|█████████▌| 9604/10000 [00:01<00:00, 8080.86it/s]

Loss:  tensor(0.0436, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0548, 0.0544, 0.0412, 0.0400, 0.0387], grad_fn=<TopkBackward0>),
indices=tensor([49, 66,  2, 48, 59]))


Training Progress: 100%|██████████| 10000/10000 [00:01<00:00, 7924.43it/s]


Loss:  tensor(0.0433, grad_fn=<SumBackward0>) torch.return_types.topk(
values=tensor([0.0544, 0.0541, 0.0412, 0.0398, 0.0384], grad_fn=<TopkBackward0>),
indices=tensor([49, 66,  2, 48, 59]))
Evaluating la2
Evaluating la0
Evaluating la1
----  --------------------------  -------------------------------  -------------------------------  --------------------------  --------  -------  -------
City  Zero Shot                   Ground                           MRF                              CoT                         Zero TVD  MRF TVD  CoT TVD
la2   [0.   0.   0.05 0.35 0.6 ]  [0.437 0.325 0.056 0.056 0.127]  [0.021 0.033 0.27  0.619 0.058]  [0.01 0.02 0.1  0.5  0.37]  0.767     0.777    0.731
la0   [0.01 0.05 0.2  0.6  0.14]  [0.001 0.037 0.487 0.439 0.036]  [0.024 0.042 0.168 0.533 0.232]  [0.01 0.05 0.2  0.6  0.14]  0.287     0.318    0.287
la1   [0.01 0.05 0.3  0.5  0.14]  [0.007 0.222 0.586 0.173 0.011]  [0.028 0.099 0.357 0.401 0.115]  [0.01 0.04 0.2  0.5  0.25]  0.458     0.353    0

In [None]:
LosAngeles  [0.  0.  0.1 0.8 0.1]  [0.001 0.037 0.487 0.439 0.036]  [0.063 0.089 0.232 0.469 0.147]  [0.01 0.04 0.2  0.55 0.2 ]  0.425     0.255    0.287

In [24]:
def remove_vars(constraints, vars, one=''):
    """Return a reduced copy of a schema without the variables named in ``vars``.

    Args:
        constraints: Mapping with a ``'Variables'`` list (items carry a
            ``'Name'``) and a ``'Constraints'`` list (items are ``None`` or
            carry ``'Target'``, ``'Condition'`` and ``'Probability'``).
        vars: Names of the variables to drop.
        one: Target name whose condition-free constraint is dropped as well.

    Returns:
        A dict with two keys. ``'Variables'`` holds every variable whose name
        is not in ``vars``. ``'Constraints'`` holds a trimmed copy
        (``Target``, ``Probability``, ``Condition``) of every constraint that
        is kept, with conditions named ``'City'`` filtered out. A constraint
        is dropped when it is ``None``, when its first target or its first
        condition names a removed variable, or when it has no conditions and
        its first target is ``one``.
    """
    reduced = {
        'Variables': [v for v in constraints['Variables'] if v['Name'] not in vars],
        'Constraints': [],
    }

    for constraint in constraints['Constraints']:
        if constraint is None:
            print("None")
            continue

        target_name = constraint['Target'][0]['Name']
        if target_name in vars:
            continue

        conditions = constraint['Condition']
        # Only the first condition is inspected for a removed variable.
        if len(conditions) != 0 and conditions[0]['Name'] in vars:
            continue

        # An unconditional constraint on `one` is skipped.
        if len(conditions) == 0 and target_name == one:
            print("here")
            continue

        print(target_name, conditions)
        reduced['Constraints'].append({
            'Target': constraint['Target'],
            'Probability': constraint['Probability'],
            'Condition': [c for c in conditions if c['Name'] != 'City'],
        })

    return reduced