In [None]:
import requests

def download_file_from_google_drive(id, destination):
    """Download a Google Drive file to a local path.

    Args:
        id: Google Drive file id, sent as the ``id`` query parameter.
        destination: Local path handed to ``save_response_content``.

    A first request is made with only the file id. If ``get_confirm_token``
    finds a token on that response, the request is repeated with the token
    as the ``confirm`` parameter and the second response is the one saved.
    """
    endpoint = "https://docs.google.com/uc?export=download"
    http = requests.Session()

    reply = http.get(endpoint, params={'id': id}, stream=True)
    confirm = get_confirm_token(reply)

    if confirm:
        # Same session (and therefore same cookies), now with the token attached.
        confirmed_query = {'id': id, 'confirm': confirm}
        reply = http.get(endpoint, params=confirmed_query, stream=True)

    save_response_content(reply, destination)

def get_confirm_token(response):
    """Return the value of the first ``download_warning*`` cookie, if any.

    Args:
        response: Object whose ``cookies.items()`` yields ``(name, value)`` pairs.

    Returns:
        The value of the first cookie whose name starts with
        ``download_warning``, or ``None`` when no such cookie is present.
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(matching_values, None)

def save_response_content(response, destination):
    """Write a streamed response body to ``destination`` in binary mode.

    Args:
        response: Object exposing ``iter_content(chunk_size)`` that yields
            byte chunks.
        destination: Path of the file to create or overwrite.
    """
    chunk_bytes = 32768

    with open(destination, "wb") as out_file:
        for piece in response.iter_content(chunk_bytes):
            if not piece:
                # Empty chunks (keep-alive) carry no payload; skip them.
                continue
            out_file.write(piece)

# download_file_from_google_drive("1m_M2VOjM7Xxq0z9hxgGdYIGIKhxOIU-F", "spider_train.json")
# download_file_from_google_drive("1twy32bdOYcTY8HXrISm1vTBMMjTzI9Fa", "spider_eval.json")


In [None]:
import json

def _load_rows_with_ids(path, split_name):
    """Load a JSON list of rows and tag each row with ``row_id``.

    Args:
        path: Path to a JSON file containing a list of dict rows.
        split_name: Prefix for the generated ids, e.g. ``"spider_train"``.

    Returns:
        The loaded list; every row gains ``row['row_id'] == f"{split_name}|{i}"``
        where ``i`` is the row's position in the file.
    """
    # Context manager so the file handle is closed as soon as parsing finishes.
    with open(path) as f:
        rows = json.load(f)
    for i, row in enumerate(rows):
        row['row_id'] = f"{split_name}|{i}"
    return rows


data = _load_rows_with_ids("spider_train.json", "spider_train")

# The eval ids must be written onto the eval rows. Previously this loop
# iterated over `data` again, relabelling every training row as
# "spider_eval|i" and leaving the eval rows without a row_id.
data_eval = _load_rows_with_ids("spider_eval.json", "spider_eval")

In [None]:
from sketch.examples.prompt_machine import *
# Set the "VERBOSE" entry of PM_SETTINGS to False. PM_SETTINGS is not defined
# in this notebook, so it presumably comes from the star import above — confirm.
PM_SETTINGS["VERBOSE"] = False

In [None]:
# spider_input = json.load(open("spider/train_spider.json"))

In [None]:
# import requests

# API_KEY = # GET THIS FROM SOMEONE.

# def get_gpt3_response(prompt):
#     headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
#     data = {"prompt": prompt, "max_tokens": 200, "temperature": 0, "model": "text-davinci-002"}
#     response = requests.post("https://api.openai.com/v1/completions", headers=headers, json=data)
#     return response.json()



In [None]:
def line_prompt(row, answer=None):
    """Format one example as a question/schema block followed by an SQL fence.

    Args:
        row: Mapping with ``'text_in'`` (the question) and ``'struct_in'``
            (the schema information).
        answer: Optional SQL text. When truthy it is placed inside the code
            fence and the fence is closed; otherwise the fence is left open.

    Returns:
        The formatted prompt fragment as a string.
    """
    open_block = (
        f"Question: {row['text_in']};\n"
        f"Schema Information: {row['struct_in']};\n"
        "SQL:\n```\n"
    )
    if not answer:
        return open_block
    return open_block + f"{answer}\n```\n"

def get_preprompt(ids):
    """Build the few-shot prefix from solved examples in the global ``data``.

    Args:
        ids: Iterable of indices into ``data``.

    Returns:
        One string: each ``data[idx]`` rendered by ``line_prompt`` with its
        ``'query'`` as the answer plus a trailing newline, and the rendered
        examples joined by a newline.
    """
    solved_examples = []
    for idx in ids:
        example = data[idx]
        solved_examples.append(line_prompt(example, data[idx]['query']) + "\n")
    return '\n'.join(solved_examples)

# Few-shot prefix built once from four fixed training rows; every prompt
# produced by get_prompt starts with it.
preprompt123 = get_preprompt([123, 456, 1516, 1522])


def get_prompt(row):
    """Return the full prompt for ``row``.

    The result is the shared few-shot prefix followed by ``row`` rendered by
    ``line_prompt`` without an answer.
    """
    few_shot_prefix = preprompt123
    return few_shot_prefix + line_prompt(row)

In [None]:
import os

# Root directory holding one sub-directory per Spider database
# (get_result opens "<db_name>/<db_name>.sqlite" beneath it).
# Set the SPIDER_DB_PATH environment variable to point at a local checkout;
# the original hard-coded location is kept as the default when it is unset.
SPIDER_DB_PATH = os.environ.get(
    "SPIDER_DB_PATH",
    "/home/jawaugh/benchmarks/raw/spider/spider/database/",
)

In [None]:
# compare executed results
import os

def get_result(db_name, sql):
    """Execute ``sql`` against a Spider SQLite database and return all rows.

    Args:
        db_name: Database name; the file opened is
            ``SPIDER_DB_PATH/<db_name>/<db_name>.sqlite``.
        sql: SQL text to execute.

    Returns:
        The list of row tuples from ``cursor.fetchall()``.

    Raises:
        sqlite3.Error: Propagated unchanged when the statement cannot be
            executed; callers decide how to record the failure.
    """
    import sqlite3  # kept as a function-local import, as in the original

    db_file = os.path.join(SPIDER_DB_PATH, f"{db_name}/{db_name}.sqlite")
    conn = sqlite3.connect(db_file)
    try:
        # Decode TEXT values leniently so undecodable bytes do not raise.
        conn.text_factory = lambda b: b.decode(errors="ignore")
        cur = conn.cursor()
        cur.execute(sql)
        return cur.fetchall()
    finally:
        # Always release the connection; it was previously never closed,
        # leaking one open database handle per executed query.
        conn.close()

In [None]:
# Prompt object called by run_a_row with the full prompt text. It is configured
# with temperature=0, model "code-davinci-002", and "```" as the stop string
# (line_prompt opens the SQL answer inside a ``` fence).
# NOTE(review): GPT3Prompt is not defined in this notebook (presumably from the
# prompt_machine star import); its template/stop semantics are assumed — confirm.
gpt3responsder = GPT3Prompt("standard_prompt", "{{prompt}}", temperature=0, model_name="code-davinci-002", stop="```")

def _execute_or_error(db_id, sql, report=False):
    """Run ``sql`` through ``get_result``, returning "EXECUTION ERROR" on failure."""
    try:
        return get_result(db_id, sql)
    except Exception as e:
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still
        # stops a long evaluation run.
        if report:
            print(e)
        return "EXECUTION ERROR"


def run_a_row(row, verbose=True):
    """Query the model for one row and execute gold, GPT-3 and seq_out SQL.

    Args:
        row: Mapping with ``'query'`` (gold SQL), ``'seq_out'`` (second
            system's SQL) and ``'db_id'``, plus whatever ``get_prompt`` reads.
            ``'question'`` is only read when ``verbose`` is true.
        verbose: When true, print the question, the three SQL strings and the
            three execution results.

    Returns:
        ``(sql, (real_data, gpt3_data, seq_out_data))`` where ``sql`` maps
        ``'query_answer'``, ``'gpt3_answer'`` and ``'seq_out_answer'`` to the
        SQL strings, and each data item is the fetched rows or the string
        ``"EXECUTION ERROR"`` when that SQL failed to execute.
    """
    prompt = get_prompt(row)

    try:
        gpt3_answer = gpt3responsder(prompt)
    except Exception:
        # Record the failure as the "answer"; it will then fail to execute as
        # SQL and be scored as an execution error.
        gpt3_answer = "Something failed in querying openai"
    query_answer = row['query']
    seq_out_answer = row['seq_out']

    if verbose:
        print("Question: ", row['question'])
        print("GOLD: ", query_answer)
        print("GPT3: ", gpt3_answer)
        print("SEQ_OUT: ", seq_out_answer)

    # Execute all three queries. Only a gold-query failure is printed, and it
    # is printed regardless of `verbose`, matching the previous behaviour.
    real_data = _execute_or_error(row['db_id'], query_answer, report=True)
    gpt3_data = _execute_or_error(row['db_id'], gpt3_answer)
    seq_out_data = _execute_or_error(row['db_id'], seq_out_answer)

    if verbose:
        print("REAL: ", real_data)
        print("GPT3: ", gpt3_data)
        print("T5: ", seq_out_data)
    sql = {'query_answer': query_answer, 'gpt3_answer': gpt3_answer, 'seq_out_answer': seq_out_answer}
    return sql, (real_data, gpt3_data, seq_out_data)

def get_scores(real_data, gpt3_data, seq_out_data):
    """Score one row's execution results.

    Args:
        real_data: Rows returned by the gold query, or "EXECUTION ERROR".
        gpt3_data: Rows returned by the GPT-3 query, or "EXECUTION ERROR".
        seq_out_data: Rows returned by the seq_out query, or "EXECUTION ERROR".

    Returns:
        Dict with boolean entries ``gpt3_correct``, ``seq_out_correct``,
        ``gpt3_executed``, ``seq_out_executed`` and ``query_executed``.
        A prediction is correct only if it executed and its rows equal the
        gold rows.
    """
    execution_error = "EXECUTION ERROR"
    gpt3_executed = gpt3_data != execution_error
    seq_out_executed = seq_out_data != execution_error

    scores = {}
    # Require successful execution before comparing: otherwise a failed gold
    # query and a failed prediction both hold the error string, compare equal,
    # and the prediction is wrongly counted as correct.
    scores['gpt3_correct'] = gpt3_executed and bool(gpt3_data == real_data)
    scores['seq_out_correct'] = seq_out_executed and bool(seq_out_data == real_data)
    scores['gpt3_executed'] = gpt3_executed
    scores['seq_out_executed'] = seq_out_executed
    scores['query_executed'] = real_data != execution_error
    return scores


In [None]:
import random
import pandas as pd
import time

def _print_run_summary(results):
    """Print the row count and the percentage mean of each score column."""
    print("Ran for {} rows".format(len(results)))
    for metric in (
        "seq_out_correct",
        "seq_out_executed",
        "gpt3_correct",
        "gpt3_executed",
        "query_executed",
    ):
        print(results[metric].mean()*100, f"% of {metric}")


def get_result_run(dataset, sleep_seconds=10.0):
    """Evaluate every row of ``dataset`` and collect SQL strings and scores.

    Args:
        dataset: Sized iterable of rows accepted by ``run_a_row``; each row
            must also carry ``'row_id'``.
        sleep_seconds: Pause before each row to stay under API rate limits.
            Defaults to the previous fixed 10 seconds; pass 0 to disable.

    Returns:
        pandas.DataFrame with one record per row: ``id``, the three SQL
        strings from ``run_a_row`` and the five scores from ``get_scores``.
        A running summary is printed after every row.
    """
    all_results = []
    print(f"{len(dataset)}")
    for i, row in enumerate(dataset):
        if sleep_seconds > 0:
            time.sleep(sleep_seconds)  # crude rate limiting between API calls
        sql, results = run_a_row(row, verbose=False)
        scores = get_scores(*results)
        all_results.append({'id': row['row_id'], **sql, **scores})
        print(f"{i} done")
        _print_run_summary(pd.DataFrame(all_results))
        print("=="*40)
    return pd.DataFrame(all_results)

In [None]:
# Evaluate 200 rows drawn from `data` with random.sample. No seed is set, so
# each run evaluates a different subset. get_result_run sleeps before every
# row, so this cell is slow by design.
df = get_result_run(random.sample(data, 200))

In [None]:
# Final summary over the whole run: row count, then each score column's mean
# expressed as a percentage.
print("Ran for {} rows".format(len(df)))
for metric in (
    "seq_out_correct",
    "seq_out_executed",
    "gpt3_correct",
    "gpt3_executed",
    "query_executed",
):
    print(df[metric].mean()*100, f"% of {metric}")

In [None]:
# df = df.rename(columns={'t5_answer': 'seq_out_answer'})

In [None]:
# Show the gold SQL next to the GPT-3 SQL. Selecting several columns needs a
# list (a bare tuple is looked up as a single key and raises KeyError), and
# the column written by run_a_row is the lowercase 'gpt3_answer'.
df[['query_answer', 'gpt3_answer']]

In [None]:
# Persist the results frame to a parquet file in the working directory.
df.to_parquet("gpt3_results_2.parquet")

In [None]:
# Count rows where no real completion was obtained. run_a_row records
# "Something failed in querying openai" when the API call raises, so that
# sentinel is counted alongside the original 'COMPLETION_ERROR' marker
# (presumably emitted by the prompt wrapper itself — confirm).
df.gpt3_answer.isin(['COMPLETION_ERROR', "Something failed in querying openai"]).sum()

In [None]:
from pprint import pprint

In [None]:
def _show_query_result(label, db_id, sql, limit=10):
    """Execute ``sql`` and print at most ``limit`` rows, or the failure."""
    try:
        rows = get_result(db_id, sql)
    except Exception as e:
        print(f"{label} QueryFAILED:", e)
        return
    if len(rows) > limit:
        rows = rows[:limit]
        print(f"Truncating {label} response")
    print(f"{label} QueryResult:", rows)


# Inspect up to 20 distinct rows that GPT-3 got wrong. Sampling once without
# replacement avoids showing the same failure twice and copes with fewer than
# 20 (or zero) failures.
gpt3_failures = df[~df['gpt3_correct'].astype(bool)]
n_to_show = min(20, len(gpt3_failures))
shown_failures = gpt3_failures.sample(n=n_to_show) if n_to_show else gpt3_failures

for failed_result in shown_failures.to_dict('records'):
    # ids look like "<split>|<index>"; the index addresses the row in `data`.
    failed_row = data[int(failed_result['id'].split('|')[1])]
    print("Question: ", failed_row['question'])

    print("GPT3 Query:", failed_result['gpt3_answer'])
    print("GOLD Query:", failed_result['query_answer'])

    _show_query_result("GPT3", failed_row['db_id'], failed_result['gpt3_answer'])
    # The gold query is guarded as well: rows whose gold SQL fails to execute
    # also land in this set and previously aborted the whole loop.
    _show_query_result("GOLD", failed_row['db_id'], failed_result['query_answer'])
    print("========================================")

In [None]:
# Spot-check: show the question text of training row 1549.
data[1549]['question']

In [None]:
# Preview the first rows of the results frame.
df.head()

In [None]:
# response = get_gpt3_response(get_prompt(data[555]))

In [None]:
# Sum of the 'gpt3_correct' column in a previously saved results file.
# NOTE(review): this reads "GPT3_vs_T5.parquet", not the
# "gpt3_results_2.parquet" file written earlier in this notebook — confirm
# which run this number is meant to describe.
pd.read_parquet("GPT3_vs_T5.parquet")['gpt3_correct'].sum()

In [None]:
# Ad-hoc ratio; presumably correct/total from an earlier run — confirm where
# 31 and 44 come from.
31/44

In [None]:
# Text of the first completion choice in a raw API response.
# NOTE(review): in the visible notebook `response` is only assigned in a
# commented-out cell, so this raises NameError on a fresh kernel — confirm
# whether this cell is still needed.
response['choices'][0]['text']

In [None]:
# Spot-check: gold SQL ('query') of training row 555.
data[555]['query']

In [None]:
# Spot-check: the 'seq_out' SQL stored for the same training row 555.
data[555]['seq_out']