# In-context learning for Citation Prediction

In [2]:
import dspy
import pandas as pd
import ast
import numpy as np
import os
from numpy.linalg import norm
from tqdm import tqdm
from pathlib import Path
# from operator import add
from PyPDF2 import PdfReader
from openai import OpenAI
from dspy.evaluate import Evaluate
from dotenv import load_dotenv

## Get the test data

In [3]:
query_candidate_data = pd.read_csv('~/test.qrel.cid', sep=' ', header=None, names=['query', 'candidate', 'bool'])

In [4]:

with open('/Users/jamie/qpaper_to_emb', 'r') as f:
    query_papers = [line.strip() for line in f]

with open('/Users/jamie/cpaper_to_emb', 'r') as f:
    candidate_papers = [line.strip() for line in f]

print(f'len(query_papers): {len(query_papers)}')
print(f'len(candidate_papers): {len(candidate_papers)}')


len(query_papers): 115
len(candidate_papers): 637


In [5]:
counter_4 = 0
valid_rows = pd.DataFrame()
query_dir = '/Users/jamie/s2-folks/examples/python/get_open_access_pdf/query_papers'
candidate_dir = '/Users/jamie/s2-folks/examples/python/get_open_access_pdf/cand_papers_combined'
# Iterate over the rows of the data
for _, row in query_candidate_data.iterrows():
    query_file = os.path.join(query_dir, str(row['query']) + '.pdf')
    candidate_file = os.path.join(candidate_dir, str(row['candidate']) + '.pdf')

    # Check if both files exist
    if os.path.isfile(query_file) and os.path.isfile(candidate_file):
        # If both files exist, append the row to valid_rows
        valid_rows = valid_rows._append(row)
        
# Reset the index of valid_rows
valid_rows.reset_index(drop=True, inplace=True)
print(valid_rows.head())
print(f'Number of query candidate pairs with valid files: {len(valid_rows)}')

     query candidate  bool
0  3498240   1824499     1
1  3498240  53645322     0
2  3498240   1915951     0
3  3498240   3048298     0
4  3498240   3627503     0
Number of query candidate pairs with valid files: 651


In [6]:
data = [{"query_file": query_file, "candidate_file": candidate_file, "cites": bool(bool_)} for query_file, candidate_file, bool_ in zip(valid_rows['query'], valid_rows['candidate'], valid_rows['bool'])]
data = [dspy.Example(**x).with_inputs('query_file', 'candidate_file') for x in data]

def split_data(data, split_ratio, seed=42):
    np.random.seed(seed)
    indices = np.random.permutation(len(data))
    split_index = int(split_ratio * len(data))
    train_indices = indices[:split_index]
    test_indices = indices[split_index:]
    trainset = [data[i] for i in train_indices]
    testset = [data[i] for i in test_indices]
    return trainset, testset

# trainset, testset = split_data(data, 0)
trainset = data


## Chunker

In [7]:
load_dotenv()
llm = dspy.OpenAI(model="gpt-3.5-turbo")
dspy.settings.configure(lm=llm, rm=None)

client = OpenAI(
    # this is also the default, it can be omitted
    api_key=os.environ['OPENAI_API_KEY'],
)

In [8]:
class Chunker:
    def __init__(self, context_window=3000, max_windows=5):
        self.context_window = context_window
        self.max_windows = max_windows
        self.window_overlap = 0.02

    def __call__(self, paper):
        snippet_idx = 0

        while snippet_idx < self.max_windows and paper:
            endpos = int(self.context_window * (1.0 + self.window_overlap))
            snippet, paper = paper[:endpos], paper[endpos:]

            next_newline_pos = snippet.rfind('\n')
            if paper and next_newline_pos != -1 and next_newline_pos >= self.context_window // 2:
                paper = snippet[next_newline_pos+1:] + paper
                snippet = snippet[:next_newline_pos]

            yield snippet_idx, snippet.strip()
            snippet_idx += 1

## DSPy Module

In [9]:
def get_embeddings(texts, model="text-embedding-3-small", save_file=None):
    if save_file and Path(save_file).exists():
        with open(save_file, 'r') as f:
            # print(f"Loading embeddings from {save_file}")
            embeddings = [ast.literal_eval(line.strip()) for line in f]
        return embeddings
        
    try:
        response = client.embeddings.create(input=texts, model=model)
        embeddings = [embedding.embedding for embedding in response.data]
        if save_file: # Save the embeddings to a file
            with open(save_file, 'w') as f:
                # print(f"Saving embeddings to {save_file}")
                for embedding in embeddings:
                    f.write(str(embedding) + '\n')
        return embeddings
    except Exception as e:
        print("Error during API call:", e)
        return []
    
def get_most_similar_chunk(query_embedding, candidate_embeddings, candidate_chunks):
    similarities = np.dot(candidate_embeddings, query_embedding) / (norm(candidate_embeddings, axis=1) * norm(query_embedding))
    most_similar_idx = np.argmax(similarities)
    return candidate_chunks[most_similar_idx]
    
    
class PredictCitation(dspy.Signature):
    __doc__ = """Predict if the two chunks are related by a citation. Consider all possible ways in which a citation could occur, such as direct quotes, paraphrasing, or referring to the same ideas or data. Don't be afraid to predict that the chunks are related by a citation. If you're not sure, it's better to predict that they are related."""   
    query_chunk: str = dspy.InputField(desc='Query chunk to compare to the candidate chunk.')
    candidate_chunk: str = dspy.InputField(desc='Candidate chunk to compare to the query chunk.')
    answer: bool = dspy.OutputField(desc="either True or False", prefix="Answer:")


class PredictCitationAndResolve(dspy.Module):
    def __init__(self, context_window=3000, max_windows=5, resolve_function=any,
                 candidate_folder='/Users/jamie/s2-folks/examples/python/get_open_access_pdf/cand_papers_combined', 
                 query_folder='/Users/jamie/s2-folks/examples/python/get_open_access_pdf/query_papers',
                 reset_embedding=False):
        super().__init__()
        
        self.chunk = Chunker(context_window=context_window, max_windows=max_windows)
        # self.predict = dspy.TypedPredictor(PredictCitation)
        # self.predict = dspy.TypedChainOfThought(PredictCitation)
        self.predict = dspy.ChainOfThought(PredictCitation)
        self.resolve_function = resolve_function
        self.query_folder = query_folder
        self.candidate_folder = candidate_folder
        os.makedirs('embeddings', exist_ok=True)
        if reset_embedding:
            for emb_file in os.listdir('embeddings'):
                os.remove(f'embeddings/{emb_file}')

    def forward(self, query_file, candidate_file):
        predictions = []
        
        # Get the text from the pdfs
        query_pdf = PdfReader(f'{self.query_folder}/{query_file}.pdf')
        query_text = ""
        for page in query_pdf.pages:
            page_text = page.extract_text()
            if page_text:
                query_text += page_text + " "  # Adding space to separate text between pages
        query_text = query_text.replace("\n", " ")
        
        candidate_pdf = PdfReader(f'{self.candidate_folder}/{candidate_file}.pdf')
        candidate_text = ""
        for page in candidate_pdf.pages:
            page_text = page.extract_text()
            if page_text:
                candidate_text += page_text + " "
        candidate_text = candidate_text.replace("\n", " ")
        
        # for each chunk in the paper
        query_chunks = [snippet for _, snippet in self.chunk(query_text)]
        candidate_chunks = [snippet for _, snippet in self.chunk(candidate_text)]
        
        # Create embeddings for the chunks
        candidate_embeddings = get_embeddings(candidate_chunks, save_file=f'embeddings/candidate_{candidate_file}.emb')
        query_embeddings = get_embeddings(query_chunks, save_file=f'embeddings/query_{query_file}.emb')
        
        for snippet, query_embedding in zip(query_chunks, query_embeddings):
            # Get the candidate chunk that is most similar to the snippet
            candidate_chunk = get_most_similar_chunk(query_embedding, candidate_embeddings, candidate_chunks)
            prediction = self.predict(query_chunk=snippet, candidate_chunk=candidate_chunk)
            # print(prediction)
            predictions.append(prediction.answer=='True')

        return dspy.Prediction(predictions=predictions, resolved=self.resolve_function(predictions))

In [10]:
pipeline_chunking = PredictCitationAndResolve(max_windows=15, context_window=1000, reset_embedding=False)

## Example

In [11]:
# chunker = Chunker(context_window=1000, max_windows=15)
# query_pdf = PdfReader(f'/Users/jamie/s2-folks/examples/python/get_open_access_pdf/query_papers/1323414.pdf')
# query_text = ""
# for page in query_pdf.pages:
#     page_text = page.extract_text()
#     if page_text:
#         query_text += page_text + " "  # Adding space to separate text between pages
# query_text = query_text.replace("\n", " ")
# query_chunks = [snippet for _, snippet in chunker(query_text)]
# print(query_chunks)

In [12]:
# print(len(query_chunks[0]))
# print(len(query_chunks))

In [13]:
# # get an example
# example = trainset[-2]
# example_x = example.inputs()
# example_y = example.labels()
# print(example_x)
# print(example_y)

# prediction = pipeline_chunking(**example_x)
# print(prediction)
# print(example_y.cites)

In [14]:
llm.inspect_history(n=5)

## Evaluate

In [15]:
def metric(example, result):
    '''Match metric'''
    return 1 if example.cites == result.resolved else 0

In [16]:
# evaluate = Evaluate(devset=trainset, metric=metric, num_threads=8, display_progress=True, display_table=0, max_errors=100, return_outputs=True)
# outputs = evaluate(pipeline_chunking)

In [17]:
# all_predictions = []
# for x in outputs[1]:
#     if type(x[1])==dspy.Prediction:
#         all_predictions.append(x[1].resolved)
#     else:
#         all_predictions.append(np.nan)
    

# all_labels = [x[0].cites for x in outputs[1]]
# print(len(all_predictions))

# with open('darwin/eval/predictions_COT_large_prompt_1000.txt', 'w') as f:
#     for pred in all_predictions:
#         f.write(str(pred) + '\n')

In [18]:
# # Compute the accuracy of the final predictions
# correct_predictions = [prediction == label for prediction, label in zip(all_predictions, all_labels)]
# accuracy = sum(correct_predictions) / len(correct_predictions)
# print(f'Accuracy: {accuracy:.2f}')

# # Compute the recall of the final predictions
# true_positives = sum([prediction and label for prediction, label in zip(all_predictions, all_labels)])
# false_negatives = sum([not prediction and label for prediction, label in zip(all_predictions, all_labels)])
# recall = true_positives / (true_positives + false_negatives)
# print(f'Recall: {recall: .2f}')

# # Compute the precision of the final predictions
# true_positives = sum([prediction and label for prediction, label in zip(all_predictions, all_labels)])
# false_positives = sum([prediction and not label for prediction, label in zip(all_predictions, all_labels)])
# precision = true_positives / (true_positives + false_positives)
# print(f'Precision: {precision:.2f}')

# # F1 score
# f1 = 2 * (precision * recall) / (precision + recall)
# print(f'F1 Score: {f1:.2f}')

In [19]:
# all_predictions

NameError: name 'all_predictions' is not defined

Weird paper

In [None]:
PdfReader('darwin/query_papers/53079158.pdf').pages[-1].extract_text()

In [20]:
llm.inspect_history(n=5)

## Optimizer Class

### Produce the dataset where dspy will retrieve from
#### Each sample has the following format. "Query Chunk: ...\n Candidate Chunk: ...\n Answer: ...\n  " 

In [24]:
test_papers = []
r_papers = []
with open('/Users/jamie/link-recorder-final-1', 'r') as f:
    for line in f:
        temp = line.split('\t')
        tpaper = temp[0].strip()
        rpaper = temp[1].strip()
        test_papers.append(tpaper)
        r_papers.append(rpaper)
test_retrieved_data = pd.DataFrame({'tpaper': test_papers, 'rpaper': r_papers})

In [36]:
valid_rows_retrieved = pd.DataFrame()
retrieved_dir = '/Users/jamie/s2-folks/examples/python/get_open_access_pdf/r-paper-final'
for _, row in test_retrieved_data.iterrows():
    test_file_1 = os.path.join(query_dir, str(row['tpaper']) + '.pdf')
    test_file_2 = os.path.join(candidate_dir, str(row['tpaper']) + '.pdf')
    r_file = os.path.join(retrieved_dir, str(row['rpaper']) + '.pdf')

    # Check if both files exist
    if (os.path.isfile(test_file_1) or os.path.isfile(test_file_2)) and os.path.isfile(r_file):
        # If both files exist, append the row to valid_rows
        valid_rows_retrieved = valid_rows_retrieved._append(row)
valid_rows_retrieved.reset_index(drop=True, inplace=True)
print(valid_rows_retrieved.head())
print(f'Number of query candidate pairs with valid files: {len(valid_rows_retrieved)}')

     tpaper     rpaper
0  16897790     252854
1   2538574  253145517
2  11633392  113541825
3   4655781    2011582
4   6833818  189960050
Number of query candidate pairs with valid files: 738


In [45]:
# Randoly select 100 to set up the retrieval dataset
dspy_r_set = pd.DataFrame(columns=['query', 'candidate', 'label'])
valid_r_papers = valid_rows_retrieved['rpaper'].to_numpy()
sampled_df = valid_rows_retrieved.sample(n=100)
for _, row in sampled_df.iterrows():
    test_set_paper = row['tpaper']
    retrieved_paper = row['rpaper']
    new_row = {'query': test_set_paper, 'candidate': retrieved_paper, 'label': 1}
    dspy_r_set = pd.concat([dspy_r_set, pd.DataFrame([new_row])], ignore_index=True)
    
    # Get 8 negative samples for one positive sample as in the SPECTER svm experiment.
    neg_papers = np.random.choice(valid_r_papers, size=8)
    for neg_p in neg_papers:
        new_row = {'query': test_set_paper, 'candidate': neg_p, 'label': 0}
        dspy_r_set = pd.concat([dspy_r_set, pd.DataFrame([new_row])], ignore_index=True)
print(dspy_r_set.head())
print(f'Number of query candidate pairs in dspy retrieval set: {len(dspy_r_set)}')

     query  candidate label
0  6869636  235125640     1
1  6869636  114250615     0
2  6869636  111334241     0
3  6869636  189960050     0
4  6869636    2439435     0
Number of query candidate pairs in dspy retrieval set: 900


In [40]:
def get_most_similar_chunk_emb(query_embedding, candidate_embeddings, candidate_chunks):
    similarities = np.dot(candidate_embeddings, query_embedding) / (norm(candidate_embeddings, axis=1) * norm(query_embedding))
    most_similar_idx = np.argmax(similarities)
    return candidate_chunks[most_similar_idx], candidate_embeddings[most_similar_idx]

In [58]:
def get_most_similar_emb_idx(query_embedding, candidate_embeddings):
    similarities = np.dot(candidate_embeddings, query_embedding) / (norm(candidate_embeddings, axis=1) * norm(query_embedding))
    most_similar_idx = np.argmax(similarities)
    return most_similar_idx

In [None]:
# produce retrieval set
query_folder='/Users/jamie/s2-folks/examples/python/get_open_access_pdf/query_papers'
query_folder_2 = '/Users/jamie/s2-folks/examples/python/get_open_access_pdf/cand_papers_combined'
candidate_folder= '/Users/jamie/s2-folks/examples/python/get_open_access_pdf/r-paper-final'
chunk = Chunker(context_window=1000, max_windows=15)
dspy_r_emb = []
dspy_r_text = []
for _, row in dspy_r_set.iterrows():
    # Get the text from the pdfs
    query_file_1 = os.path.join(query_folder, str(row['query']) + '.pdf')
    query_file_2 = os.path.join(query_folder_2, str(row['query']) + '.pdf')
    if os.path.isfile(query_file_1):
        query_file_path = query_file_1
    if os.path.isfile(query_file_2):
        query_file_path = query_file_2
    try:
        query_pdf = PdfReader(query_file_path)
    except:
        cotinue
    query_text = ""
    for page in query_pdf.pages:
        page_text = page.extract_text()
        if page_text:
            query_text += page_text + " "  # Adding space to separate text between pages
    query_text = query_text.replace("\n", " ")
    
    candidate_file = row['candidate']
    try:
        candidate_pdf = PdfReader(f'{candidate_folder}/{candidate_file}.pdf')
    except:
        continue
    candidate_text = ""
    for page in candidate_pdf.pages:
        page_text = page.extract_text()
        if page_text:
            candidate_text += page_text + " "
    candidate_text = candidate_text.replace("\n", " ")
    
    # for each chunk in the paper
    query_chunks = [snippet for _, snippet in chunk(query_text)]
    candidate_chunks = [snippet for _, snippet in chunk(candidate_text)]
    
    # Create embeddings for the chunks
    candidate_embeddings = get_embeddings(candidate_chunks, save_file=f'embeddings/candidate_{candidate_file}.emb')
    query_embeddings = get_embeddings(query_chunks, save_file=f'embeddings/query_{query_file}.emb')
    
    for snippet, query_embedding in zip(query_chunks, query_embeddings):
        # Get the candidate chunk that is most similar to the snippet
        candidate_chunk, c_emb = get_most_similar_chunk_emb(query_embedding, candidate_embeddings, candidate_chunks)
        dspy_r_emb.append((query_embedding, c_emb, row['label']))
        dspy_r_text.append((snippet, candidate_chunk, row['label']))

In [49]:
len(dspy_r_emb), len(dspy_r_text)

(1080, 1080)

In [54]:
dspy_r_emb_concat = []
for q_emb, c_emb, label in dspy_r_emb:
    concat_emb = q_emb + c_emb
    dspy_r_emb_concat.append(concat_emb)

In [59]:
class PredictCitationWithRetrieval(dspy.Signature):
    __doc__ = """Predict if the two chunks are related by a citation. Consider all possible ways in which a citation could occur, such as direct quotes, paraphrasing, or referring to the same ideas or data. Don't be afraid to predict that the chunks are related by a citation. If you're not sure, it's better to predict that they are related."""   
    query_chunk: str = dspy.InputField(desc='Query chunk to compare to the candidate chunk.')
    candidate_chunk: str = dspy.InputField(desc='Candidate chunk to compare to the query chunk.')
    answer: bool = dspy.OutputField(desc="either True or False", prefix="Answer:")
    context: str = dspy.InputField(desc="A good example to learn from.")

In [None]:
class PredictCitationRetrieveAndResolve(dspy.Module):
    def __init__(self, context_window=3000, max_windows=5, resolve_function=any,
                 candidate_folder='/Users/jamie/s2-folks/examples/python/get_open_access_pdf/cand_papers_combined', 
                 query_folder='/Users/jamie/s2-folks/examples/python/get_open_access_pdf/query_papers',
                 reset_embedding=False):
        super().__init__()
        
        self.chunk = Chunker(context_window=context_window, max_windows=max_windows)
        # self.predict = dspy.TypedPredictor(PredictCitation)
        # self.predict = dspy.TypedChainOfThought(PredictCitation)
        self.predict = dspy.Predict(PredictCitationWithRetrieval)
        self.resolve_function = resolve_function
        self.query_folder = query_folder
        self.candidate_folder = candidate_folder
        os.makedirs('embeddings', exist_ok=True)
        if reset_embedding:
            for emb_file in os.listdir('embeddings'):
                os.remove(f'embeddings/{emb_file}')

    def forward(self, query_file, candidate_file):
        predictions = []     
        # Get the text from the pdfs
        query_pdf = PdfReader(f'{self.query_folder}/{query_file}.pdf')
        query_text = ""
        for page in query_pdf.pages:
            page_text = page.extract_text()
            if page_text:
                query_text += page_text + " "  # Adding space to separate text between pages
        query_text = query_text.replace("\n", " ")
        
        candidate_pdf = PdfReader(f'{self.candidate_folder}/{candidate_file}.pdf')
        candidate_text = ""
        for page in candidate_pdf.pages:
            page_text = page.extract_text()
            if page_text:
                candidate_text += page_text + " "
        candidate_text = candidate_text.replace("\n", " ")
        
        # for each chunk in the paper
        query_chunks = [snippet for _, snippet in self.chunk(query_text)]
        candidate_chunks = [snippet for _, snippet in self.chunk(candidate_text)]
        
        # Create embeddings for the chunks
        candidate_embeddings = get_embeddings(candidate_chunks, save_file=f'embeddings/candidate_{candidate_file}.emb')
        query_embeddings = get_embeddings(query_chunks, save_file=f'embeddings/query_{query_file}.emb')
        
        for snippet, query_embedding in zip(query_chunks, query_embeddings):
            # Get the candidate chunk that is most similar to the snippet
            candidate_chunk, candidate_chunk_emb = get_most_similar_chunk_emb(query_embedding, candidate_embeddings, candidate_chunks)
            original_emb_concat = query_embedding + candidate_chunk_emb
            context_idx = get_most_similar_emb_idx(original_emb_concat, dspy_r_emb_concat)
            context_text = dspy_r_text[context_idx]
            if context_text[2]:
                context_answer = "True"
            else:
                context_answer = "False"
            context = f"Query Chunk: {context_text[0]}\nCandidate Chunk: {context_text[1]}\nAnswer: {context_answer}\n"
            prediction = self.predict(query_chunk=snippet, candidate_chunk=candidate_chunk, context=context)
            # print(prediction)
            predictions.append(prediction.answer=='True')
        return dspy.Prediction(context=context, predictions=predictions, resolved=self.resolve_function(predictions))

In [62]:
def metric(example, result):
    '''Match metric'''
    return 1 if example.cites == result.resolved else 0

In [67]:
pipeline_chunking_retrieval = PredictCitationRetrieveAndResolve(max_windows=15, context_window=1000, reset_embedding=False)

In [73]:
evaluate = Evaluate(devset=trainset, metric=metric, num_threads=8, display_progress=True, display_table=0, max_errors=100, return_outputs=True)
outputs = evaluate(pipeline_chunking_retrieval)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)


Error for example in dev set: 		 Socket operation on non-socket

  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
Average Metric: 3 / 3  (100.0):   0%|         | 3/651 [01:38<4:53:29, 27.18s/it][A
Average Metric: 4 / 4  (100.0):   0%|         | 3/651 [01:41<4:53:29, 27.18s/it][A
Average Metric: 4 / 4  (100.0):   1%|         | 4/651 [01:44<3:21:07, 18.65s/it][A
Average Metric: 5 / 5  (100.0):   1%|         | 4/651 [01:46<3:21:07, 18.65s/it][A
Average Metric: 5 / 5  (100.0):   1%| 

Error for example in dev set: 		 Socket operation on non-socket


Average Metric: 7 / 8  (87.5):   1%|          | 8/651 [02:17<1:53:02, 10.55s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
Average Metric: 7 / 9  (77.8):   1%|▏         | 9/651 [02:29<2:09:10, 12.07s/it][A
Average Metric: 8 / 10  (80.0):   1%|         | 9/651 [02:36<2:09:10, 12.07s/it][A
Average Metric: 8 / 10  (80.0):   2%|        | 10/651 [02:38<1:59:49, 11.22s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 9 / 11  (81.8):   2%|        | 10/651 [02:43<1:59:49, 11.22s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

Average Metric: 9 / 12  (75.0):   2%|▏       | 12/651 [03:05<2:12:27, 12.44s/it][A
  return v1_cached_gpt3_turbo_

Error for example in dev set: 		 Socket operation on non-socket


Average Metric: 12 / 16  (75.0):   2%|▏      | 15/651 [03:41<1:51:41, 10.54s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
Average Metric: 13 / 19  (68.4):   3%|▏      | 18/651 [04:27<2:29:07, 14.14s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
Average Metric: 13 / 20  (65.0):   3%|▏      | 19/651 [04:40<2:18:18, 13.13s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
Average Metric: 14 / 21  (66.7):   3%|▏      | 20/651 [04:5

Error for example in dev set: 		 Socket operation on non-socket


Average Metric: 31 / 41  (75.6):   6%|▍      | 40/651 [08:20<2:06:23, 12.41s/it][A
Average Metric: 31 / 41  (75.6):   6%|▍      | 41/651 [08:24<1:54:30, 11.26s/it][A
Average Metric: 32 / 42  (76.2):   6%|▍      | 41/651 [08:26<1:54:30, 11.26s/it][A
Average Metric: 32 / 42  (76.2):   6%|▍      | 42/651 [08:28<1:33:50,  9.25s/it][A
Average Metric: 33 / 43  (76.7):   6%|▍      | 42/651 [08:30<1:33:50,  9.25s/it][A
Average Metric: 33 / 43  (76.7):   7%|▍      | 43/651 [08:35<1:25:46,  8.47s/it][A
Average Metric: 34 / 44  (77.3):   7%|▍      | 43/651 [08:40<1:25:46,  8.47s/it][A
Average Metric: 34 / 44  (77.3):   7%|▍      | 44/651 [08:43<1:24:06,  8.31s/it][A
Average Metric: 35 / 45  (77.8):   7%|▍      | 44/651 [08:47<1:24:06,  8.31s/it][A
Average Metric: 35 / 45  (77.8):   7%|▍      | 45/651 [08:55<1:28:43,  8.78s/it][A
Average Metric: 36 / 46  (78.3):   7%|▍      | 45/651 [09:00<1:28:43,  8.78s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_

Error for example in dev set: 		 Socket operation on non-socket

Average Metric: 54 / 70  (77.1):  11%|▋      | 69/651 [12:47<1:42:16, 10.54s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

Average Metric: 55 / 71  (77.5):  11%|▊      | 70/651 [13:02<1:40:34, 10.39s/it][A
Average Metric: 55 / 71  (77.5):  11%|▊      | 71/651 [13:08<1:51:46, 11.56s/it][A
Average Metric: 56 / 72  (77.8):  11%|▊      | 71/651 [13:13<1:51:46, 11.56s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 56 / 72  (77.8):  11%|▊      | 72/651 [13:19<1:51:33, 11.56s/it][A
Average Metric: 56 / 73  (76.7):  11%|▊      | 72/651 [13:26<1:51:33, 11.56s/it][A
Average Metric: 56 / 73  (76.7):  11%|▊      | 73/651 [13:31<1:50:36, 11.48s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
Average Metric: 56 / 74  (75.7):  11%|▊      | 74/651 [13:38<1:42:15, 10.63s/it][A
Average Metric: 57 / 75  (76.0):  11%|▊      | 74/651 [13:43<1:42:15

Error for example in dev set: 		 negative seek value -1


  return v1_cached_gpt3_turbo_request_v2(**kwargs)

Average Metric: 69 / 88  (78.4):  14%|▉      | 88/651 [16:15<2:03:30, 13.16s/it][A
Average Metric: 69.0 / 89  (77.5):  14%|▋    | 88/651 [16:19<2:03:30, 13.16s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 69.0 / 89  (77.5):  14%|▋    | 89/651 [16:24<1:53:58, 12.17s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
Average Metric: 70.0 / 90  (77.8):  14%|▋    | 90/651 [16:32<1:45:40, 11.30s/it][A
Average Metric: 70.0 / 91  (76.9):  14%|▋    | 90/651 [16:36<1:45:40, 11.30s/it][A
Average Metric: 70.0 / 91  (76.9):  14%|▋    | 91/651 [16:38<1:34:08, 10.09s/it][A
Average Metric: 71.0 / 92  (77.2):  14%|▋    | 91/651 [16:41<1:34:08, 10.09s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
Average Metric: 72.0 / 93  (77.4):  14%|▋    | 92/651 [16:52<1:23:04,  8.92s/it][A
Average Metri

Error for example in dev set: 		 Socket operation on non-socket

Average Metric: 73.0 / 98  (74.5):  15%|▋    | 97/651 [18:22<2:57:14, 19.20s/it][A
Average Metric: 73.0 / 98  (74.5):  15%|▊    | 98/651 [18:24<2:57:31, 19.26s/it][A
Average Metric: 74.0 / 99  (74.7):  15%|▊    | 98/651 [18:30<2:57:31, 19.26s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 75.0 / 100  (75.0):  15%|▌   | 99/651 [18:39<2:23:08, 15.56s/it][A
Average Metric: 75.0 / 100  (75.0):  15%|▍  | 100/651 [18:44<2:12:34, 14.44s/it]

Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}


[A
Average Metric: 76.0 / 101  (75.2):  15%|▍  | 100/651 [18:48<2:12:34, 14.44s/it]

Error for example in dev set: 		 shapes (0,) and (1536,) not aligned: 0 (dim 0) != 1536 (dim 0)


[A
Average Metric: 76.0 / 101  (75.2):  16%|▍  | 101/651 [18:51<1:53:17, 12.36s/it][A
Average Metric: 77.0 / 102  (75.5):  16%|▍  | 101/651 [18:54<1:53:17, 12.36s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 77.0 / 102  (75.5):  16%|▍  | 102/651 [18:58<1:37:20, 10.64s/it][A
Average Metric: 78.0 / 103  (75.7):  16%|▍  | 102/651 [19:00<1:37:20, 10.64s/it][A
Average Metric: 78.0 / 103  (75.7):  16%|▍  | 103/651 [19:05<1:27:55,  9.63s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 79.0 / 104  (76.0):  16%|▍  | 104/651 [19:16<1:27:23,  9.59s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 79.0 / 105  (75.2):  16%|▍  | 104/651 [19:22<1:27:23,  9.59s/it][A
Average Metric: 79.0 / 105  (75.2):  16%|▍  | 105/651 [19:26<1:32:04, 10.12s/it][A
Average Metric: 80.0 / 106  (75.5):  16%|▍  | 105/651 [19:31<1:32:04, 10.12s/it][A
Average Metric: 80.0 / 106  (75.

Error for example in dev set: 		 Socket operation on non-socket

[A
Average Metric: 95.0 / 124  (76.6):  19%|▌  | 123/651 [24:02<2:42:23, 18.45s/it][A
Average Metric: 95.0 / 124  (76.6):  19%|▌  | 124/651 [24:11<2:28:32, 16.91s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

Average Metric: 97.0 / 126  (77.0):  19%|▌  | 125/651 [24:37<2:30:29, 17.17s/it][A
Average Metric: 97.0 / 126  (77.0):  19%|▌  | 126/651 [24:42<2:28:07, 16.93s/it][A
Average Metric: 98.0 / 127  (77.2):  19%|▌  | 126/651 [24:45<2:28:07, 16.93s/it][A
Average Metric: 98.0 / 127  (77.2):  20%|▌  | 127/651 [24:49<2:01:37, 13.93s/it][A
Average Metric: 99.0 / 128  (77.3):  20%|▌  | 127/651 [24:53<2:01:37, 13.93s/it][A
Average Metric: 99.0 / 128  (77.3):  20%|▌  | 128/651 [25:00<1:49:48, 12.60s/it][A
Average Metric: 100.0 / 129  (77.5):  20%|▍ | 128/651 [25:06<1:49:48, 12.60s/it][A
Average Metric: 100.0 / 129  (77.5):  20%|▍ | 129/651 [25:11<1:47:17, 12.33s/it][

Error for example in dev set: 		 Socket operation on non-socket

  return v1_cached_gpt3_turbo_request_v2(**kwargs)

Average Metric: 106.0 / 135  (78.5):  21%|▍ | 135/651 [26:31<2:10:57, 15.23s/it][A
Average Metric: 106.0 / 136  (77.9):  21%|▍ | 135/651 [26:36<2:10:57, 15.23s/it][A
Average Metric: 106.0 / 136  (77.9):  21%|▍ | 136/651 [26:41<1:59:46, 13.95s/it][A
Average Metric: 107.0 / 137  (78.1):  21%|▍ | 136/651 [26:46<1:59:46, 13.95s/it][A
Average Metric: 107.0 / 137  (78.1):  21%|▍ | 137/651 [26:49<1:43:24, 12.07s/it][A
Average Metric: 108.0 / 138  (78.3):  21%|▍ | 137/651 [26:53<1:43:24, 12.07s/it][A
Average Metric: 108.0 / 138  (78.3):  21%|▍ | 138/651 [26:55<1:30:12, 10.55s/it][A
Average Metric: 109.0 / 139  (78.4):  21%|▍ | 138/651 [26:58<1:30:12, 10.55s/it][A
Average Metric: 109.0 / 139  (78.4):  21%|▍ | 139/651 [27:00<1:16:26,  8.96s/it][A
Average Metric: 109.0 / 140  (77.9):  21%|▍ | 139/651 [27:28<1:16:26,  8.96s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

Average Metric: 110.0 / 141  (78.0):  22%|▍ | 140/651 [2

Error for example in dev set: 		 Socket operation on non-socket

[A
Average Metric: 167.0 / 211  (79.1):  32%|▋ | 211/651 [43:08<2:05:48, 17.15s/it][A
Average Metric: 168.0 / 212  (79.2):  32%|▋ | 211/651 [43:15<2:05:48, 17.15s/it][A
Average Metric: 168.0 / 212  (79.2):  33%|▋ | 212/651 [43:19<1:55:01, 15.72s/it][A
Average Metric: 169.0 / 213  (79.3):  33%|▋ | 212/651 [43:25<1:55:01, 15.72s/it][A
Average Metric: 169.0 / 213  (79.3):  33%|▋ | 213/651 [43:33<1:45:05, 14.40s/it][A
Average Metric: 169.0 / 214  (79.0):  33%|▋ | 213/651 [43:42<1:45:05, 14.40s/it][A
Average Metric: 169.0 / 214  (79.0):  33%|▋ | 214/651 [43:48<1:51:51, 15.36s/it][A
Average Metric: 170.0 / 215  (79.1):  33%|▋ | 214/651 [43:50<1:51:51, 15.36s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

Average Metric: 171.0 / 216  (79.2):  33%|▋ | 215/651 [43:59<1:28:11, 12.14s/it][A
Average Metric: 171.0 / 216  (79.2):  33%|▋ | 216/651 [44:06<1:28:44, 12.24s/it][A
Average Metric: 172.0 / 217  (79.3):  33%|▋ | 216/651

Error for example in dev set: 		 Socket operation on non-socket

  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)

Average Metric: 181.0 / 230  (78.7):  35%|▋ | 229/651 [47:00<1:18:12, 11.12s/it][A
Average Metric: 181.0 / 230  (78.7):  35%|▋ | 230/651 [47:03<2:12:55, 18.94s/it][A
Average Metric: 181.0 / 231  (78.4):  35%|▋ | 230/651 [47:05<2:12:55, 18.94s/it][A
Average Metric: 181.0 / 231  (78.4):  35%|▋ | 231/651 [47:10<1:48:15, 15.47s/it][A
Average Metric: 182.0 / 232  (78.4):  35%|▋ | 231/651 [47:17<1:48:15, 15.47s/it][A
Average Metric: 182.0 / 232  (78.4):  36%|▋ | 232/651 [47:21<1:36:29, 13.82s/it][A
Average Metric: 182.0 / 233  (78.1):  36%|▋ | 232/651 [47:24<1:36:29, 13.82s/it][A
Average Metric: 182.0 / 233  (78.1):  36%|▋ | 233/651 [47:28<1:23:40, 12.01s/it][A
Average Metric: 183.0 / 234  (78.2):  36%|▋ | 233/651 [47:34<1:23:40, 12.01s/it][A
Average Metric: 183.0 / 234  (78.2):  36%|▋ | 234/651 [47:39<1:18:55, 11.36s/it][A
Average Metric: 184.0 / 235  (78.3):  36%|▋ | 234/651 [47

Error for example in dev set: 		 Socket operation on non-socket

[A
Average Metric: 215.0 / 271  (79.3):  41%|▊ | 270/651 [55:43<1:05:35, 10.33s/it][A
Average Metric: 215.0 / 271  (79.3):  42%|▊ | 271/651 [55:49<1:15:49, 11.97s/it][A
Average Metric: 215.0 / 272  (79.0):  42%|▊ | 271/651 [55:53<1:15:49, 11.97s/it][A
Average Metric: 215.0 / 272  (79.0):  42%|▊ | 272/651 [55:55<1:07:49, 10.74s/it][A
Average Metric: 215.0 / 273  (78.8):  42%|▊ | 272/651 [55:58<1:07:49, 10.74s/it][A
Average Metric: 215.0 / 273  (78.8):  42%|█▋  | 273/651 [56:01<56:03,  8.90s/it][A
Average Metric: 216.0 / 274  (78.8):  42%|█▋  | 273/651 [56:03<56:03,  8.90s/it][A
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
[A
Average Metric: 217.0 / 275  (78.9):  42%|█▋  | 274/651 [56:25<48:57,  7.79s/it][A
Average Metric: 217.0 / 275  (78.9):  42%|▊ | 275/651 [56:30<1:17:01, 12.29s/it][A
Average Metric: 218.0 / 276  (79.0):  42%|▊ | 275/651 [56:34<1:17:01, 12.29s/it][A
Average Metric: 218.0 / 276  (79.0):  42%|▊ | 276/651 [56:38<1:11:12, 11.39s/it][A
Average Metric: 2

Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}



Average Metric: 324.0 / 405  (80.0):  62%|█▏| 405/651 [1:27:40<49:05, 11.97s/it][A

Error for example in dev set: 		 shapes (0,) and (1536,) not aligned: 0 (dim 0) != 1536 (dim 0)
Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}





Error for example in dev set: 		 shapes (0,) and (1536,) not aligned: 0 (dim 0) != 1536 (dim 0)


Average Metric: 324.0 / 406  (79.8):  62%|█▏| 405/651 [1:27:58<49:05, 11.97s/it][A
Average Metric: 324.0 / 406  (79.8):  62%|█▏| 406/651 [1:27:59<57:52, 14.17s/it][A
Average Metric: 325.0 / 407  (79.9):  62%|█▏| 406/651 [1:28:08<57:52, 14.17s/it][A
Average Metric: 325.0 / 407  (79.9):  63%|█▎| 407/651 [1:28:11<53:45, 13.22s/it][A
Average Metric: 326.0 / 408  (79.9):  63%|█▎| 407/651 [1:28:19<53:45, 13.22s/it][A
Average Metric: 326.0 / 408  (79.9):  63%|█▎| 408/651 [1:28:21<50:32, 12.48s/it][A
Average Metric: 327.0 / 409  (80.0):  63%|█▎| 408/651 [1:28:46<50:32, 12.48s/it][A
Average Metric: 327.0 / 409  (80.0):  63%|▋| 409/651 [1:28:49<1:08:53, 17.08s/it[A
Average Metric: 328.0 / 410  (80.0):  63%|▋| 409/651 [1:28:50<1:08:53, 17.08s/it[A
Average Metric: 328.0 / 410  (80.0):  63%|█▎| 410/651 [1:28:53<52:27, 13.06s/it][A
Average Metric: 329.0 / 411  (80.0):  63%|█▎| 410/651 [1:28:57<52:27, 13.06s/it][A
Average Metric: 329.0 / 411  (80.0):  63%|█▎| 411/651 [1:29:02<44:58, 11.25s

Error for example in dev set: 		 PyCryptodome is required for AES algorithm



Average Metric: 451.0 / 559  (80.7):  86%|█▋| 558/651 [2:06:59<19:28, 12.57s/it][A
Average Metric: 451.0 / 559  (80.7):  86%|█▋| 559/651 [2:06:59<23:14, 15.16s/it][A


Error for example in dev set: 		 PyCryptodome is required for AES algorithm


Average Metric: 451.0 / 560  (80.5):  86%|█▋| 559/651 [2:07:00<23:14, 15.16s/it][A
Average Metric: 451.0 / 560  (80.5):  86%|█▋| 560/651 [2:07:01<17:00, 11.21s/it][A
Average Metric: 452.0 / 561  (80.6):  86%|█▋| 560/651 [2:07:12<17:00, 11.21s/it][A
Average Metric: 452.0 / 561  (80.6):  86%|█▋| 561/651 [2:07:14<17:25, 11.62s/it][A
Average Metric: 453.0 / 562  (80.6):  86%|█▋| 561/651 [2:07:36<17:25, 11.62s/it][A
Average Metric: 453.0 / 562  (80.6):  86%|█▋| 562/651 [2:07:38<22:44, 15.34s/it][A
Average Metric: 454.0 / 563  (80.6):  86%|█▋| 562/651 [2:07:50<22:44, 15.34s/it][A
Average Metric: 454.0 / 563  (80.6):  86%|█▋| 563/651 [2:07:51<21:45, 14.84s/it][A
Average Metric: 455.0 / 564  (80.7):  86%|█▋| 563/651 [2:07:52<21:45, 14.84s/it][A
Average Metric: 455.0 / 564  (80.7):  87%|█▋| 564/651 [2:07:54<16:06, 11.11s/it][A
Average Metric: 455.0 / 565  (80.5):  87%|█▋| 564/651 [2:07:55<16:06, 11.11s/it][A
Average Metric: 455.0 / 565  (80.5):  87%|█▋| 565/651 [2:07:59<12:59,  9.06s

Error for example in dev set: 		 negative seek value -1



Average Metric: 477.0 / 592  (80.6):  91%|█▊| 591/651 [2:14:30<10:44, 10.75s/it][A
Average Metric: 477.0 / 592  (80.6):  91%|█▊| 592/651 [2:14:32<08:37,  8.76s/it][A
Average Metric: 478.0 / 593  (80.6):  91%|█▊| 592/651 [2:14:33<08:37,  8.76s/it][A
Average Metric: 478.0 / 593  (80.6):  91%|█▊| 593/651 [2:14:35<06:49,  7.07s/it][A

Error for example in dev set: 		 negative seek value -1



Average Metric: 479.0 / 594  (80.6):  91%|█▊| 593/651 [2:14:59<06:49,  7.07s/it][A
Average Metric: 479.0 / 594  (80.6):  91%|█▊| 594/651 [2:15:01<12:17, 12.94s/it][A
Average Metric: 480.0 / 595  (80.7):  91%|█▊| 594/651 [2:15:15<12:17, 12.94s/it][A
Average Metric: 480.0 / 595  (80.7):  91%|█▊| 595/651 [2:15:16<12:47, 13.71s/it][A
Average Metric: 481.0 / 596  (80.7):  91%|█▊| 595/651 [2:15:40<12:47, 13.71s/it][A
Average Metric: 481.0 / 596  (80.7):  92%|█▊| 596/651 [2:15:42<15:42, 17.14s/it][A
Average Metric: 481.0 / 597  (80.6):  92%|█▊| 596/651 [2:15:46<15:42, 17.14s/it][A
Average Metric: 481.0 / 597  (80.6):  92%|█▊| 597/651 [2:15:49<12:37, 14.03s/it][A
Average Metric: 481.0 / 598  (80.4):  92%|█▊| 597/651 [2:15:55<12:37, 14.03s/it][A
Average Metric: 481.0 / 598  (80.4):  92%|█▊| 598/651 [2:15:58<11:03, 12.52s/it][A
Average Metric: 481.0 / 599  (80.3):  92%|█▊| 598/651 [2:16:14<11:03, 12.52s/it][A
Average Metric: 481.0 / 599  (80.3):  92%|█▊| 599/651 [2:16:15<12:18, 14.21

Average Metric: 523.0 / 651  (80.3%)



  df = df.applymap(truncate_cell)


In [75]:
llm.inspect_history(n=5)





Predict if the two chunks are related by a citation. Consider all possible ways in which a citation could occur, such as direct quotes, paraphrasing, or referring to the same ideas or data. Don't be afraid to predict that the chunks are related by a citation. If you're not sure, it's better to predict that they are related.

---

Follow the following format.

Query Chunk: Query chunk to compare to the candidate chunk.

Candidate Chunk: Candidate chunk to compare to the query chunk.

Context: A good example to learn from.

Answer: either True or False

---

Query Chunk:  m nmmm vvvv  321CNN Extracted descriptor vector sequence 1st-person activity video … … Time series representation Per-frame feature representation … Temporal pooling Final representation sum pooling ‘histogram of time series gradients’ pooling … … … … … n*m dimensional data (e.g., n = 4096 features, m = 1000 frames) n time series (e.g., 4096) k temporal filters (e.g., 15) n*k-D vector (e.g., 61440) … max pooling 

In [None]:
all_predictions = []
for x in outputs[1]:
    if type(x[1])==dspy.Prediction:
        all_predictions.append(x[1].resolved)
    else:
        all_predictions.append(np.nan)
    

all_labels = [x[0].cites for x in outputs[1]]
print(len(all_predictions))

In [78]:
with open('./predictions_one_shot_prompt_1000.txt', 'w') as f:
    for pred in all_predictions:
        f.write(str(pred) + '\n')

In [79]:
print(len(all_predictions))

651


In [80]:
# Compute the accuracy of the final predictions
correct_predictions = [prediction == label for prediction, label in zip(all_predictions, all_labels)]
accuracy = sum(correct_predictions) / len(correct_predictions)
print(f'Accuracy: {accuracy:.2f}')

# Compute the recall of the final predictions
true_positives = sum([prediction and label for prediction, label in zip(all_predictions, all_labels)])
false_negatives = sum([not prediction and label for prediction, label in zip(all_predictions, all_labels)])
recall = true_positives / (true_positives + false_negatives)
print(f'Recall: {recall: .2f}')

# Compute the precision of the final predictions
true_positives = sum([prediction and label for prediction, label in zip(all_predictions, all_labels)])
false_positives = sum([prediction and not label for prediction, label in zip(all_predictions, all_labels)])
precision = true_positives / (true_positives + false_positives)
print(f'Precision: {precision:.2f}')

# F1 score
f1 = 2 * (precision * recall) / (precision + recall)
print(f'F1 Score: {f1:.2f}')

Accuracy: 0.80
Recall:  0.07
Precision: 0.21
F1 Score: 0.10
