# In-context learning for Citation Prediction

In [1]:
import dspy
import pandas as pd
import ast
import numpy as np
import os
from numpy.linalg import norm
from tqdm import tqdm
from pathlib import Path
# from operator import add
from PyPDF2 import PdfReader
from openai import OpenAI
from dspy.evaluate import Evaluate
# import random
# from dotenv import load_dotenv

np.random.seed(42)

  from .autonotebook import tqdm as notebook_tqdm


## Get the test data

In [2]:
query_candidate_data = pd.read_csv('darwin/test.qrel.cid', sep=' ', header=None, names=['query', 'candidate', 'bool'])

In [3]:
query_dir = 'darwin/query_papers'
candidate_dir = 'darwin/candidate_papers'
retrieved_dir = 'darwin/retrieved-papers'

with open('darwin/qpaper_to_emb', 'r') as f:
    query_papers = [line.strip() for line in f]

with open('darwin/cpaper_to_emb', 'r') as f:
    candidate_papers = [line.strip() for line in f]

print(f'len(query_papers): {len(query_papers)}')
print(f'len(candidate_papers): {len(candidate_papers)}')


len(query_papers): 115
len(candidate_papers): 637


In [4]:
counter_4 = 0
valid_rows = pd.DataFrame()
# Iterate over the rows of the data
for _, row in query_candidate_data.iterrows():
    query_file = os.path.join(query_dir, str(row['query']) + '.pdf')
    candidate_file = os.path.join(candidate_dir, str(row['candidate']) + '.pdf')

    # Check if both files exist
    if os.path.isfile(query_file) and os.path.isfile(candidate_file):
        # If both files exist, append the row to valid_rows
        valid_rows = valid_rows._append(row)
        
# Reset the index of valid_rows
valid_rows.reset_index(drop=True, inplace=True)
print(valid_rows.head())
print(f'Number of query candidate pairs with valid files: {len(valid_rows)}')

     query candidate  bool
0  3498240   1824499     1
1  3498240  53645322     0
2  3498240   1915951     0
3  3498240   3048298     0
4  3498240   3627503     0
Number of query candidate pairs with valid files: 651


In [5]:
data = [{"query_file": query_file, "candidate_file": candidate_file, "cites": bool(bool_)} for query_file, candidate_file, bool_ in zip(valid_rows['query'], valid_rows['candidate'], valid_rows['bool'])]
data = [dspy.Example(**x).with_inputs('query_file', 'candidate_file') for x in data]

def split_data(data, split_ratio, seed=42):
    np.random.seed(seed)
    indices = np.random.permutation(len(data))
    split_index = int(split_ratio * len(data))
    train_indices = indices[:split_index]
    test_indices = indices[split_index:]
    trainset = [data[i] for i in train_indices]
    testset = [data[i] for i in test_indices]
    return trainset, testset

# trainset, testset = split_data(data, 0)
trainset = data


## Produce the dataset where dspy will retrieve from
Each sample has the following format. "Query Chunk: ...\n Candidate Chunk: ...\n Answer: ...\n  " 

In [6]:
test_papers = []
test_paper_paths = []
r_paper_paths = []
r_papers = []
with open('darwin/link-recorder-final-1', 'r') as f:
    for line in f:
        temp = line.split('\t')
        tpaper = temp[0].strip()
        rpaper = temp[1].strip()
        # Check if tpaper exists in query_papers or candidate_papers
        if tpaper in query_papers:
            # print(f'tpaper {tpaper} exists in query_papers')
            tpaper_path = os.path.join(query_dir, tpaper + '.pdf')
        elif tpaper in candidate_papers:
            tpaper_path = os.path.join(candidate_dir, tpaper + '.pdf')
        else:
            # print(f'tpaper {tpaper} does not exist in query_papers or candidate_papers')
            continue
        r_paper_path = os.path.join(retrieved_dir, rpaper + '.pdf')
        
        test_papers.append(tpaper)
        test_paper_paths.append(tpaper_path)
        r_papers.append(rpaper)
        r_paper_paths.append(r_paper_path)
test_retrieved_data = pd.DataFrame({'tpaper': test_papers, 'rpaper': r_papers, 'tpaper_path': test_paper_paths, 'rpaper_path': r_paper_paths})

In [7]:
test_retrieved_data

Unnamed: 0,tpaper,rpaper,tpaper_path,rpaper_path
0,1845461,58230747,darwin/candidate_papers/1845461.pdf,darwin/retrieved-papers/58230747.pdf
1,18542228,18542228,darwin/candidate_papers/18542228.pdf,darwin/retrieved-papers/18542228.pdf
2,52943195,222420988,darwin/candidate_papers/52943195.pdf,darwin/retrieved-papers/222420988.pdf
3,17153635,244273135,darwin/candidate_papers/17153635.pdf,darwin/retrieved-papers/244273135.pdf
4,24269741,79055678,darwin/candidate_papers/24269741.pdf,darwin/retrieved-papers/79055678.pdf
...,...,...,...,...
232,33508063,187967027,darwin/candidate_papers/33508063.pdf,darwin/retrieved-papers/187967027.pdf
233,5280973,17823317,darwin/candidate_papers/5280973.pdf,darwin/retrieved-papers/17823317.pdf
234,7103019,222209029,darwin/candidate_papers/7103019.pdf,darwin/retrieved-papers/222209029.pdf
235,58604136,207945705,darwin/candidate_papers/58604136.pdf,darwin/retrieved-papers/207945705.pdf


In [8]:
valid_rows_retrieved = pd.DataFrame()
retrieved_dir = 'darwin/retrieved-papers'
for _, row in test_retrieved_data.iterrows():
    # # Check if both files exist
    if os.path.isfile(row['tpaper_path']) and os.path.isfile(row['rpaper_path']):
        # If both files exist, append the row to valid_rows
        valid_rows_retrieved = valid_rows_retrieved._append(row)
valid_rows_retrieved.reset_index(drop=True, inplace=True)
print(valid_rows_retrieved.head())
print(f'Number of query candidate pairs with valid files: {len(valid_rows_retrieved)}')

     tpaper     rpaper                           tpaper_path  \
0   1845461   58230747   darwin/candidate_papers/1845461.pdf   
1  18542228   18542228  darwin/candidate_papers/18542228.pdf   
2  52943195  222420988  darwin/candidate_papers/52943195.pdf   
3  17153635  244273135  darwin/candidate_papers/17153635.pdf   
4  24269741   79055678  darwin/candidate_papers/24269741.pdf   

                             rpaper_path  
0   darwin/retrieved-papers/58230747.pdf  
1   darwin/retrieved-papers/18542228.pdf  
2  darwin/retrieved-papers/222420988.pdf  
3  darwin/retrieved-papers/244273135.pdf  
4   darwin/retrieved-papers/79055678.pdf  
Number of query candidate pairs with valid files: 237


In [9]:
valid_rows_retrieved.head()

Unnamed: 0,tpaper,rpaper,tpaper_path,rpaper_path
0,1845461,58230747,darwin/candidate_papers/1845461.pdf,darwin/retrieved-papers/58230747.pdf
1,18542228,18542228,darwin/candidate_papers/18542228.pdf,darwin/retrieved-papers/18542228.pdf
2,52943195,222420988,darwin/candidate_papers/52943195.pdf,darwin/retrieved-papers/222420988.pdf
3,17153635,244273135,darwin/candidate_papers/17153635.pdf,darwin/retrieved-papers/244273135.pdf
4,24269741,79055678,darwin/candidate_papers/24269741.pdf,darwin/retrieved-papers/79055678.pdf


In [10]:
# Randoly select 100 to set up the retrieval dataset
dspy_r_set = pd.DataFrame(columns=['query', 'candidate', 'label'])

# valid_r_papers = valid_rows_retrieved['rpaper'].to_numpy()
sampled_df = valid_rows_retrieved.sample(n=100, random_state=42)

queries = []
query_paths = []
candidates = []
candidate_paths = []
labels = []
for _, row in sampled_df.iterrows():
    test_set_paper = row['tpaper']
    retrieved_paper = row['rpaper']
    queries.append(test_set_paper)
    candidates.append(retrieved_paper)
    query_paths.append(row['tpaper_path'])
    candidate_paths.append(row['rpaper_path'])
    labels.append(True)
    assert os.path.join(retrieved_dir, retrieved_paper + '.pdf') == row['rpaper_path'], f'{os.path.join(retrieved_dir, retrieved_paper + ".pdf")} != {row["rpaper_path"]}'
 
    # Get 8 negative samples for one positive sample as in the SPECTER svm experiment.
    neg_papers = valid_rows_retrieved.sample(n=8)
    rpapers = neg_papers['rpaper'].values
    rpaper_paths = neg_papers['rpaper_path'].values
    for rpaper_path, rpaper in zip(rpaper_paths, rpapers):
        queries.append(test_set_paper)
        candidates.append(rpaper)
        labels.append(False)
        query_paths.append(row['tpaper_path'])
        candidate_paths.append(rpaper_path)
    
dspy_r_set = pd.DataFrame({'query': queries, 'candidate': candidates, 'label': labels, 'query_path': query_paths, 'candidate_path': candidate_paths})

print(f'Number of query candidate pairs in dspy retrieval set: {len(dspy_r_set)}')
print(dspy_r_set.head())

Number of query candidate pairs in dspy retrieval set: 900
     query  candidate  label                           query_path  \
0  1624655   55801602   True  darwin/candidate_papers/1624655.pdf   
1  1624655   55801602  False  darwin/candidate_papers/1624655.pdf   
2  1624655   92798697  False  darwin/candidate_papers/1624655.pdf   
3  1624655  253224152  False  darwin/candidate_papers/1624655.pdf   
4  1624655   10302429  False  darwin/candidate_papers/1624655.pdf   

                          candidate_path  
0   darwin/retrieved-papers/55801602.pdf  
1   darwin/retrieved-papers/55801602.pdf  
2   darwin/retrieved-papers/92798697.pdf  
3  darwin/retrieved-papers/253224152.pdf  
4   darwin/retrieved-papers/10302429.pdf  


## Chunker

In [11]:
llm = dspy.OpenAI(model="gpt-3.5-turbo")
dspy.settings.configure(lm=llm, rm=None)

client = OpenAI(
    # this is also the default, it can be omitted
    api_key=os.environ['OPENAI_API_KEY'],
)

In [12]:
class Chunker:
    def __init__(self, context_window=3000, max_windows=5):
        self.context_window = context_window
        self.max_windows = max_windows
        self.window_overlap = 0.02

    def __call__(self, paper):
        snippet_idx = 0

        while snippet_idx < self.max_windows and paper:
            endpos = int(self.context_window * (1.0 + self.window_overlap))
            snippet, paper = paper[:endpos], paper[endpos:]

            next_newline_pos = snippet.rfind('\n')
            if paper and next_newline_pos != -1 and next_newline_pos >= self.context_window // 2:
                paper = snippet[next_newline_pos+1:] + paper
                snippet = snippet[:next_newline_pos]

            yield snippet_idx, snippet.strip()
            snippet_idx += 1

## Generate Retrieval Data

In [13]:
def get_embeddings(texts, model="text-embedding-3-small", save_file=None):
    if save_file and Path(save_file).exists():
        with open(save_file, 'r') as f:
            # print(f"Loading embeddings from {save_file}")
            embeddings = [ast.literal_eval(line.strip()) for line in f]
            # assert len(embeddings) == len(texts), f"Number of embeddings does not match number of texts {len(embeddings)} != {len(texts)}"
        return embeddings
        
    try:
        response = client.embeddings.create(input=texts, model=model)
        embeddings = [embedding.embedding for embedding in response.data]
        assert len(embeddings) == len(texts), "Number of embeddings does not match number of texts"
        if save_file: # Save the embeddings to a file
            with open(save_file, 'w') as f:
                # print(f"Saving embeddings to {save_file}")
                for embedding in embeddings:
                    f.write(str(embedding) + '\n')
        return embeddings
    except Exception as e:
        print("Error during API call:", e)
        return []
    

In [14]:
def get_most_similar_chunk_emb(query_embedding, candidate_embeddings, candidate_chunks):
    similarities = np.dot(candidate_embeddings, query_embedding) / (norm(candidate_embeddings, axis=1) * norm(query_embedding))
    most_similar_idx = np.argmax(similarities)
    return candidate_chunks[most_similar_idx], candidate_embeddings[most_similar_idx]

In [15]:
def get_most_similar_emb_idx(query_embedding, candidate_embeddings):
    similarities = np.dot(candidate_embeddings, query_embedding) / (norm(candidate_embeddings, axis=1) * norm(query_embedding))
    most_similar_idx = np.argmax(similarities)
    return most_similar_idx

In [16]:
dspy_r_set.to_csv('darwin/dspy_r_set.csv', index=False)

In [17]:
# produce retrieval set
chunk = Chunker(context_window=1000, max_windows=15)
dspy_r_emb = []
dspy_r_text = []
dspy_r_emb_concat = []
for _, row in tqdm(dspy_r_set.iterrows(), total=len(dspy_r_set)):
    # Get the text from the pdfs
    query_file = row['query']
    query_file_path = row['query_path']
    try:
        # print(f'reading query file {row["query_path"]}')
        query_pdf = PdfReader(query_file_path)
    except:
        # print(f'error reading query file {row["query_path"]}')
        continue
    query_text = ""
    for page in query_pdf.pages:
        page_text = page.extract_text()
        if page_text:
            query_text += page_text + " "  # Adding space to separate text between pages
    query_text = query_text.replace("\n", " ")

    candidate_file = row['candidate']
    try:
        # print(f'reading candidate file {row["candidate_path"]}')
        candidate_pdf = PdfReader(row['candidate_path'])
        
    except:
        print(f'Error reading candidate file {row["candidate_path"]}')
        continue
    candidate_text = ""
    for page in candidate_pdf.pages:
        page_text = page.extract_text()
        if page_text:
            candidate_text += page_text + " "
    candidate_text = candidate_text.replace("\n", " ")

    # for each chunk in the paper
    query_chunks = [snippet for _, snippet in chunk(query_text)]
    candidate_chunks = [snippet for _, snippet in chunk(candidate_text)]
    # print(f'len(query_chunks): {len(query_chunks)} len(candidate_chunks): {len(candidate_chunks)}')

    # Create embeddings for the chunks
    candidate_embeddings = get_embeddings(candidate_chunks, save_file=f'new_embeddings/candidate_{candidate_file}.emb')
    query_embeddings = get_embeddings(query_chunks, save_file=f'new_embeddings/query_{query_file}.emb')
    assert len(candidate_embeddings) == len(candidate_chunks), f"Number of embeddings does not match number of texts {len(candidate_embeddings)} != {len(candidate_chunks)}"

    for snippet, query_embedding in zip(query_chunks, query_embeddings):
        # Get the candidate chunk that is most similar to the snippet
        if len(candidate_embeddings) == 0 or len(query_embedding) == 0:
            continue
        if len(candidate_embeddings) != len(candidate_chunks):
            print('Error')
            print(f'len(candidate_embeddings): {len(candidate_embeddings)}')
            print(f'len(candidate_chunks): {len(candidate_chunks)}')
            continue
        # assert(len(candidate_embeddings) == len(candidate_chunks))
        candidate_chunk, c_emb = get_most_similar_chunk_emb(query_embedding, candidate_embeddings, candidate_chunks)
        dspy_r_emb.append((query_embedding, c_emb, row['label']))
        dspy_r_text.append((snippet, candidate_chunk, row['label']))
        dspy_r_emb_concat.append(query_embedding + c_emb)


  2%|▏         | 20/900 [00:18<10:55,  1.34it/s]

Error reading candidate file darwin/retrieved-papers/49409974.pdf


  8%|▊         | 73/900 [01:21<13:59,  1.01s/it]

Error reading candidate file darwin/retrieved-papers/199472931.pdf


 10%|▉         | 86/900 [01:39<14:01,  1.03s/it]

Error reading candidate file darwin/retrieved-papers/230670622.pdf


 10%|▉         | 89/900 [01:42<12:34,  1.07it/s]

Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 13%|█▎        | 116/900 [02:09<07:40,  1.70it/s]

Error reading candidate file darwin/retrieved-papers/230670622.pdf


 14%|█▍        | 125/900 [02:19<11:30,  1.12it/s]

Error reading candidate file darwin/retrieved-papers/230670622.pdf


 15%|█▍        | 133/900 [02:26<10:17,  1.24it/s]

Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 19%|█▉        | 174/900 [03:00<12:14,  1.01s/it]

Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 28%|██▊       | 249/900 [04:31<05:23,  2.01it/s]

Error reading candidate file darwin/retrieved-papers/1511942.pdf


 30%|██▉       | 266/900 [04:48<10:32,  1.00it/s]

Error reading candidate file darwin/retrieved-papers/199472931.pdf


 32%|███▏      | 287/900 [05:31<27:59,  2.74s/it]

Error reading candidate file darwin/retrieved-papers/199472931.pdf


 32%|███▏      | 288/900 [05:34<27:54,  2.74s/it]

Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 34%|███▎      | 303/900 [05:55<10:09,  1.02s/it]

Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}


 37%|███▋      | 332/900 [07:26<43:24,  4.59s/it]  

Error reading candidate file darwin/retrieved-papers/199472931.pdf


 62%|██████▏   | 558/900 [11:50<07:32,  1.32s/it]

Error reading candidate file darwin/retrieved-papers/1511942.pdf


 72%|███████▏  | 649/900 [16:17<06:47,  1.62s/it]

Error reading candidate file darwin/retrieved-papers/49409974.pdf


 72%|███████▏  | 652/900 [16:22<06:41,  1.62s/it]

Error reading candidate file darwin/retrieved-papers/49409974.pdf


 76%|███████▋  | 688/900 [17:15<02:31,  1.40it/s]

Error reading candidate file darwin/retrieved-papers/230670622.pdf


 84%|████████▍ | 759/900 [22:03<03:32,  1.50s/it]

Error reading candidate file darwin/retrieved-papers/49409974.pdf


 88%|████████▊ | 791/900 [22:51<02:45,  1.52s/it]

Error reading candidate file darwin/retrieved-papers/199472931.pdf


 91%|█████████ | 820/900 [23:23<01:57,  1.47s/it]

Error reading candidate file darwin/retrieved-papers/1511942.pdf


 93%|█████████▎| 833/900 [24:03<01:37,  1.45s/it]

Error reading candidate file darwin/retrieved-papers/1511942.pdf


 94%|█████████▎| 843/900 [24:09<00:31,  1.82it/s]

Error reading candidate file darwin/retrieved-papers/1511942.pdf


100%|██████████| 900/900 [29:09<00:00,  1.94s/it]


## DSPy Module

In [18]:
class PredictCitationWithRetrieval(dspy.Signature):
    __doc__ = """Predict if the two chunks are related by a citation. Consider all possible ways in which a citation could occur, such as direct quotes, paraphrasing, or referring to the same ideas or data. Don't be afraid to predict that the chunks are related by a citation. If you're not sure, it's better to predict that they are related."""   
    query_chunk: str = dspy.InputField(desc='Query chunk to compare to the candidate chunk.')
    candidate_chunk: str = dspy.InputField(desc='Candidate chunk to compare to the query chunk.')
    answer: bool = dspy.OutputField(desc="either True or False", prefix="Answer:")
    example: str = dspy.InputField(desc="A good example to learn from.")

In [26]:
class PredictCitationRetrieveAndResolve(dspy.Module):
    def __init__(self, context_window=3000, max_windows=5, resolve_function=any,
                 candidate_folder='darwin/candidate_papers/', 
                 query_folder='darwin/query_papers',
                 reset_embedding=False):
        super().__init__()
        
        self.chunk = Chunker(context_window=context_window, max_windows=max_windows)
        # self.predict = dspy.TypedPredictor(PredictCitation)
        # self.predict = dspy.TypedChainOfThought(PredictCitation)
        self.predict = dspy.ChainOfThought(PredictCitationWithRetrieval)
        # self.predict = dspy.Predict(PredictCitationWithRetrieval)
        self.resolve_function = resolve_function
        self.query_folder = query_folder
        self.candidate_folder = candidate_folder
        os.makedirs('embeddings', exist_ok=True)
        os.makedirs('new_embeddings', exist_ok=True)
        if reset_embedding:
            for emb_file in os.listdir('embeddings'):
                os.remove(f'embeddings/{emb_file}')
            for emb_file in os.listdir('new_embeddings'):
                os.remove(f'new_embeddings/{emb_file}')

    def forward(self, query_file, candidate_file):
        predictions = []     
        # Get the text from the pdfs
        query_pdf = PdfReader(f'{self.query_folder}/{query_file}.pdf')
        query_text = ""
        for page in query_pdf.pages:
            page_text = page.extract_text()
            if page_text:
                query_text += page_text + " "  # Adding space to separate text between pages
        query_text = query_text.replace("\n", " ")
        
        candidate_pdf = PdfReader(f'{self.candidate_folder}/{candidate_file}.pdf')
        candidate_text = ""
        for page in candidate_pdf.pages:
            page_text = page.extract_text()
            if page_text:
                candidate_text += page_text + " "
        candidate_text = candidate_text.replace("\n", " ")
        
        # for each chunk in the paper
        query_chunks = [snippet for _, snippet in self.chunk(query_text)]
        candidate_chunks = [snippet for _, snippet in self.chunk(candidate_text)]
        
        # Create embeddings for the chunks
        candidate_embeddings = get_embeddings(candidate_chunks, save_file=f'embeddings/candidate_{candidate_file}.emb')
        query_embeddings = get_embeddings(query_chunks, save_file=f'embeddings/query_{query_file}.emb')
        
        for snippet, query_embedding in zip(query_chunks, query_embeddings):
            # Get the candidate chunk that is most similar to the snippet
            candidate_chunk, candidate_chunk_emb = get_most_similar_chunk_emb(query_embedding, candidate_embeddings, candidate_chunks)
            original_emb_concat = query_embedding + candidate_chunk_emb
            context_idx = get_most_similar_emb_idx(original_emb_concat, dspy_r_emb_concat)
            context_text = dspy_r_text[context_idx]
            if context_text[2]:
                context_answer = "True"
            else:
                context_answer = "False"
            example = f"Query Chunk: {context_text[0]}\nCandidate Chunk: {context_text[1]}\nAnswer: {context_answer}\n"
            prediction = self.predict(query_chunk=snippet, candidate_chunk=candidate_chunk, example=example)
            # print(prediction)
            predictions.append(prediction.answer=='True')
            if prediction.answer == 'True':
                break
        return dspy.Prediction(example=example, predictions=predictions, resolved=self.resolve_function(predictions))

In [27]:
def metric(example, result):
    '''Match metric'''
    return 1 if example.cites == result.resolved else 0

In [28]:
pipeline_chunking_retrieval = PredictCitationRetrieveAndResolve(max_windows=15, context_window=1000, reset_embedding=False)

In [29]:
# get an example
example = trainset[-2]
example_x = example.inputs()
example_y = example.labels()
print(example_x)
print(example_y)

prediction = pipeline_chunking_retrieval(**example_x)
print(prediction)
print(example_y.cites)

Example({'query_file': 1323414, 'candidate_file': '3324808'}) (input_keys=None)
Example({'cites': False}) (input_keys=None)
Prediction(
    example='Query Chunk: ll et al., 1999a,b). Imaging protocol. The MRI protocol collected for each subject was a whole-brain, gradient-echo (spoiled gradient recalled acquisition in asteady state) T1-weighted series collected in the sagittal plane withrepetition time of 24 msec, echo time of 5 msec, two excitations, ﬂip angle of 45 °,ﬁeld of view of 24 cm, 124 slices with section thickness of 1.2 mm, no gaps, and an imaging time of 19 min. Image analysis. MR images from each individual were processed with a series of manual and automated procedures that included the followingsteps: (1) automated linear transformation (Woods et al., 1993) of theimages into a standard orientation with scaling to remove global differ-ences in head size allowing assessment of local changes in brain size ortissue density; (2) classi ﬁcation of brain images into gray matte

In [30]:
llm.inspect_history()





Predict if the two chunks are related by a citation. Consider all possible ways in which a citation could occur, such as direct quotes, paraphrasing, or referring to the same ideas or data. Don't be afraid to predict that the chunks are related by a citation. If you're not sure, it's better to predict that they are related.

---

Follow the following format.

Query Chunk: Query chunk to compare to the candidate chunk.

Candidate Chunk: Candidate chunk to compare to the query chunk.

Example: A good example to learn from.

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: either True or False

---

Query Chunk: JNERJOURNAL OF NEUROENGINEERING AND REHABILITATIONSalazar-Varas et al. Journal of NeuroEngineering and Rehabilitation (2015) 12:101 DOI 10.1186/s12984-015-0095-4 RESEARCH Open Access Analyzing EEG signals to detect unexpected obstacles during walking R. Salazar-Varas1,Á .C o s t a2,E .I á ñ e z2*,A .Ú b e d a2,E .H o r t a l2a n dJ .M .A z

In [31]:
evaluate = Evaluate(devset=trainset, metric=metric, num_threads=8, display_progress=True, display_table=0, max_errors=100, return_outputs=True)
outputs = evaluate(pipeline_chunking_retrieval)

  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_r

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_r

Error for example in dev set: 		 negative seek value -1


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)


Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}


Average Metric: 56.0 / 102  (54.9):  16%|█▌        | 102/651 [53:53<4:23:39, 28.82s/it]

Error for example in dev set: 		 shapes (0,) and (1536,) not aligned: 0 (dim 0) != 1536 (dim 0)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_r

Error for example in dev set: 		 cannot access local variable 'example' where it is not associated with a value


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 98.0 / 166  (59.0):  25%|██▌       | 165/651 [1:28:49<2:38:33, 19.57s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 98.0 / 166  (59.0):  25%|██▌       | 166/651 [1:28:52<2:10:47, 16.18s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 98.0 / 167  (58.7):  26%|██▌       | 167/651 [1:29:13<2:23:22, 17.77s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 101.0 / 174  (58.0):  27%|██▋       | 173/651 [1:31:08<1:50:45, 13.90s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)
Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 101.0 / 175  (57.7):  27%|██▋       | 175/651 [1:31:12<1:06:41,  8.41s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 101.0 / 176  (57.4):  27%|██▋       | 175/651 [1:31:21<1:06:41,  8.41s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 101.0 / 178  (56.7):  27%|██▋       | 177/651 [1:31:31<1:07:07,  8.50s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)
Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 101.0 / 180  (56.1):  28%|██▊       | 180/651 [1:31:41<48:02,  6.12s/it]  

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 102.0 / 182  (56.0):  28%|██▊       | 182/651 [1:31:54<46:30,  5.95s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 102.0 / 184  (55.4):  28%|██▊       | 184/651 [1:31:55<24:27,  3.14s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)
Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 104.0 / 187  (55.6):  29%|██▊       | 186/651 [1:32:03<28:05,  3.63s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 104.0 / 188  (55.3):  29%|██▊       | 187/651 [1:32:09<27:56,  3.61s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 104.0 / 189  (55.0):  29%|██▉       | 188/651 [1:32:11<29:23,  3.81s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)
Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_r

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_r

Error for example in dev set: 		 source code string cannot contain null bytes


Average Metric: 208.0 / 370  (56.2):  57%|█████▋    | 370/651 [3:02:02<50:11, 10.72s/it]  

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 208.0 / 371  (56.1):  57%|█████▋    | 371/651 [3:02:03<38:03,  8.15s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 208.0 / 372  (55.9):  57%|█████▋    | 372/651 [3:02:12<39:42,  8.54s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 208.0 / 373  (55.8):  57%|█████▋    | 373/651 [3:02:16<33:03,  7.14s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 208.0 / 375  (55.5):  58%|█████▊    | 375/651 [3:02:31<35:04,  7.63s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 209.0 / 378  (55.3):  58%|█████▊    | 378/651 [3:02:55<35:54,  7.89s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 210.0 / 380  (55.3):  58%|█████▊    | 380/651 [3:03:11<35:25,  7.84s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 210.0 / 381  (55.1):  59%|█████▊    | 381/651 [3:03:25<44:06,  9.80s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 210.0 / 382  (55.0):  59%|█████▊    | 382/651 [3:03:25<31:10,  6.95s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 210.0 / 383  (54.8):  59%|█████▉    | 383/651 [3:03:37<37:37,  8.42s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 210.0 / 384  (54.7):  59%|█████▉    | 384/651 [3:03:37<26:36,  5.98s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 210.0 / 385  (54.5):  59%|█████▉    | 385/651 [3:03:38<19:03,  4.30s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 210.0 / 386  (54.4):  59%|█████▉    | 386/651 [3:03:48<26:46,  6.06s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


Average Metric: 210.0 / 387  (54.3):  59%|█████▉    | 387/651 [3:03:51<23:13,  5.28s/it]

Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)
Error for example in dev set: 		 unmatched ']' (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)


Error during API call: Error code: 400 - {'error': {'message': "'$.input' is invalid. Please check the API reference: https://platform.openai.com/docs/api-reference.", 'type': 'invalid_request_error', 'param': None, 'code': None}}
Error for example in dev set: 		 shapes (0,) and (1536,) not aligned: 0 (dim 0) != 1536 (dim 0)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_r

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 272.0 / 516  (52.7):  79%|███████▉  | 516/651 [4:06:11<1:03:14, 28.11s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 272.0 / 518  (52.5):  79%|███████▉  | 517/651 [4:06:48<1:00:38, 27.15s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 275.0 / 522  (52.7):  80%|████████  | 521/651 [4:07:13<26:08, 12.07s/it]  

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 277.0 / 525  (52.8):  80%|████████  | 524/651 [4:07:38<19:09,  9.05s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 277.0 / 526  (52.7):  81%|████████  | 526/651 [4:07:41<12:05,  5.81s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


Average Metric: 277.0 / 527  (52.6):  81%|████████  | 527/651 [4:08:42<46:41, 22.59s/it]

Error for example in dev set: 		 '[' was never closed (<unknown>, line 1)


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 291.0 / 560  (52.0):  86%|████████▌ | 559/651 [4:24:31<28:44, 18.75s/it]

Error for example in dev set: 		 PyCryptodome is required for AES algorithm


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 308.0 / 592  (52.0):  91%|█████████ | 592/651 [4:39:36<24:31, 24.94s/it]

Error for example in dev set: 		 negative seek value -1


  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
  return v1_cached_gpt3_turbo_request_v2(**kwargs)
Average Metric: 344.0 / 651  (52.8): 100%|██████████| 651/651 [5:13:48<00:00, 28.92s/it] 

Average Metric: 344.0 / 651  (52.8%)





In [32]:
llm.inspect_history(n=5)





Predict if the two chunks are related by a citation. Consider all possible ways in which a citation could occur, such as direct quotes, paraphrasing, or referring to the same ideas or data. Don't be afraid to predict that the chunks are related by a citation. If you're not sure, it's better to predict that they are related.

---

Follow the following format.

Query Chunk: Query chunk to compare to the candidate chunk.

Candidate Chunk: Candidate chunk to compare to the query chunk.

Example: A good example to learn from.

Reasoning: Let's think step by step in order to ${produce the answer}. We ...

Answer: either True or False

---

Query Chunk:  m nmmm vvvv  321CNN Extracted descriptor vector sequence 1st-person activity video … … Time series representation Per-frame feature representation … Temporal pooling Final representation sum pooling ‘histogram of time series gradients’ pooling … … … … … n*m dimensional data (e.g., n = 4096 features, m = 1000 frames) n time series (e.g.

In [33]:
all_predictions = []
for x in outputs[1]:
    if type(x[1])==dspy.Prediction:
        all_predictions.append(x[1].resolved)
    else:
        all_predictions.append(np.nan)
    

all_labels = [x[0].cites for x in outputs[1]]
print(len(all_predictions))

651


In [34]:
with open('./predictions_one_shot_prompt_1000.txt', 'w') as f:
    for pred in all_predictions:
        f.write(str(pred) + '\n')

In [35]:
print(len(all_predictions))

651


In [36]:
# Compute the accuracy of the final predictions
correct_predictions = [prediction == label for prediction, label in zip(all_predictions, all_labels)]
accuracy = sum(correct_predictions) / len(correct_predictions)
print(f'Accuracy: {accuracy:.2f}')

# Compute the recall of the final predictions
true_positives = sum([prediction and label for prediction, label in zip(all_predictions, all_labels)])
false_negatives = sum([not prediction and label for prediction, label in zip(all_predictions, all_labels)])
recall = true_positives / (true_positives + false_negatives)
print(f'Recall: {recall: .2f}')

# Compute the precision of the final predictions
true_positives = sum([prediction and label for prediction, label in zip(all_predictions, all_labels)])
false_positives = sum([prediction and not label for prediction, label in zip(all_predictions, all_labels)])
precision = true_positives / (true_positives + false_positives)
print(f'Precision: {precision:.2f}')

# F1 score
f1 = 2 * (precision * recall) / (precision + recall)
print(f'F1 Score: {f1:.2f}')

Accuracy: 0.53
Recall:  0.57
Precision: 0.19
F1 Score: 0.29
