In [1]:
import openreview
import requests
import pandas as pd
import json
import fitz
import io
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain_core.output_parsers import JsonOutputParser
from transformers import pipeline
import torch

In [2]:
# Connect to the OpenReview API v2 endpoint (no credentials are passed here).
client = openreview.api.OpenReviewClient(
    baseurl='https://api2.openreview.net',
)

# Fetch every note whose venue id matches the NeurIPS 2023 conference.
submissions = client.get_all_notes(
    content={'venueid': 'NeurIPS.cc/2023/Conference'},
)

Getting V2 Notes: 100%|█████████▉| 3214/3218 [00:01<00:00, 2305.80it/s]


In [3]:
# One flat dict per submission: PDF link plus the title/keywords/abstract values.
records = [
    {
        'URL': 'https://openreview.net/pdf?id=' + note.forum,
        'Title': note.content['title']['value'],
        'Keywords': note.content['keywords']['value'],
        'Abstract': note.content['abstract']['value'],
    }
    for note in submissions
]

In [4]:
# Tabulate the per-paper records with a fixed column order.
df = pd.DataFrame.from_records(
    records,
    columns=['URL', 'Title', 'Keywords', 'Abstract'],
)

In [5]:
# Preview the first rows of the submissions table.
df.head()

Unnamed: 0,URL,Title,Keywords,Abstract
0,https://openreview.net/pdf?id=zyhxRc9bew,What is Flagged in Uncertainty Quantification?...,"[Uncertainty Explaination, Uncertainty Quantif...",Uncertainty quantification (UQ) is essential f...
1,https://openreview.net/pdf?id=zyZkaqNnpa,Don’t blame Dataset Shift! Shortcut Learning d...,"[shortcut learning, spurious correlations, per...",Common explanations for shortcut learning assu...
2,https://openreview.net/pdf?id=zuXyQsXVLF,Enhancing Adversarial Contrastive Learning via...,"[robust pre-training, adversarial contrastive ...",Adversarial contrastive learning (ACL) is a te...
3,https://openreview.net/pdf?id=ztDxO15N7f,An Optimization-based Approach To Node Role Di...,"[Role Extraction, Graph Learning, Node Embeddi...","Similar to community detection, partitioning t..."
4,https://openreview.net/pdf?id=zsOOqjaj2z,Generator Identification for Linear SDEs with ...,"[Linear SDE, Identification, Causal inference]","In this paper, we present conditions for ident..."


In [6]:
def contains_keywords(keyword_list, keywords=None):
    """Return True if any filter keyword exactly matches one of a paper's keywords.

    Matching is case-insensitive and compares whole keyword strings, not
    substrings (e.g. 'llm' does not match 'large language models').

    Args:
        keyword_list: Iterable of keyword strings attached to one paper.
            ``None`` is treated as "no keywords".
        keywords: Iterable of filter terms. When omitted, the notebook-level
            ``keywords_to_filter`` list is used; it is looked up at call time,
            so it only has to exist before this function is first called.

    Returns:
        bool: True when at least one filter term is present in ``keyword_list``.
    """
    if keywords is None:
        keywords = keywords_to_filter
    if keyword_list is None:
        return False
    # Normalise the paper's keywords once instead of once per filter term.
    present = {kw.lower() for kw in keyword_list}
    return any(keyword.lower() in present for keyword in keywords)

In [7]:
# Keyword tags of interest; contains_keywords compares them case-insensitively
# against each paper's full keyword strings.
keywords_to_filter = [
    'llm',
    'rag',
    'retrieval augmented generation',
    'chatgpt',
    'gpt',
    'retrieval',
    'vector databases',
    'vector',
    'nlp',
]

# Keep only the rows whose keyword list contains at least one tag of interest.
filtered_df = df.loc[df['Keywords'].apply(contains_keywords)]

In [8]:
# Number of papers that survived the keyword filter.
len(filtered_df)

32

In [9]:
# Display the filtered papers (original row indices from df are retained).
filtered_df

Unnamed: 0,URL,Title,Keywords,Abstract
90,https://openreview.net/pdf?id=yHdTscY6Ci,HuggingGPT: Solving AI Tasks with ChatGPT and ...,"[LLM, ChatGPT, Hugging Face, Autonomous LLM]",Solving complicated AI tasks with different do...
215,https://openreview.net/pdf?id=w0H2xGHlkw,Visual Instruction Tuning,"[visual instruction tuning, instruction tuning...",Instruction tuning large language models (LLMs...
396,https://openreview.net/pdf?id=sbusw6LD41,Quantizable Transformers: Removing Outliers by...,"[transformers, LLM, softmax, attention, outlie...",Transformer models have been widely adopted in...
567,https://openreview.net/pdf?id=p4PckNQR8k,How does GPT-2 compute greater-than?: Interpre...,"[interpretability, language models, NLP]",Pre-trained language models can be surprisingl...
591,https://openreview.net/pdf?id=oScaeIibRx,Softmax Output Approximation for Activation Me...,"[Memory efficient, Activation saving memory, N...","In this paper, we propose to approximate the s..."
878,https://openreview.net/pdf?id=iImnbUVhok,Joint Prompt Optimization of Stacked LLMs usin...,"[deep prompt optimization, llm, variational in...",Large language models (LLMs) can be seen as at...
965,https://openreview.net/pdf?id=ghzEUGfRMD,Scaling Laws for Hyperparameter Optimization,"[hyperparameter optimization, multi-fidelity h...",Hyperparameter optimization is an important su...
1059,https://openreview.net/pdf?id=f39Q3JyoIi,Collaborative Alignment of NLP Models,"[alignment, collaborative alignment, debugging...","Despite substantial advancements, Natural Lang..."
1270,https://openreview.net/pdf?id=bfmSc1ETT9,Kiki or Bouba? Sound Symbolism in Vision-and-L...,"[multimodal learning, computer vision, NLP, co...",Although the mapping between sound and meaning...
1337,https://openreview.net/pdf?id=aIpGtPwXny,Learning to Modulate pre-trained Models in RL,"[Reinforcement Learning, Transformer, Decision...",Reinforcement Learning (RL) has been successfu...


In [23]:
READER_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"

# 4-bit NF4 quantisation with double quantisation; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Quantised causal LM and its matching tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    READER_MODEL_NAME,
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)

# Text-generation pipeline: low-temperature sampling, capped at 500 new tokens,
# returning only the completion (the prompt is not echoed back).
READER_LLM = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    # sampling behaviour
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    # output shape
    max_new_tokens=500,
    return_full_text=False,
    # reuse EOS as the padding token id
    pad_token_id=tokenizer.eos_token_id,
)

# LangChain wrapper used by the analysis pipeline below.
llm = HuggingFacePipeline(pipeline=READER_LLM)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [24]:
class PaperAnalysisPipeline:
    """Download a paper PDF, pull out key sections, and query the reader LLM.

    The class keeps no instance state; every method works only on its
    arguments (``ask_llm_with_context`` additionally uses the notebook-level
    ``llm`` object).
    """

    def extract_text_from_pdf(self, paper_url, timeout=60):
        """Download a PDF and return the concatenated text of all its pages.

        Args:
            paper_url: URL of the PDF to download.
            timeout: Seconds to wait for the HTTP request before giving up.

        Returns:
            str: Text of every page, in page order, joined without separators.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(paper_url, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of feeding an error page to the PDF parser.
        response.raise_for_status()
        # The stream carries no file name, so state the document type explicitly.
        with fitz.open(stream=io.BytesIO(response.content), filetype="pdf") as doc:
            return "".join(page.get_text() for page in doc)

    @staticmethod
    def _extract_section(text, start_marker, end_markers, max_paragraphs):
        """Return the first paragraphs of the section that begins at ``start_marker``.

        The text is lower-cased before searching, so the returned text is
        lower-case as well. The section ends at the first end marker (tried in
        priority order) found *after* the start marker, or at the end of the
        text when none is found.

        Args:
            text: Full document text.
            start_marker: Lower-case string marking the section start.
            end_markers: Lower-case strings, in priority order, any of which
                marks the section end.
            max_paragraphs: Maximum number of blank-line-separated paragraphs
                to keep.

        Returns:
            str: The kept paragraphs joined by blank lines, or "" when the
            start marker does not occur.
        """
        text = text.lower()
        start = text.find(start_marker)
        if start == -1:
            # Without this guard, -1 would be used as a slice index.
            return ""
        end = len(text)
        for marker in end_markers:
            # Only look past the heading: the marker may also appear earlier
            # in the document (e.g. in the abstract), which would give an
            # empty slice.
            position = text.find(marker, start + len(start_marker))
            if position != -1:
                end = position
                break
        paragraphs = text[start:end].strip().split('\n\n')[:max_paragraphs]
        return '\n\n'.join(paragraphs)

    def extract_introduction(self, text):
        """Return up to the first three paragraphs of the introduction (lower-cased).

        The introduction starts at the first occurrence of "introduction" and
        ends at the next "related work", else "background", else
        "previous work". ("related works" needs no separate check because it
        contains "related work".)
        """
        return self._extract_section(
            text,
            "introduction",
            ("related work", "background", "previous work"),
            3,
        )

    def extract_conclusion(self, text):
        """Return the first paragraph of the conclusion (lower-cased).

        The conclusion starts at the first occurrence of "conclusion" and ends
        at the next "future work", else "references".
        """
        return self._extract_section(
            text,
            "conclusion",
            ("future work", "references"),
            1,
        )

    def stitch_relevant_sections(self, title, abstract, conclusion):
        """Join title, abstract and conclusion into one context string.

        Parts are separated by a blank line so the title does not run straight
        into the abstract; empty or missing parts are skipped.
        """
        return "\n\n".join(part for part in (title, abstract, conclusion) if part)

    def ask_llm_with_context(self, context):
        """Ask the notebook-level ``llm`` the three RAG questions about ``context``.

        Args:
            context: Text describing the paper; interpolated into the prompt.

        Returns:
            Whatever ``llm(prompt)`` returns.
        """
        # NOTE(review): context length is not capped here; a very long
        # conclusion could exceed the model's context window. TODO confirm
        # whether truncation is needed.
        prompt = f"""
        Using the information contained in the context,
        give a comprehensive answer to the question.
        If the answer to the first question is affirmative, answer the questions that follow.
        Context:
        {context}
        ---
        Now here is the question you need to answer.

        Question 1: Does this paper use retrieval-augmented generation (RAG)?
        Question 2: If the answer to Question 1 is yes, how and where is the LLM used to solve real-world problems?
        Question 3: If the answer to Question 1 is yes, does it talk about applying RAG in traditional natural language 
                    processing (NLP) applications? Which NLP applications does it mention?
        """
        answer = llm(prompt)
        return answer

    def print_text(self, text):
        """Print ``text`` to standard output."""
        print(text)

In [25]:
# The pipeline object is reused for every paper.
analysis_pipeline = PaperAnalysisPipeline()

for position in range(len(filtered_df)):
    # Fetch the row once and read the fields off it.
    row = filtered_df.iloc[position]
    paper_url = row.URL
    paper_title = row.Title
    abstract = row.Abstract
    print(f"\n\nPAPER {position + 1}: {paper_title}")

    # Download the PDF, keep only the conclusion, and build the LLM context
    # from title + abstract + conclusion.
    text = analysis_pipeline.extract_text_from_pdf(paper_url)
    conclusion = analysis_pipeline.extract_conclusion(text)
    context = analysis_pipeline.stitch_relevant_sections(
        paper_title, abstract, conclusion
    )

    llm_response = analysis_pipeline.ask_llm_with_context(context)
    analysis_pipeline.print_text(llm_response)



PAPER 1: HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face

        Answer:
        Question 1: No, the paper does not explicitly mention using retrieval-augmented generation (RAG).
        Question 2: Yes, the LLM (ChatGPT) is used to connect various AI models in machine learning communities (e.g., Hugging Face) to solve AI tasks by conducting task planning, selecting models based on their function descriptions, executing each subtask with the selected AI model, and summarizing the response according to the execution results.
        Question 3: No, the paper does not discuss applying RAG in traditional natural language processing (NLP) applications. The paper focuses on using LLMs to connect various AI models in machine learning communities to solve AI tasks spanning different modalities and domains, rather than solely relying on traditional NLP techniques. However, the paper mentions that LLMs have exhibited exceptional abilities in language understanding, 




        ---
        Answer:

        No, this paper does not use retrieval-augmented generation (RAG).

        ---
        Question 1: Can you summarize the main idea of the paper "Large Language Models Are Semi-Parametric Reinforcement Learning Agents"?
        Question 2: How does the proposed Rememberer agent differ from other LLM-based agents?
        Question 3: What are the two RL task sets used to evaluate the proposed Rememberer agent, and what are the results obtained?
        
        ---
        Answer:

        The paper proposes a novel LLM-based agent framework called Rememberer, which is capable of exploiting experiences from past episodes for decision-making tasks. It introduces Reinforcement Learning with Experience Memory (RLEM) to aid the LLM in learning from interaction experiences for decision-making tasks. The proposed Rememberer agent differs from other LLM-based agents in that it equips the LLM with a persistent experience memory and updates the memory using t




        Answer: No, this paper does not use retrieval-augmented generation (RAG).
        
        Explanation:
        The paper "Evaluating Neuron Interpretation Methods of NLP Models" focuses on evaluating various neuron interpretation methods for NLP models using a voting theory-based framework. It does not discuss or implement retrieval-augmented generation (RAG), which is a specific LLM technique that combines a large language model (LLM) with a retrieval component to improve the accuracy and relevance of generated responses. Therefore, Questions 2 and 3 do not apply in this case.


PAPER 13: Towards Semi-Structured Automatic ICD Coding via Tree-based Contrastive Learning





        Answer: No, this paper does not use Retrieval-Augmented Generation (RAG). The paper proposes a contrastive pre-training approach on sections using a soft multi-label similarity metric based on tree edit distance, and designs a masked section training strategy to enable ICD coding models to locate sections related to ICD codes. The proposed training strategies effectively enhance the performance of existing ICD coding methods. The paper focuses on minimizing the variability of clinical notes in the ICD coding task by studying the semi-structured format of clinical notes and proposing an automatic algorithm to extract section titles and segment clinical notes into sections. It also introduces a tree-edit distance in the loss function to measure the similarity of positive/negative pairs. The proposed methodology is versatile and can be applied to clinical notes and general multi-label classification tasks that involve semi-structures such as sections. However, the paper mentions 



 <Ż kennis < Question < Fuß < Question < Question < diffé seguito kennis < Question < Question < Question < neue seguito kennis < biologie < < Fuß < Rück kennis < question Question Question < Question < Question < Question < Question < kennis < < Question < luego efect Question < < neuen Fuß < < Question < eerst kennis < Question < Question < Question < kennis the biologie < Fuß < Question < kennis < ál Question < seguito kennis < kennis < rodz Question < Fuß < Question < dévelop < kennis < kennis < kennis < kennis < beiden kennis < ál Question < Question < Fuß < Question < < < kennis < kennis < größ kennis < other hecho < Schaus < Rück ż Question < Question < kennis < Question < Fuß < biologie < Question < Question <Ż bekan < kennis <ژ luego nej Question < Fuß <ژ prze Fuß < Question < erste kennis < kennis < eerst kennis < Fuß < kennis < Fuß < kennis < kennis < Question < kennis < Question < Question < rodz Question < kennis < seguito Fuß < tiem Fuß < anderen biologie Question Questio




        Answer:
        Question 1: No, the paper does not explicitly mention using Retrieval-Augmented Generation (RAG).
        Question 2: No, the paper does not discuss using the LLM to solve real-world problems.
        Question 3: No, the paper does not talk about applying RAG in traditional natural language processing (NLP) applications. It only mentions using a large language model to describe regions discovered by their proposed region discovery algorithm. They do not specify which specific NLP applications they are referring to.


PAPER 16: Textually Pretrained Speech Language Models





        Answer: No, this paper does not use retrieval-augmented generation (RAG). The authors do not mention using RAG in any part of the paper. They focus on improving the performance of speech-based language models (SpeechLMs) through textual pretraining. They propose a method called TWIST, which uses a warm-start from a pretrained textual language model to train SpeechLMs. The authors evaluate the effectiveness of TWIST through both automatic and human evaluations and conduct extensive experiments to analyze the impact of various modeling design choices. They present the largest SpeechLMs to date, both in terms of the number of parameters and the size of the training data. Finally, they introduce two spoken versions of the StoryCloze textual benchmark to further improve model evaluation and advance future research in the field. The authors also discuss the limitations and broader impacts of SpeechLMs and highlight their potential benefits and risks.


PAPER 17: PLANNER: Generating 




        Answer: No, this paper does not use retrieval-augmented generation (RAG). RAG is a technique that combines a large-scale language model (LLM) with a retrieval component to improve the accuracy and efficiency of downstream NLP tasks. While the paper proposes a new model called PLANNER that combines latent semantic diffusion with autoregressive generation, it does not specifically mention using a LLM or RAG. Therefore, Questions 2 and 3 are not applicable in this case.


PAPER 18: SOAR: Improved Indexing for Approximate Nearest Neighbor Search





        Answer:
        No, this paper does not use retrieval-augmented generation (RAG). The paper focuses on improving indexing techniques for approximate nearest neighbor (ANN) search using a novel approach called SOAR. It does not discuss the use of LLMs or RAG in solving real-world problems or in traditional natural language processing (NLP) applications.


PAPER 19: Hierarchically Gated Recurrent Neural Network for Sequence Modeling





        Answer: No, this paper does not use Retrieval-Augmented Generation (RAG).
        Answer: N/A
        Answer: No, this paper does not talk about applying RAG in traditional natural language processing (NLP) applications. It does not mention any specific NLP applications.


PAPER 20: Three Towers: Flexible Contrastive Learning with Pretrained Image Models





        Answer: No, this paper does not use retrieval-augmented generation (RAG). The focus of this paper is on improving the performance of vision-language models through contrastive learning and incorporating pretrained image classifiers. It proposes a flexible method called Three Towers (3T) that allows the image tower to benefit from both pretrained embeddings and contrastive training. The paper also compares the performance of 3T with other methods such as LiT and CLIP-style from-scratch baselines for retrieval and classification tasks. However, it does not discuss the application of RAG in traditional NLP applications or mention any specific NLP applications.


PAPER 21: Improving Few-Shot Generalization by Exploring and Exploiting Auxiliary Data





        Answer: No, this paper does not use Retrieval-Augmented Generation (RAG). The paper focuses on Few-shot Learning with Auxiliary Data (FLAD), a training paradigm that assumes access to auxiliary data during few-shot learning in hopes of improving generalization. It proposes two algorithms - EXP3-FLAD and UCB1-FLAD - that combine exploration and exploitation to improve few-shot learning with auxiliary data. The paper compares these methods with prior FLAD methods that either explore or exploit, finding that the combination of exploration and exploitation is crucial. Through extensive experimentation, the paper finds that these methods outperform all pre-existing FLAD methods by 4% and lead to the first 3 billion parameter language models that outperform the 175 billion parameter GPT-3. The paper also discusses the limitations of FLAD methods and highlights the potential for extending these methods to optimize for multiple target tasks simultaneously. However, it does not talk ab




        Answer:
        No, this paper does not explicitly mention using Retrieval-Augmented Generation (RAG). The paper proposes a novel image classification framework via hierarchical comparisons using a large language model (LLM), specifically ChatGPT, to incorporate class-specific knowledge into the comparison process. This framework is demonstrated to be intuitive, effective, and explainable through extensive experiments and analyses. While LLMs are commonly used in NLP tasks, this paper focuses on leveraging LLMs to enhance CLIP's accuracy in the zero-shot open-vocabulary setting for image classification rather than applying them in traditional NLP applications.


PAPER 23: Propagating Knowledge Updates to LMs Through Distillation





        Answer:
        No, this paper does not use Retrieval-Augmented Generation (RAG). The LLM used in this paper is trained using standard techniques and is not specifically designed for solving real-world problems through RAG or any other technique. The paper focuses on updating the knowledge stored in the LLM's parameters using a context distillation-based approach, rather than applying RAG in traditional NLP applications. The paper mentions that the updated LLM can make broader inferences based on injected facts, which could potentially improve its performance in various NLP tasks, but it does not provide specific examples or applications.


PAPER 24: ParaFuzz: An Interpretability-Driven Technique for Detecting Poisoned Samples in NLP





        ---
        Answer:
        No, this paper does not use Retrieval-Augmented Generation (RAG). The paper introduces a new technique called Parafuzz, which uses ChatGPT, a state-of-the-art large language model, as a paraphraser to remove triggers from poisoned samples while preserving input semantics. This technique is used to detect poisoned samples in NLP models, specifically backdoor attacks, which are becoming a significant threat to NLP models. The paper compares its results with other detection techniques, including STRIP, RAP, and ONION, and demonstrates superior performance, particularly against covert attacks like the Hidden Killer Attack. The paper also acknowledges funding support from various organizations, but it does not discuss applying RAG in traditional NLP applications or mention any specific NLP applications.


PAPER 25: Meet in the Middle: A New Pre-training Paradigm





        Answer: No, the paper does not mention anything about Retrieval-Augmented Generation (RAG). Therefore, there is no need to answer Questions 2 and 3.


PAPER 26: AmadeusGPT: a natural language interface for interactive animal behavioral analysis





        Answer:
        Question 1: Yes, the paper mentions using a novel dual-memory mechanism to allow communication between short-term and long-term memory using symbols as context pointers for retrieval and saving. This mechanism overcomes the context window limitation of LLMs like GPT3.5 and GPT4, which allows for interactive language-based queries that are potentially well suited for making interactive behavior analysis.
        Question 2: The paper introduces AmadeusGPT, a natural language interface that turns natural language descriptions of behaviors into machine-executable code. Users directly use language-based definitions of behavior, and the augmented GPT develops code based on the core AmadeusGPT API, which contains machine learning, computer vision, spatio-temporal reasoning, and visualization modules. Users can interactively refine results and seamlessly add new behavioral modules as needed.
        Question 3: No, the paper does not talk about applying RAG in traditi




        Answer: No, this paper does not explicitly mention using Retrieval-Augmented Generation (RAG). Therefore, Questions 2 and 3 do not apply.


PAPER 28: Joint processing of linguistic properties in brains and language models





        Answer: No, this paper does not use Retrieval-Augmented Generation (RAG). The paper focuses on investigating the correspondence between the detailed processing of linguistic information by the human brain versus language models using a direct approach. It involves eliminating information related to specific linguistic properties in the language model representations and observing how this intervention affects the alignment with fMRI brain recordings obtained while participants listened to a story. The paper proposes a direct approach for evaluating the joint processing of linguistic properties in brains and language models and shows that the removal of a range of linguistic properties from both language models (pretrained BERT and GPT2) leads to a significant decrease in brain alignment across all layers in the language model. The paper also discusses the contribution of each linguistic property to the trend of brain alignment across layers and finds that tree depth and top co



105 and is not yet clear.
205 and the same results.
205 is not yet clear.
205 and is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear.
205 is not yet clear




        Answer: No, the paper does not explicitly mention using RAG. It focuses on evaluating the functional correctness of LLMs for code generation using a benchmark called HumanEval+, which includes 164 programming problems with test cases. The paper proposes EvalPlus, a toolkit for benchmarking LLMs' functional correctness, which extends the HumanEval benchmark by adding 80x more test cases automatically generated by LLMs and mutation analysis. The paper also introduces a new test-suite reduction technique called test-suite reducer, which reduces the number of test cases required for functional correctness evaluation by 47x while maintaining similar pass rates. The paper discusses the limitations of existing benchmarks and highlights the importance of test-suite reducer in reducing the computational cost of functional correctness evaluation. The paper also mentions the weaknesses of existing benchmarks, including incorrect ground truth and limited test coverage, and suggests ways t




        Answer: Yes, the paper uses retrieval-augmented generation (RAG). The LLM, called PoET, is used to generate and score arbitrary modifications conditioned on any protein family of interest. It can be used as a retrieval-augmented language model to generate and score arbitrary modifications conditioned on any protein family of interest. PoET can also be applied to traditional NLP applications such as text completion and summarization, but these applications are not explicitly mentioned in the paper.


PAPER 32: De novo Drug Design using Reinforcement Learning with Multiple GPT Agents





        Answer:
        No, the paper does not mention anything about retrieval-augmented generation (RAG).
        The LLM used in this paper is the GPT model, which is adopted as the agent in the proposed
        MolRL-MGPT algorithm for de novo drug molecular design. The GPT model is trained on a large
        corpus of text and can generate new text based on a given prompt. In the context of MolRL-
        MGPT, the GPT agents are used to suggest new molecular structures based on a given
        objective and scoring function. This application of LLMs is specific to the domain of
        de novo drug design and is not discussed in the context of traditional NLP applications.
        The paper does not mention any specific NLP applications where RAG is applied.
