In [14]:
from sentence_transformers import SentenceTransformer
import pandas as pd

df = pd.read_csv('data/arxiv_cs.AI_papers.csv')
model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings for all abstracts
embeddings = model.encode(df['abstract'].tolist(), show_progress_bar=True) # Shape: (500, 384)

Batches:   0%|          | 0/16 [00:00<?, ?it/s]

In [15]:
import faiss
import numpy as np

# Convert embeddings to a numpy array with float32 type (FAISS requirement)
embeddings = np.array(embeddings).astype('float32')

# Create a FAISS index (L2 distance for similarity)
index = faiss.IndexFlatL2(embeddings.shape[1])

# Add embeddings to the index
index.add(embeddings)

In [16]:
# save the index
faiss.write_index(index, "data/sciquery_index.faiss")

In [17]:
def retrieve(query, k=5, similarity_threshold=0.4):
    """
    Retrieves relevant abstracts based on a query using FAISS index,
    filtering by similarity threshold.

    Args:
        query (str): The search query.
        k (int): The maximum number of results to consider from the index search.
        similarity_threshold (float): The minimum cosine similarity score for
                                      a result to be included.

    Returns:
        list[tuple[str, float]]: A list of tuples, each containing an abstract
                                 and its similarity score, sorted by similarity
                                 in descending order. Returns an empty list if
                                 no results meet the threshold.
    """
    if not query or not isinstance(query, str):
        print("Error: Query must be a non-empty string.")
        return []

    # Encode the query and ensure correct type/normalization
    query_embedding = model.encode([query]).astype('float32')
    # Optional: Explicit normalization if needed, though SentenceTransformer usually handles this
    # faiss.normalize_L2(query_embedding)

    # Search the FAISS index for the k nearest neighbors
    distances, indices = index.search(query_embedding, k)

    results = []
    if indices.size > 0: # Check if any indices were returned
        for i, dist in enumerate(distances[0]):
            # FAISS returns -1 if fewer than k neighbors are found
            if indices[0][i] == -1:
                continue

            # Calculate cosine similarity from L2 distance (for normalized embeddings)
            # similarity = 1 - (distance^2) / 2
            # Clamp similarity to [0, 1] to handle potential floating point inaccuracies
            similarity = max(0.0, 1.0 - (dist**2) / 2.0)

            # Filter based on the similarity threshold
            if similarity >= similarity_threshold:
                abstract_index = indices[0][i]
                abstract = df.iloc[abstract_index]['abstract']
                results.append((abstract, similarity))

    # The results from FAISS L2 search are already sorted by distance (ascending),
    # which means they are implicitly sorted by similarity (descending).
    # No explicit sort needed unless combining results from multiple searches.

    return results

In [18]:
print(retrieve("What's new in neural network optimization?"))

[("in this paper, we adopt a probability distribution estimation perspective to\nexplore the optimization mechanisms of supervised classification using deep\nneural networks. we demonstrate that, when employing the fenchel-young loss,\ndespite the non-convex nature of the fitting error with respect to the model's\nparameters, global optimal solutions can be approximated by simultaneously\nminimizing both the gradient norm and the structural error. the former can be\ncontrolled through gradient descent algorithms. for the latter, we prove that\nit can be managed by increasing the number of parameters and ensuring parameter\nindependence, thereby providing theoretical insights into mechanisms such as\nover-parameterization and random initialization. ultimately, the paper\nvalidates the key conclusions of the proposed method through empirical results,\nillustrating its practical effectiveness.", 0.5165012108941465), ("artificial intelligence (ai) has achieved new levels of performance and

In [19]:
def compute_confidence(retrieved_results):
    """
    Computes a confidence score based on the average similarity of retrieved results.

    Args:
        retrieved_results (list[tuple[str, float]]): The output from the retrieve function,
                                                     a list of (abstract, similarity) tuples.

    Returns:
        float: The confidence score as a percentage (0-100). Returns 0.0 if no results.
    """
    # Check if the list of results is empty
    if not retrieved_results:
        return 0.0

    # Extract the similarity scores from the list of tuples
    # The second element (index 1) of each tuple is the similarity score
    similarities = [item[1] for item in retrieved_results]

    # Calculate the average similarity and convert to percentage
    average_similarity = sum(similarities) / len(similarities)
    return average_similarity * 100.0


In [20]:
queries = [
    "What's new in neural network optimization?",
    "What's new in reinforcement learning?",
    "What's new in natural language processing?",
    "What's new in computer vision?",
    "What's new in robotics?",
    "What's new in quantum computing?",
]

for query in queries:
    retrieved_results = retrieve(query)
    confidence = compute_confidence(retrieved_results)
    print(f"Query: {query}")
    print(f"Confidence: {confidence:.2f}%")
    print(f"Results: {retrieved_results}\n")


Query: What's new in neural network optimization?
Confidence: 48.38%
Results: [("in this paper, we adopt a probability distribution estimation perspective to\nexplore the optimization mechanisms of supervised classification using deep\nneural networks. we demonstrate that, when employing the fenchel-young loss,\ndespite the non-convex nature of the fitting error with respect to the model's\nparameters, global optimal solutions can be approximated by simultaneously\nminimizing both the gradient norm and the structural error. the former can be\ncontrolled through gradient descent algorithms. for the latter, we prove that\nit can be managed by increasing the number of parameters and ensuring parameter\nindependence, thereby providing theoretical insights into mechanisms such as\nover-parameterization and random initialization. ultimately, the paper\nvalidates the key conclusions of the proposed method through empirical results,\nillustrating its practical effectiveness.", 0.51650121089414

In [21]:
import requests
import os
from dotenv import load_dotenv

load_dotenv()  # Loads variables from .env
HF_TOKEN = os.environ.get('HF_TOKEN')

# Hugging Face API setup
API_URL = "https://router.huggingface.co/novita/v3/openai/chat/completions"
headers = {"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"}

def generate_answer(context, query):
    # Create a prompt combining the query and context
    prompt = f"Based on the following context, answer the question: {query}\n\nContext: {context}"

    payload = {
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "model": "deepseek/deepseek-v3-0324",
    }
    
    # Send request to the API
    response = requests.post(API_URL, headers=headers, json=payload)

    if response.status_code != 200:
        raise Exception(f"API request failed with status code {response.status_code}")
    
    # Return the generated text
    return response.json()["choices"][0]["message"]["content"]

In [22]:
def query_sciquery(query, k=5):
    retrieved_results = retrieve(query, k)

    if len(retrieved_results) == 0:
        return "No relevant papers found."
    
    abstracts, similarities = zip(*retrieved_results)
    
    context = "\n".join(abstracts)

    answer = generate_answer(context, query)
    confidence = compute_confidence(retrieved_results)

    return answer, confidence

In [23]:
query_sciquery("What’s new in neural network optimization?")

('The context highlights several novel advancements and perspectives in neural network optimization, particularly inspired by biological processes and theoretical insights. Here are the key innovations:\n\n1. **Biological Inspiration for Dynamic Architectures**:  \n   - **Neurogenesis-inspired "Dropin"**: Introducing new neurons (parameters) during training, analogous to the birth of neurons in the brain.  \n   - **Neuroapoptosis-inspired "Dropout" and Structural Pruning**: Removing redundant neurons (like biological cell death) to enhance efficiency.  \n   - **Neuroplasticity for Lifelong Learning**: Combining dynamic addition ("dropin") and removal ("dropout"/pruning) of neurons to enable adaptive, continuous learning in large-scale models.  \n\n2. **Theoretical Optimization Insights**:  \n   - **Fenchel-Young Loss**: Despite non-convexity, global optima can be approximated by minimizing both gradient norm (via gradient descent) and structural error.  \n   - **Over-Parameterization a

In [24]:
# Sample test queries
test_queries = [
    "What are the latest advancements in reinforcement learning?",
    "How does gradient clipping improve neural network training?"
]

# Run tests
for query in test_queries:
    print(f"Query: {query}\n")
    print(f"Answer: {query_sciquery(query)}\n")

Query: What are the latest advancements in reinforcement learning?

Answer: ('The latest advancements in reinforcement learning (RL) highlighted in the provided context include:\n\n1. **Theory-Based Reinforcement Learning (TBRL) and TheoryCoder**:  \n   - TBRL addresses the gap in human-like sample efficiency and adaptability by using structured, causal world models ("theories") for planning, generalization, and exploration.  \n   - **TheoryCoder** improves TBRL by introducing hierarchical representations of theories and program synthesis (using LLMs to generate low-level transition models in Python). This enables bilevel planning for scalable performance in complex environments like grid-world games.\n\n2. **Implementation Inconsistencies in Deep RL Algorithms**:  \n   - Studies reveal significant performance discrepancies among implementations of state-of-the-art algorithms (e.g., PPO, DQN) due to code-level inconsistencies. For example, some PPO implementations achieve superhuman pe

In [25]:
query_sciquery("What's new in robotics?")

'No relevant papers found.'

In [26]:
# Test the app in Gradio
import gradio as gr

def gradio_wrapper(query):
    answer, confidence = query_sciquery(query)
    confidence_text = f"Confidence: {confidence:.1f}%"    
    return answer, confidence_text

interface = gr.Interface(
    fn=gradio_wrapper,
    inputs=gr.Textbox(label="Ask a question about AI research", placeholder="e.g., What's new in neural network optimization?"),
    outputs=[
        gr.Markdown(label="Answer"),
        gr.Textbox(label="Confidence Score")
    ],
    title="SciQuery: Ask Science Questions",
    description="Enter a question about AI research, and I’ll answer using arXiv papers!",
    examples=[["What’s new in neural network optimization?"], ["How does reinforcement learning work?"]]
)

interface.launch()


* Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


