In [1]:
import requests
import wikipediaapi
from groq import Groq
from sentence_transformers import SentenceTransformer

In [2]:
# Initialize Groq client
api_key = "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"
client = Groq(api_key=api_key)

# Initialize vectorizer model for optional vectorization
vectorizer = SentenceTransformer('all-MiniLM-L6-v2')

In [3]:
# Function to interact with Groq API
def generate_response(prompt):
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": prompt}
        ],
        model="llama3-70b-8192"
    )
    return chat_completion.choices[0].message.content

In [4]:
# Function to fetch data from ArXiv API
def fetch_from_arxiv(query):
    response = requests.get(f"https://export.arxiv.org/api/query?search_query={query}")
    return response.text

In [5]:
# Function to fetch data from Wikipedia API
def fetch_from_wikipedia(query):
    wiki_wiki = wikipediaapi.Wikipedia(
        language='en',
        user_agent='AI-KnowledgeAgent/1.0 (Contact: your-email@example.com)'
    )
    page = wiki_wiki.page(query)
    if page.exists():
        return page.summary
    else:
        return f"Wikipedia page for '{query}' not found."

In [6]:
# Function to fetch data from DuckDuckGo API
def fetch_from_duckduckgo(query):
    response = requests.get(f"https://api.duckduckgo.com/?q={query}&format=json")
    return response.json().get('AbstractText', "No relevant data found.")

In [7]:
# Function to send tool results through the LLM for refinement
def refine_tool_results(tool_name, raw_data):
    prompt = f"""
    Task: Analyze and refine the following data extracted from {tool_name}.
    Raw Data: "{raw_data}"
    
    Provide a clear and concise summary of the key points.
    """
    return generate_response(prompt)

In [8]:
# Function to split data into batches
def split_into_batches(data, batch_size=2000):
    return [data[i:i + batch_size] for i in range(0, len(data), batch_size)]

In [9]:
# Function to process and summarize batched data
def process_batches(tool_name, data, batch_size=2000):
    batches = split_into_batches(data, batch_size)
    batch_summaries = []
    
    for i, batch in enumerate(batches):
        prompt = f"""
        Task: Summarize the following batch of data extracted from {tool_name}.
        Batch {i + 1}/{len(batches)}:
        "{batch}"

        Provide a concise summary.
        """
        summary = generate_response(prompt)
        batch_summaries.append(summary)

    # Combine summaries from all batches
    combined_summary = " ".join(batch_summaries)
    return combined_summary


In [10]:
# Function to vectorize data
def vectorize_data(data):
    return vectorizer.encode(data)

In [11]:
# Main workflow
def main():
    user_query = input("Enter your query: ")
    
    # Step 1: Generate initial response using Groq API
    initial_response = generate_response(user_query)
    print("Initial Response from Groq API:\n", initial_response)
    
    # Step 2: Extract data from tools
    print("\nFetching data from ArXiv...")
    arxiv_data = fetch_from_arxiv(user_query)
    print("Fetching data from Wikipedia...")
    wikipedia_data = fetch_from_wikipedia(user_query)
    print("Fetching data from DuckDuckGo...")
    duckduckgo_data = fetch_from_duckduckgo(user_query)
    
    # Step 3: Refine each tool's data individually through the LLM
    print("\nRefining data from ArXiv...")
    arxiv_summary = process_batches("ArXiv", arxiv_data) if len(arxiv_data) > 2000 else refine_tool_results("ArXiv", arxiv_data)
    print("ArXiv Summary:\n", arxiv_summary)

    print("\nRefining data from Wikipedia...")
    wikipedia_summary = process_batches("Wikipedia", wikipedia_data) if len(wikipedia_data) > 2000 else refine_tool_results("Wikipedia", wikipedia_data)
    print("Wikipedia Summary:\n", wikipedia_summary)

    print("\nRefining data from DuckDuckGo...")
    duckduckgo_summary = process_batches("DuckDuckGo", duckduckgo_data) if len(duckduckgo_data) > 2000 else refine_tool_results("DuckDuckGo", duckduckgo_data)
    print("DuckDuckGo Summary:\n", duckduckgo_summary)
    
    # Step 4: Optionally vectorize data (if needed)
    arxiv_vector = vectorize_data(arxiv_summary) if arxiv_summary else None
    wikipedia_vector = vectorize_data(wikipedia_summary) if wikipedia_summary else None
    duckduckgo_vector = vectorize_data(duckduckgo_summary) if duckduckgo_summary else None

    # Step 5: Combine tool summaries into a final summary
    final_prompt = f"""
    Task: Summarize the collective insights from multiple tools.
    
    Insights:
    - ArXiv Summary: {arxiv_summary}
    - Wikipedia Summary: {wikipedia_summary}
    - DuckDuckGo Summary: {duckduckgo_summary}
    
    Provide a well-structured final summary that integrates all the insights.
    """
    final_response = generate_response(final_prompt)
    print("\nFinal Summarized Response:\n", final_response)

    # Step 6: Offer to save the final response as notes
    save_notes = input("\nDo you want to save this response as a file for notes? (yes/no): ").strip().lower()
    if save_notes == "yes":
        file_name = input("Enter the filename (e.g., notes.txt): ").strip()
        try:
            with open(file_name, "w") as file:
                file.write("Query:\n" + user_query + "\n\n")
                file.write("ArXiv Summary:\n" + arxiv_summary + "\n\n")
                file.write("Wikipedia Summary:\n" + wikipedia_summary + "\n\n")
                file.write("DuckDuckGo Summary:\n" + duckduckgo_summary + "\n\n")
                file.write("Final Response:\n" + final_response + "\n")
            print(f"Response saved successfully in {file_name}.")
        except Exception as e:
            print(f"An error occurred while saving the file: {e}")


In [12]:
# Execute the workflow
if __name__ == "__main__":
    main()

Enter your query: RAG latest papers
Initial Response from Groq API:
 RAG (Reinforcement Learning with Attention Guidance) is a research paper that combines reinforcement learning with attention mechanisms to improve the learning efficiency and effectiveness in complex environments. Here are some of the latest papers related to RAG:

1. **"Reinforcement Learning with Attention Guidance"** by Zhang et al. (2020)

This is the original paper that introduced the RAG algorithm. The authors propose using attention mechanisms to guide the exploration in reinforcement learning, and they show that RAG can achieve better performance than traditional reinforcement learning methods in several environments.

2. **"Hierarchical Reinforcement Learning with Attention Guidance"** by Li et al. (2021)

This paper extends the RAG algorithm to hierarchical reinforcement learning settings. The authors propose using attention mechanisms to guide the learning of sub-policies in a hierarchical reinforcement lea

Wikipedia Summary:
 After analyzing the raw data, here is a clear and concise summary of the key points:

**Summary:** 

* There is no Wikipedia page for "RAG latest papers".
* The search query yielded no results.

In other words, the data suggests that there is no existing Wikipedia page dedicated to "RAG latest papers". If you're looking for information on RAG (whatever RAG stands for), you might need to search elsewhere or create a new Wikipedia page on the topic.

Refining data from DuckDuckGo...
DuckDuckGo Summary:
 I apologize, but it seems that there is no data to analyze and refine. The raw data provided is an empty string.

If you meant to provide actual data, please paste it, and I'll be happy to help you analyze and refine it, providing a clear and concise summary of the key points.

Final Summarized Response:
 Here is a well-structured final summary that integrates all the insights from the multiple tools:

**Retrieval-Augmented Generation (RAG) for Large Language Models (L