### Install the required libraries

In [None]:
# Install every third-party package the notebook relies on.
# Qiskit and its Aer simulator backend
!pip install qiskit
!pip install qiskit-aer
# Plotting support (pylatexenc is presumably needed by Qiskit's matplotlib drawer — TODO confirm)
!pip install matplotlib
!pip install pylatexenc
# BibTeX parsing and HTTP requests
!pip install bibtexparser
!pip install requests
# Hugging Face model stack, including bitsandbytes for quantized loading
!pip install transformers
!pip install huggingface_hub
!pip install bitsandbytes
# Interactive login: prompts for a Hugging Face access token and stores it on this machine
!huggingface-cli login



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineGr

In [None]:
from qiskit import QuantumCircuit, transpile
#from qiskit_aer import Aer
from qiskit.circuit.library import GroverOperator
from qiskit.visualization import plot_histogram
from qiskit.circuit.library import MCXGate
from collections import Counter
import pandas as pd
import requests

# Import section
from qiskit_aer import AerSimulator
from google.colab import files
import io # Import the io module
import bibtexparser

### Upload the BibTeX file of test papers

In [None]:
# Upload the bibliography file and keep the result in `uploaded`.
# Run this cell only once per session: after the upload succeeds, comment the
# call out so that re-running the notebook does not prompt for the file again.

uploaded = files.upload()

In [None]:
# Pull the raw BibTeX text out of the uploaded file
with open('/content/GoogleScholar.bib', 'r', encoding='utf-8') as bibtex_file:
    bibtex_str = bibtex_file.read()

# Parse the text into a bibtexparser database
bib_database = bibtexparser.loads(bibtex_str)

# One row per BibTeX entry, plus a running 0..N-1 'index' column
papers_df = pd.DataFrame(bib_database.entries).assign(
    index=lambda frame: range(len(frame))
)

print(papers_df)

                                                  note  \
0    109 cites: https://scholar.google.com/scholar?...   
1    69 cites: https://scholar.google.com/scholar?c...   
2    13 cites: https://scholar.google.com/scholar?c...   
3    1342 cites: https://scholar.google.com/scholar...   
4    37 cites: https://scholar.google.com/scholar?c...   
..                                                 ...   
995  36 cites: https://scholar.google.com/scholar?c...   
996  26 cites: https://scholar.google.com/scholar?c...   
997  38 cites: https://scholar.google.com/scholar?c...   
998  41 cites: https://scholar.google.com/scholar?c...   
999  25 cites: https://scholar.google.com/scholar?c...   

                                              abstract  year  \
0    … of micro-credentials and their relationship ...  2021   
1    … The purpose of this article is to introduce ...  2020   
2    … as part of the European Approach to Micro-Cr...  2022   
3    … Rather, we are con cerned principally wi

## Load Llama 2 through Hugging Face Transformers

In [None]:
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Access tokens must never be committed to a notebook. Read the token from the
# HF_TOKEN environment variable instead; when it is unset this is None, and the
# Hugging Face libraries then fall back to the credential saved by
# `huggingface-cli login`.
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be revoked at https://huggingface.co/settings/tokens.
HUGGING_FACE_API_TOKEN = os.environ.get("HF_TOKEN")

# The "-hf" repository holds the Transformers-format checkpoint. The plain
# "meta-llama/Llama-2-7b" repository has no config.json, so from_pretrained()
# fails on it with an OSError.
LLAMA_MODEL = "meta-llama/Llama-2-7b-hf"

# Function to call Hugging Face API for Llama
import functools


@functools.lru_cache(maxsize=1)
def _load_llama():
    """Load the tokenizer and the 4-bit quantized model once and reuse them.

    Loading the checkpoint is by far the most expensive step, so the result is
    cached for the lifetime of the kernel. The cache is keyed on nothing: if
    LLAMA_MODEL is changed afterwards, call `_load_llama.cache_clear()`.
    """
    # Quantization configuration for Llama-2 models
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,  # Use 4-bit quantization
        bnb_4bit_quant_type="nf4",  # Quantization type
        bnb_4bit_use_double_quant=True,  # Use double quantization
    )

    # `token` replaces the deprecated `use_auth_token` argument.
    tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL, token=HUGGING_FACE_API_TOKEN)
    # trust_remote_code is deliberately not enabled: Llama is supported natively
    # by transformers, so no repository-supplied code needs to be executed.
    model = AutoModelForCausalLM.from_pretrained(
        LLAMA_MODEL,
        quantization_config=bnb_config,
        token=HUGGING_FACE_API_TOKEN,
        device_map="auto",
    )
    return tokenizer, model


def llama_request(prompt, max_new_tokens=100):
    """Run `prompt` through the locally loaded Llama model and return the decoded text.

    Args:
        prompt: Text to feed to the model.
        max_new_tokens: Upper bound on the number of tokens to generate
            (defaults to 100, the value that used to be hard-coded).

    Returns:
        The decoded output sequence as a string with special tokens removed.
        The full sequence returned by `generate` is decoded, so for this
        causal LM the text is expected to start with the prompt.
    """
    tokenizer, model = _load_llama()

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate
    generate_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # Decode the single sequence in the batch
    generated_text = tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0]

    return generated_text

ImportError: cannot import name 'AutoModelWithAdapters' from 'transformers' (/usr/local/lib/python3.10/dist-packages/transformers/__init__.py)

### Ask LLM for context and best Keywords related to search query

In [None]:
def generate_keywords(query, llm=None):
    """Ask the language model for search keywords related to `query`.

    Args:
        query: The user's search phrase.
        llm: Optional callable taking the prompt string and returning the
            model's text. Defaults to `llama_request`; pass a stub to exercise
            the parsing without loading a model.

    Returns:
        A list of non-empty, whitespace-trimmed keywords in first-seen order,
        de-duplicated case-insensitively. Empty when the model returns nothing
        usable.
    """
    import re  # local import: keeps this cell independent of the import cell

    if llm is None:
        llm = llama_request

    prompt = f"Generate related keywords and phrases for an academic paper search based on the query: '{query}'"
    keywords_text = llm(prompt)

    # The model's text may begin with the prompt itself; drop that echo so the
    # instruction sentence does not end up as a "keyword".
    if keywords_text.startswith(prompt):
        keywords_text = keywords_text[len(prompt):]

    keywords = []
    seen = set()
    # Models separate items with commas, semicolons or line breaks.
    for piece in re.split(r"[,;\n]+", keywords_text):
        # Trim whitespace, quotes and list-bullet characters from both ends.
        keyword = piece.strip(" \t\r\"'*•-")
        # An empty keyword would match every paper ("" is a substring of any
        # text), so it must never reach the filter.
        if not keyword:
            continue
        folded = keyword.lower()
        if folded in seen:
            continue
        seen.add(folded)
        keywords.append(keyword)
    return keywords

### Search the database with Qiskit

In [None]:
def search_papers(query):
    """Search the global `papers_df` using LLM-expanded keywords and Grover's algorithm.

    Args:
        query: The user's search phrase.

    Returns:
        Up to five paper records (dicts of the DataFrame columns) for the most
        frequently measured states, each with an added 'relevance_count' key
        holding the number of shots that produced that state. Returns an empty
        list when no paper matches any keyword.
    """
    n_qubits = 10  # Set to 10 for searching across 1,000 papers

    # Step 1: Use LLM to generate expanded keywords from the query
    keywords = generate_keywords(query)
    print("Generated Keywords:", keywords)

    # Blank keywords are dropped: "" is a substring of every text and would
    # mark the whole database as relevant.
    needles = [kw.strip().lower() for kw in keywords if kw.strip()]

    def _matches(text):
        # Entries without the field show up as NaN (a float), not as a string.
        if not isinstance(text, str):
            return False
        haystack = text.lower()
        return any(needle in haystack for needle in needles)

    # Step 2: Filter papers using the LLM-generated keywords
    # NOTE(review): this requires a 'keywords' column in papers_df — confirm
    # that the BibTeX export actually provides one.
    relevant_papers = papers_df[papers_df['keywords'].apply(_matches)]

    # Step 3: Set up indices of relevant papers for Grover's Algorithm
    indices_of_marked_elements = relevant_papers['index'].tolist()
    if not indices_of_marked_elements:
        # With nothing marked there is nothing to amplify; the measurement
        # would just rank arbitrary papers.
        return []

    # Run Grover's algorithm with the filtered indices
    # NOTE(review): `Grover` is not defined in the visible cells; it is assumed
    # to be a circuit builder defined elsewhere in the notebook.
    mycircuit = Grover(n_qubits, indices_of_marked_elements)
    # `Aer.get_backend` / `execute` are not imported in this notebook (and
    # `execute` no longer exists in Qiskit 1.x); use the AerSimulator and
    # transpile names that the import cell does provide.
    simulator = AerSimulator()
    result = simulator.run(transpile(mycircuit, simulator), shots=2048).result()

    # Step 4: Process the quantum results
    counts = result.get_counts()
    top_results = Counter(counts).most_common(5)

    # Step 5: Retrieve top 5 papers based on search results
    top_papers = []
    for state, count in top_results:
        paper_index = int(state, 2)  # Convert binary state to decimal
        matching_rows = papers_df.loc[papers_df['index'] == paper_index]
        if matching_rows.empty:
            # States beyond the last paper index have no record to return.
            continue
        paper = matching_rows.to_dict(orient="records")[0]
        paper['relevance_count'] = count  # Relevance count from Grover's output
        top_papers.append(paper)

    return top_papers


In [None]:
query = "Life Long Learning"
top_papers = search_papers(query)
for i, paper in enumerate(top_papers, start=1):
    print(f"Rank {i}: {paper['title']} by {paper['author']} (Relevance Count: {paper['relevance_count']})")
    # The loaded frame is shown to carry the paper text under 'abstract'; a
    # 'summary' field is used when present, otherwise fall back to the abstract
    # instead of raising KeyError.
    print("Summary:", paper.get('summary', paper.get('abstract', 'N/A')))
    print("\n")

OSError: meta-llama/Llama-2-7b does not appear to have a file named config.json. Checkout 'https://huggingface.co/meta-llama/Llama-2-7b/tree/main' for available files.

In [None]:
def search_papers(query, papers_df, search_columns=("title", "abstract", "booktitle")):
    """Search `papers_df` for the words of `query` and rank the hits with Grover's algorithm.

    Args:
        query: Search phrase; it is lower-cased and split on whitespace.
        papers_df: DataFrame of papers with an integer 'index' column.
        search_columns: Text columns to look in. Columns missing from the
            frame are skipped. Field names are lower-case in the parsed frame,
            which is why 'Booktitle' raised KeyError.

    Returns:
        Up to five paper records (dicts of the DataFrame columns) for the most
        frequently measured states, each with an added 'relevance_count' key.
        Returns an empty list when no paper matches the query.
    """
    n_qubits = 10  # Set to 10 for searching across 1,000 papers

    # Step 1: Directly use the query to filter papers based on keywords
    keywords = query.lower().split()  # Basic split on spaces for keywords

    def _matches(value):
        # Entries without the field show up as None/NaN, not as a string.
        if not isinstance(value, str):
            return False
        lowered = value.lower()
        return any(kw in lowered for kw in keywords)

    # A paper is relevant when any of the available text columns matches.
    is_relevant = pd.Series(False, index=papers_df.index)
    for column in search_columns:
        if column in papers_df.columns:
            is_relevant |= papers_df[column].map(_matches).astype(bool)
    relevant_papers = papers_df[is_relevant]

    # Step 2: Get the indices of relevant papers for Grover's Algorithm
    indices_of_marked_elements = relevant_papers['index'].tolist()
    if not indices_of_marked_elements:
        # With nothing marked there is nothing to amplify; the measurement
        # would just rank arbitrary papers.
        return []

    # Run Grover's algorithm with the filtered indices
    # NOTE(review): `Grover` is not defined in the visible cells; it is assumed
    # to be a circuit builder defined elsewhere in the notebook.
    mycircuit = Grover(n_qubits, indices_of_marked_elements)
    # `Aer.get_backend` / `execute` are not imported in this notebook (and
    # `execute` no longer exists in Qiskit 1.x); use the AerSimulator and
    # transpile names that the import cell does provide.
    simulator = AerSimulator()
    result = simulator.run(transpile(mycircuit, simulator), shots=2048).result()

    # Step 3: Process the quantum results
    counts = result.get_counts()
    top_results = Counter(counts).most_common(5)

    # Step 4: Retrieve top 5 papers based on search results
    top_papers = []
    for state, count in top_results:
        paper_index = int(state, 2)  # Convert binary state to decimal
        matching_rows = papers_df.loc[papers_df['index'] == paper_index]
        if matching_rows.empty:
            # States beyond the last paper index have no record to return.
            continue
        paper = matching_rows.to_dict(orient="records")[0]
        paper['relevance_count'] = count  # Relevance count from Grover's output
        top_papers.append(paper)

    return top_papers


# Search phrase to look up
query = "Micro-credential"
# Ranked hits for that phrase
top_papers = search_papers(query, papers_df)

# Print one line per hit, numbered from 1
for i in range(1, len(top_papers) + 1):
    paper = top_papers[i - 1]
    print(f"Rank {i}: {paper['title']} by {paper['author']} (Relevance Count: {paper['relevance_count']})")

KeyError: 'Booktitle'