In [1]:
import gradio as gr
import numpy as np

# Import the search helpers from the search.py file
from search import search_documents, find_most_relevant_snippet, TfidfVectorizer, SentenceTransformer

# Sentence-embedding model; it is passed to search_documents on every query.
model = SentenceTransformer('paraphrase-distilroberta-base-v1')
# Document records loaded once at start-up. The file name suggests each record
# carries a precomputed embedding — TODO confirm against search.py.
# NOTE(review): allow_pickle=True lets NumPy unpickle arbitrary Python objects,
# so this file must only ever come from a trusted source.
documents = np.load("documents_with_embeddings.npy", allow_pickle=True)

# Wrap the search functionality and return the results as one HTML string
def search_interface(query):
    """Search the loaded documents for *query* and render the hits as HTML.

    Each hit returned by ``search_documents`` is a ``(doc, score)`` pair. For
    every hit the most relevant snippet of ``doc['full_text']`` is looked up
    with ``find_most_relevant_snippet`` and one bordered ``<div>`` card is
    produced showing the PID, title, link, snippet, the first 300 characters
    of the abstract and the similarity score (4 decimal places).

    All document-derived values are HTML-escaped before being interpolated,
    so markup characters in titles, abstracts, snippets or URLs are shown as
    text instead of being interpreted by the browser.

    Args:
        query: The free-text search query entered by the user.

    Returns:
        A single string containing the concatenated HTML cards; the empty
        string when there are no hits.
    """
    # Function-scope import keeps the block self-contained (stdlib only).
    from html import escape

    relevant_docs = search_documents(query, documents, model)
    vectorizer = TfidfVectorizer()

    cards = []
    for position, (doc, score) in enumerate(relevant_docs, start=1):
        most_relevant_snippet, _ = find_most_relevant_snippet(query, doc['full_text'], vectorizer)

        # str() mirrors what the f-string did implicitly (e.g. "ids" is a
        # list); escape() then neutralises <, >, & and both quote characters.
        pid = escape(str(doc["ids"]))
        title = escape(str(doc["title"]))
        url = escape(str(doc["url"]))  # used inside an href attribute too
        snippet = escape(str(most_relevant_snippet))
        # Truncate before escaping so the 300-character limit applies to the
        # original text and no entity is cut in half.
        abstract = escape(str(doc["abstract"][:300]))

        cards.append(f"""
        <div style="border: 1px solid #ccc; border-radius: 5px; padding: 1em; margin-bottom: 1em;">
            <h4>Document {position}</h4>
            <p><b>PID:</b> {pid}</p>
            <p><b>Title:</b> {title}</p>
            <p><b>Link:</b> <a href="{url}" target="_blank">{url}</a></p>
            <p><b>Relevant snippet:</b> {snippet}</p>
            <p><b>Abstract:</b> {abstract}</p>
            <p><b>Similarity score:</b> {score:.4f}</p>
        </div>
        """)

    # join() avoids repeated string reallocation from += in the loop.
    return "".join(cards)

# Define Gradio input and output components.
# The top-level component classes are used because the `gr.inputs` /
# `gr.outputs` namespaces are deprecated in Gradio 3.x and removed in 4.x.
query_input = gr.Textbox(label="Enter your search query:")
output = gr.HTML(label="Search Results")

# Create the Gradio interface: the query textbox feeds search_interface and
# its returned HTML string is shown in the output component.
# NOTE(review): theme="huggingface" and layout="vertical" look like legacy
# Interface options — confirm the installed Gradio version still accepts them.
iface = gr.Interface(
    fn=search_interface,
    inputs=query_input,
    outputs=output,
    title="Document Search",
    description="Enter a search query to find relevant documents.",
    theme="huggingface",
    layout="vertical",
)

# Launch the Gradio app. share=True additionally requests a temporary public
# URL, so the search UI is reachable from outside this machine while it runs.
iface.launch(share=True)


[NbConvertApp] Converting notebook search.ipynb to script
[NbConvertApp] Writing 3164 bytes to search.py
Query: Earth Observation for Kenya
Document 1:
PID: ['P154784']
Title: Kenya - Climate Smart Agriculture Project : Environmental Assessment (Vol. 2) : Environmental and Social Impact Assessment Report for the Desilting and Expansion of Kabarbesi Water Pan Sub-Project Located in Emining Ward-Mogotio Sub-County, Baringo County
Link: http://documents.worldbank.org/curated/en/841071615445783266/Environmental-and-Social-Impact-Assessment-Report-for-the-Desilting-and-Expansion-of-Kabarbesi-Water-Pan-Sub-project-Located-in-Emining-Ward-Mogotio-Sub-County-Baringo-County
Relevant snippet: of the Climate Smart Agriculture Project for Kenya is to
Abstract: The development objective of the Climate Smart Agriculture Project for Kenya is to increase agricultural productivity and build resilience to climate change risks in the targeted smallholder farming and pastoral communities in Kenya, and in 



Running on local URL:  http://127.0.0.1:7860
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Running on public URL: https://249b1f58aec0c3fb44.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


