In [1]:
import json
import os
import pickle
import warnings

import numpy as np
from sentence_transformers import SentenceTransformer

warnings.filterwarnings('ignore')


In [2]:
# Record the library versions this notebook was run with.
for module in (np, json):
    print(module.__version__)
#print(SentenceTransformer.__version__)

1.21.5
2.0.9


In [3]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in a `with` block so the handle is always closed,
    including when unpickling fails.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserializing; only point this at files from a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)


# Per-document metadata used by the retrieval cells to build each result.
titles = _load_pickle('Files/titles.pkl')
embeddings = _load_pickle('Files/embeddings.pkl')
authors = _load_pickle('Files/authors.pkl')
years = _load_pickle('Files/years.pkl')
summary = _load_pickle('Files/summary.pkl')

# Pre-built search index queried via `index.search(...)` below.
index = _load_pickle('Files/index.sav')

# The encoder is loaded by name rather than from a pickled copy.
#model = pickle.load(open('Files/model.sav' , 'rb'))
model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')

In [4]:
# simple query to return the first 5 similar documents (Titles, Authors, Years, Summary)

def retrieve(query, k=5):
    """Return up to `k` documents most similar to `query`.

    The query is encoded with the module-level `model`, searched against the
    module-level `index`, and each returned id is used to look up the
    matching entries in `titles`, `authors`, `years` and `summary`.

    Args:
        query: Text to search for.
        k: Number of neighbours to request from the index (default 5).

    Returns:
        A list of dicts with the keys 'Title', 'Author', 'Year' and
        'Summary', in the order the index returned them. The list is shorter
        than `k` when the index reports negative (missing) ids.
    """
    xq = model.encode([query])
    _, I = index.search(xq, k)

    results = []
    for doc_id in I[0]:
        # NOTE(review): `index.search` looks like the FAISS API, which pads
        # with -1 when fewer than k neighbours exist. Without this guard a
        # -1 would silently select the *last* document via negative indexing.
        if doc_id < 0:
            continue
        results.append(
            {
                'Title': titles[doc_id],
                'Author': authors[doc_id],
                'Year': years[doc_id],
                'Summary': summary[doc_id]
            }
        )

    return results

In [5]:
# testing 😀
# Smoke test: run one example query through retrieve() with its default k
# and print the returned list of result dicts. `results` is reused by the
# JSON export cells further down.
query = 'What is the best way to learn Python?'
results = retrieve(query)
print(results)

[{'Title': 'Minecraft: An Engaging Platform to Learn Programming', 'Author': 'Worasait Suwannik', 'Year': 2022, 'Summary': 'Teaching programming effectively is difficult. This paper explores the\nbenefits of using Minecraft Education Edition to teach Python programming.\nEducators can use the game to teach various programming concepts ranging from\nfundamental programming concepts, object-oriented programming, event-driven\nprogramming, and parallel programming. It has several benefits, including being\nhighly engaging, sharpen creativity and problem-solving skill, motivating the\nstudy of mathematics, and making students realizes the importance of\nprogramming.'}, {'Title': 'Improving Learning from Demonstrations by Learning from Experience', 'Author': 'Marcelo H Ang Jr', 'Year': 2021, 'Summary': 'How to make imitation learning more general when demonstrations are\nrelatively limited has been a persistent problem in reinforcement learning\n(RL). Poor demonstrations lead to narrow and 

----

In [13]:
def write_json(data):
    """Serialize `data` as JSON to 'Sample_jsons/excport.json'.

    The output directory is created if it does not exist yet, so the function
    also works in a fresh checkout. An existing file is overwritten.

    Args:
        data: Any object that `json.dump` can serialize.

    Returns:
        The relative path of the file that was written.

    Raises:
        TypeError: If `data` contains values `json.dump` cannot serialize.
    """
    base_path = 'Sample_jsons/'
    excportpath = base_path + 'excport.json'
    # Opening the file in 'w' mode does not create missing parent directories.
    os.makedirs(base_path, exist_ok=True)
    with open(excportpath, 'w') as f:
        json.dump(data, f)

    print('JSON file created successfully')
    return excportpath

# testing 😀
# Export the `results` list from the retrieve() smoke test via write_json().
write_json(results)

-------

In [None]:
# write list of dictionaries to json file
# Writes `results` as a single JSON document to 'results.json' in the current
# working directory, overwriting any existing file.
with open('results.json', 'w') as f:
    json.dump(results, f)

----
## Gradio app

In [10]:
import gradio as gr

# Web UI around `retrieve`: one query textbox in, JSON list of matches out.
# Components are taken from the top-level `gr` namespace; the older
# `gr.inputs.*` / `gr.outputs.*` namespaces are deprecated and no longer exist
# in current Gradio releases.
interface = gr.Interface(
    fn=retrieve,
    inputs=gr.Textbox(lines=1, placeholder="Enter Query...", label="Query text"),
    # a JSON output: a list of dicts with the keys Title, Author, Year, Summary
    outputs=gr.JSON(label="Similar Documents"),
    title="Semantic Search",
    description="Search for similar documents using semantic search.",
    # String form of the flag; the boolean form is deprecated.
    allow_flagging="never",
    examples=[
        ["Mathematical models of the spread of infectious diseases in humans and animals"],
        ["A new method for solving the nonlinear eigenvalue problem"]
    ]
)

In [11]:
# Start the local Gradio server for `interface`. Per the Gradio docs,
# inline=True embeds the UI in the notebook output and inbrowser=False
# prevents opening a new browser tab.
interface.launch(inline=True, inbrowser=False)

Running on local URL:  http://127.0.0.1:7862

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x7febc6ad48e0>, 'http://127.0.0.1:7862/', None)

## Reserves


```python

def retrieve(query, k=5):
    """Column-oriented variant: return the top-`k` matches as parallel lists.

    Encodes `query` with `model`, searches `index`, and returns a dict with
    the keys 'titles', 'authors', 'years' and 'summary', each holding `k`
    values in the order the index returned them.
    """
    query_vec = model.encode([query])
    _, neighbours = index.search(query_vec, k)

    # Resolve the first k ids once, then project each metadata list.
    hits = [neighbours[0][rank] for rank in range(k)]
    return {
        'titles': [titles[h] for h in hits],
        'authors': [authors[h] for h in hits],
        'years': [years[h] for h in hits],
        'summary': [summary[h] for h in hits],
    }
    # Tuple-returning alternative kept for reference:
    #return [titles[i] for i in I[0]], [authors[i] for i in I[0]], [years[i] for i in I[0]], [summary[i] for i in I[0]]


# Print each hit of the column-oriented `results` dict as a four-line record
# followed by a separator rule.
separator = '----------------------------------------' * 2
for pos, title in enumerate(results['titles']):
    print(title)
    print(results['authors'][pos])
    print(results['years'][pos])
    print(results['summary'][pos])
    print(separator)


# write each item in the list to a json file on new line
# NOTE(review): this assumes `results` is a list of records; if it is the
# dict-of-lists variant, iterating it yields only the key names.
with open('results2.json', 'w') as f:
    for item in results:
        json.dump(item, f)
        # Plain newline: the previous `'\n' % item` raised TypeError because
        # the string contains no format specifier.
        f.write('\n')


# simple query to return the first k similar documents (Titles, Authors, Years, Summary)
def retrieve(query, k=5):
    """Return the top-`k` matches keyed as 'Result 1' .. 'Result k'.

    Encodes `query` with `model`, searches `index`, and maps each rank to a
    list `[title, author, year, summary]`.

    Args:
        query: Text to search for.
        k: Number of results to return (default 5).

    Returns:
        A dict with exactly `k` entries. The keys are generated from `k`
        instead of being hard-coded to five, so `k > 5` no longer raises
        KeyError and `k < 5` no longer leaves empty placeholder entries.
    """
    xq = model.encode([query])
    D, I = index.search(xq, k)

    results = {}
    for i in range(k):
        doc_id = I[0][i]
        results['Result ' + str(i + 1)] = [
            titles[doc_id],
            authors[doc_id],
            years[doc_id],
            summary[doc_id],
        ]
    return results


## Draft for json output
k = 5 # number of similar documents to return
xq = model.encode(['Africa Technology'])
D, I = index.search(xq, k)

# one record per hit, collected in rank order
results = [
    {
        'Title': titles[I[0][i]],
        'Author': authors[I[0][i]],
        'Year': years[I[0][i]],
        'Summary': summary[I[0][i]],
    }
    for i in range(k)
]

# persist the whole list as a single JSON document
with open('results3.json', 'w') as f:
    json.dump(results, f)

```