In [None]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
from datasets import Dataset, load_dataset
# from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer


In [None]:
medicines = pd.read_csv('medicine-recommendation-system-main/medicine.csv')

In [34]:
medicines.tail()

Unnamed: 0,index,Drug_Name,Reason,Description
9715,9716,T Muce Ointment 5gm,Wound,used for treating warts
9716,9717,Wokadine 10% Solution 100mlWokadine Solution 5...,Wound,used to soften the skin cells
9717,9718,Wokadine M Onit 10gm,Wound,used for scars
9718,9719,Wound Fix Solution 100ml,Wound,used for wounds
9719,9720,Wounsol Ointment 15gm,Wound,used to treat and remove raised warts (usually...


In [None]:
medicines_dataset = Dataset.from_pandas(medicines, preserve_index=False)


In [None]:
def concatenate_text(row):
    return {
        "text": row['Drug_Name']
        + '\n '
        + row['Description']
    }

medicines_dataset = medicines_dataset.map(concatenate_text)


In [None]:
medicines_dataset

In [None]:
# device = torch.device("cuda")
device = torch.device("cpu")


model_ckpt = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
model = SentenceTransformer(model_ckpt, device=device)

In [None]:
def get_embeddings(text_list):
    embeddings = model.encode(text_list, convert_to_numpy=False, convert_to_tensor=True, show_progress_bar=False, device=device)
    return embeddings

embeddings = get_embeddings(medicines_dataset['text'][0]).detach().cpu().numpy()
embeddings.shape

In [None]:
embeddings_dataset = medicines_dataset.map(
    lambda x: {"embeddings": get_embeddings(x['text']).detach().cpu().numpy()}
)

In [None]:
len(embeddings_dataset[0]['embeddings'])

In [35]:
embeddings_dataset.add_faiss_index(column="embeddings")

  0%|          | 0/10 [00:00<?, ?it/s]

Dataset({
    features: ['index', 'Drug_Name', 'Reason', 'Description', 'text', 'embeddings'],
    num_rows: 9720
})

In [None]:
embeddings_dataset.features

In [None]:
def search_medicine(query_medicine):
    query_embedding = get_embeddings(query_medicine).detach().cpu().numpy()
    scores, top_medicines = embeddings_dataset.get_nearest_examples(
        "embeddings", query_embedding, k=5
    )
    top_medicines_df = pd.DataFrame.from_dict(top_medicines)
    top_medicines_df['scores'] = scores
    top_medicines_df.sort_values('scores', ascending=False, inplace=True)
    return top_medicines_df['Drug_Name'].tolist()

In [None]:
search_medicine("ACGEL NANO Gel 15gm")

# Save the model

In [46]:
modelPath='./saved/Drug_Name_Description'

model.save(modelPath)

# Load the model

In [132]:
model_1 = SentenceTransformer(modelPath)

In [133]:
def get_embeddings_2(text_list):
    embeddings = model_1.encode(text_list, convert_to_numpy=False, convert_to_tensor=True, show_progress_bar=False, device=device)
    return embeddings

embeddings_2 = get_embeddings(medicines_dataset['text'][0]).detach().cpu().numpy()
embeddings_2.shape

(768,)

In [130]:
def search_medicine_2(query_medicine):
    query_embedding = get_embeddings_1(query_medicine).detach().cpu().numpy()
    scores, top_medicines = embeddings_dataset_2.get_nearest_examples(
        "embeddings", query_embedding, k=5
    )
    top_medicines_df = pd.DataFrame.from_dict(top_medicines)
    top_medicines_df['scores'] = scores
    top_medicines_df.sort_values('scores', ascending=False, inplace=True)
    return top_medicines_df['Drug_Name'].tolist()

In [131]:
search_medicine_2("ACGEL NANO Gel 15gm")

['Foracne Plus Gel 15gm',
 'Adapnil Gel 15gm',
 'Adabor Gel 15gm',
 'Adalene Nano Gel 15mg',
 'ACGEL NANO Gel 15gm']

# Gradio Demo

In [32]:
import gradio as gr

In [33]:
demo = gr.Interface(fn=search_medicine, inputs="text", outputs=["text"]*5)
demo.launch()

Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.




In [None]:
demo.close()