In [1]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)



from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain

In [2]:
#################################################################
# Tokenizer
#################################################################

model_name='mistralai/Mistral-7B-Instruct-v0.1'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

Your GPU supports bfloat16: accelerate training with bf16=True


`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


In [4]:
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.01,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=8912,
)

In [5]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [6]:
import pandas as pd

df = pd.read_csv(r"C:\Users\20301162\Downloads\Medeasy.csv", encoding="UTF-8")
df = df.drop('web-scraper-order', axis=1)
df = df.drop('web-scraper-start-url', axis=1)
df = df.drop('Next', axis=1)
df = df.drop('links', axis=1)
df = df.drop('links-href', axis=1)


df

Unnamed: 0,Name,Price,unit,Type,Genric name,Company name,Description
0,Rivotril0.5 mg,Tk 8.28,/piece,Tablet,Clonazepam,Radiant Pharmaceuticals Ltd.,Medicine overviewEnglishIndications of Rivotri...
1,Montair10 mg,Tk 157.50,/10's Strip,Tablet,Montelukast,Incepta Pharmaceuticals Ltd.,Medicine overviewEnglishIndications of Montair...
2,Bislol5 mg,Tk 144.90,/14's Strip,Tablet,Bisoprolol Fumarate,Opsonin Pharma Ltd.,Medicine overviewEnglishIndications of Bislol ...
3,Rosuva5 mg,Tk 90.27,/10's Strip,Tablet,Rosuvastatin,Square Pharmaceuticals Ltd.,"<img alt=""Section"" srcSet=""/icons/note.svg 1x,..."
4,Clopid75 mg,Tk 151.20,/14's Strip,Tablet,Clopidogrel Bisulphate,Drug International Ltd.,"<img alt=""Section"" srcSet=""/icons/note.svg 1x,..."
...,...,...,...,...,...,...,...
5338,Pantonix20 mg,Tk 6.30,/piece,Tablet,Pantoprazole,Incepta Pharmaceuticals Ltd.,"<img alt=""Section"" srcSet=""/icons/note.svg 1x,..."
5339,Ecosprin75 mg,Tk 7.20,/10's Strip,Tablet,Aspirin,ACME Laboratories Ltd.,"<img alt=""Section"" srcSet=""/icons/note.svg 1x,..."
5340,Monas 1010 mg,Tk 236.25,/15's Strip,Tablet,Montelukast,ACME Laboratories Ltd.,"<img alt=""Section"" srcSet=""/icons/note.svg 1x,..."
5341,Linaglip5 mg,Tk 198.00,/10's Strip,Tablet,Linagliptin,Aristopharma Ltd.,"<img alt=""Section"" srcSet=""/icons/note.svg 1x,..."


In [7]:
import re

In [8]:
from langchain_community.document_loaders import DataFrameLoader
s_chars = '¥₽ÏïŰŬĎŸæ₿₪ÚŇÀèÅ”ĜåŽÖéříÿý€ŝĤ₹áŜŮÂ₴ûÌÇšŘúüëÓ₫ŠčÎŤÆÒœ₩öËäøÍťìĈôàĥÝ¢ç“žðÙÊĉŭÈŒÐÉÔĵùÁů„âÄűĴóêĝÞîØòď฿ČÜþňÛ'
PUNC = '+@«#_\-!$%%^&*¬()£<>?/\\|}\]\[{;\,~:\"\''
def special_char(text):
    # first, let's remove any unicode strings
    text = text.encode('ascii', 'ignore').decode()
    # remove printable bachslashes
    text = re.sub(r'[\t\s\n\r\b\a]', ' ', text)
    # Special letters
    text = re.sub(r'[{}]'.format(s_chars), '', text)
    # Punctuation [remove punctuation between spaces only which represent noises]
    text = re.sub(r'\s[{}]\s'.format(PUNC), ' ', text)
    # space at the start or the end of the context
    text = re.sub(r'(^\s)|(\s$)', '', text)
    # Single character
    text = re.sub(r'(\s[^iIaA]\s)', ' ', text)
    return text
def scrape_links(text):
    url = re.compile(r'https?://\S+|www\.\S+')
    return url.sub(r'',text)



df = df[df['Description'].str.isdigit() == False]

# Apply the special_char and scrape_links functions
df['Description'] = df['Description'].apply(special_char)
df['Description'] = df['Description'].apply(scrape_links)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Description'] = df['Description'].apply(special_char)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Description'] = df['Description'].apply(scrape_links)


In [9]:
df.head(1)

Unnamed: 0,Name,Price,unit,Type,Genric name,Company name,Description
0,Rivotril0.5 mg,Tk 8.28,/piece,Tablet,Clonazepam,Radiant Pharmaceuticals Ltd.,Medicine overviewEnglishIndications of Rivotri...


In [10]:
loader = DataFrameLoader(df, page_content_column="Name")
data = loader.load()

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunk text
text_splitter = RecursiveCharacterTextSplitter(
    
    chunk_size=6000,
    chunk_overlap=0,
    length_function=len,
    is_separator_regex=False,
)
chunked_documents = text_splitter.split_documents(data)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents, 
                          HuggingFaceEmbeddings(model_name="intfloat/e5-large-v2"))

retriever = db.as_retriever()

In [12]:
prompt_template = """

[INST] Instruction: If you are a Bangladeshi doctor, who can only answer from medical knowledge provided in the context. Do not introduce any additional information beyond what is given in the context.

{context}

QUESTION:

{question}

[INST]

To provide a thorough and accurate medical answer based on the context:

1.Explain the relevant medical concepts, conditions, and treatments mentioned in the context in detail, using proper medical terminology.
2.If the context discusses side effects or scenarios where medicine should not be used, include that information for the applicable medical options or courses of action. If dosages are mentioned, include those as well.
3.If the context provides pricing information for any medicines in Bangladeshi taka, include the price and brand name exactly as stated.
4.Only discuss Medicines that are explicitly mentioned and described in the provided context.
5.Also mention the company name, tablet type and others.
6. Do not ask any follow up questions.
[/INST]

"""
# Create prompt from prompt template 
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain 
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [13]:
result1 = llm_chain.invoke({"context": "", "question": "I am feeling backpain when I sit what medicine should I take?"})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [14]:
rag_chain = ( 
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

result = rag_chain.invoke("I am feeling backpain when I sit what medicine should I take?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [15]:


def prnt(text):
    parts = text.split("[/INST]")
    if len(parts) > 1:
        output = parts[1].strip()
        print(output)

In [18]:
result['text']



In [21]:
text = result['text']
def prnt(text):
    parts = text.split("[/INST]")
    if len(parts) > 1:
        output = parts[1].strip()
        print(output)
        

In [24]:

result = rag_chain.invoke("I am feeling severe backpain for long amount of time. What should I do to mitigate the backpain and what can be the reason for the backpain?")
prnt(result['text'])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Back pain is a common health problem that affects millions of people worldwide. It can be caused by various factors such as injury, degenerative conditions, infections, and autoimmune disorders. The most common causes of back pain are herniated discs, spinal stenosis, osteoarthritis, rheumatoid arthritis, and fibromyalgia.

Herniated discs occur when the gel-like cushion between the vertebrae in the spine slips out of place, putting pressure on surrounding nerves and causing pain. Spinal stenosis is a condition where the spinal canal narrows, leading to pressure on the spinal cord and nerves, resulting in pain and numbness. Osteoarthritis is a degenerative joint disease that affects the cartilage in the joints, leading to pain and stiffness. Rheumatoid arthritis is an autoimmune disorder that affects the joints, leading to pain, swelling, and stiffness. Fibromyalgia is a chronic pain syndrome characterized by widespread pain, fatigue, and tenderness in muscles and ligaments.

Treatment

In [26]:

result = rag_chain.invoke("How can I reduce my hair fall?")
prnt(result['text'])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Hair loss, also known as alopecia, is a common condition characterized by the loss of hair follicles. There are several factors that can contribute to hair loss, including genetics, hormonal imbalances, stress, and certain medications. Treatment options for hair loss depend on the underlying cause and may include medications, such as minoxidil and finasteride, as well as cosmetic procedures, such as hair transplantation.

Minoxidil is a medication that is applied topically to the scalp to stimulate hair growth. Finasteride is another medication that is taken orally to slow down hair loss. Both medications have been shown to be effective in treating hair loss.

Hair transplantation is a surgical procedure in which healthy hair follicles are removed from another part of the body and implanted in the scalp to replace lost hair.

It is important to note that some medications, such as nonsteroidal anti-inflammatory drugs (NSAIDs) and corticosteroids, can cause hair loss and should be avoide

In [27]:
result = rag_chain.invoke("I'm feeling fever and headache what should I take in oder to mitigate this problems?")
prnt(result['text'])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


The context mentions three different medicines:

1. Palonicin 0.25 mg/5 ml: Palonicin 0.25 mg/5 ml is an anti-emetic drug used to treat nausea and vomiting. It is a 5-HT3 receptor antagonist that works by blocking the reabsorption of sodium, potassium, and chloride ions in the ascending loop of Henle, which leads to increased sodium loss and potassium gain, resulting in a negative sodium balance. Palonicin 0.25 mg/5 ml is available in the form of a tablet.
2. Cotrimoxazole: Cotrimoxazole is an antibiotic used to treat various bacterial and fungal infections. It works by blocking the production of folinic acid in microorganisms, leading to their death. Cotrimoxazole is available in the form of a tablet.
3. Furosemide: Furosemide is a loop diuretic used to treat fluid overload, edema, and hyponatremia. It works by inhibiting the reabsorption of sodium in the ascending loop of Henle, resulting in increased sodium loss and potassium gain, resulting in a negative sodium balance. Furosemide 

In [28]:

result = rag_chain.invoke("My whole body itches what should I take?")
prnt(result['text'])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


It is not clear from the provided context what specific medical condition or problem the patient is experiencing. However, based on the context, the patient may be experiencing itching, which could be a symptom of various medical conditions such as atopic dermatitis, contact dermatitis, psoriasis, or fungal infections.

For atopic dermatitis, a common treatment includes topical corticosteroids such as prednisolone eye drops or topical gel. The dosage of prednisolone eye drops is typically 1 drop in each eye twice daily. The tablet form of prednisolone is not suitable for topical use.

For contact dermatitis, a common treatment includes topical antibiotics such as erythromycin ointment or topical antibiotic cream. The dosage of erythromycin ointment is typically applied to the affected area once or twice daily.

For psoriasis, a common treatment includes topical calcineurin inhibitors such as tacrolimus ointment or topical calcineurin inhibitor cream. The dosage of tacrolimus ointment i

In [37]:
# import speech_recognition as sr

# r = sr.Recognizer()

# with sr.Microphone() as source:
#     print("Speak now...")
#     audio = r.listen(source)

# try:
#     text = r.recognize_google(audio)
#     print("You said:", text)
#     with open("output.txt", "w") as file:
#         file.write(text)
# except sr.UnknownValueError:
#     print("Could not understand audio")
# except sr.RequestError as e:
#     print("Could not request results from Google Speech Recognition service; {0}".format(e))