In [None]:
!pip install gradio
!pip install langchain
!pip install langchain-community
!pip install transformers
!pip install langchain-google-genai # Unpinned install; append ==0.0.18 to pin that version
!pip install python-docx
# Install Tesseract and pytesseract
!apt-get install -y tesseract-ocr
!pip install pytesseract



In [None]:
#Set up dependencies
#-------------------------------------------------------------------------------------
#api gemini key
import os
import getpass
import gradio as gr
import pytesseract
from PIL import Image
import time
import re
os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Gemini API key: ")

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Shared Gemini chat model. Every LLMChain built by the analysis functions in
# this notebook is constructed with this single instance. The API key is read
# from the GOOGLE_API_KEY environment variable set above.
llm = ChatGoogleGenerativeAI(model='gemini-1.5-flash', temperature=0)


#loading the database

from docx import Document


# Open the ingredient reference document that sits next to the notebook.
doc = Document('ingrdb.docx')

# Collect the text of each paragraph, in document order.
full_text = [paragraph.text for paragraph in doc.paragraphs]

# Flatten the paragraphs into one newline-separated string; this is the
# "database" text that gets interpolated into the analysis prompts.
db = "\n".join(full_text)

#print(db)

In [None]:
#functions

def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image and return the recognised text.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (in this notebook, the
            file path Gradio hands over for the uploaded image).

    Returns:
        The OCR output with leading and trailing whitespace removed.
    """
    # Image.open keeps the underlying file open; the context manager closes it
    # as soon as OCR is done instead of leaving the handle to the garbage
    # collector on every upload.
    with Image.open(image_path) as image:
        extracted_text = pytesseract.image_to_string(image)
    return extracted_text.strip()

# Example usage
# text = extract_text_from_image("path_to_image.jpg")
# print(text)


#correct ocr errors

def correct_ocr_errors(extracted_text):
    """Ask the module-level ``llm`` to repair unit-related OCR mistakes.

    Args:
        extracted_text: Raw text produced by the OCR step.

    Returns:
        The model's reply with surrounding whitespace stripped. If the reply
        is empty, or if running the chain raises, a human-readable error
        string is returned instead (nothing is raised to the caller).
    """
    prompt_text = """System Message:
    "role": "system", "content": "You are a helpful assistant that corrects errors in ocr output ."
    User Message:
    From the following text:{text} correct the errors in the units
    Here are some examples of errors
    Mistaking o for 0, S for 5, i for 1, smg for 5mg
    If there are no errors just return the given text
    """

    # Prompt and chain are rebuilt on every call; both are created outside the
    # try block, so construction errors are not converted into error strings.
    ocr_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_text))

    try:
        reply = ocr_chain.run(text=extracted_text)
        cleaned = reply.strip() if reply else ""
        if not cleaned:
            # None, empty, or whitespace-only reply
            return "Error: No Ingredients or quantities were identified. Please check the input text or try again."
        return cleaned
    except Exception as exc:
        # Any failure while running the chain is reported as text
        return f"An error occurred: {exc}"
#corrected_ocr_output = correct_ocr_errors(extracted_text)
#print(corrected_ocr_output)


#analyze ingredients for use and drawbacks

# Describe each ingredient: use, level of evidence, potential side effects
def analyze_data(corrected_ocr_output,db):
    """Ask the module-level ``llm`` what each ingredient is for.

    Args:
        corrected_ocr_output: Label text after the OCR-correction step.
        db: Reference text that is interpolated into the prompt as the
            "database".

    Returns:
        The model's reply with surrounding whitespace stripped. If the reply
        is empty, or if running the chain raises, an error string is returned
        instead.
    """
    prompt_text = """System Message:
    "role": "system", "content": "You are a helpful assistant that evaluates bodybuilding ingredients based on scientific evidence"

    User Message: First make sure the text supplied is supplement information otherwise output: The information given is not a supplement. No processing done.
    Else do the following
    Using the following information: {text}
    and the following database: {db}
    Output what the following;
    Use of an ingredient, level of evidence and potential side effects
    If there is no data available in supplied database use what you know"""

    # Built outside the try block, exactly like the other analysis steps.
    usage_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_text))

    try:
        reply = usage_chain.run(text=corrected_ocr_output,db=db)
        cleaned = reply.strip() if reply else ""
        if not cleaned:
            # None, empty, or whitespace-only reply
            return "Error: No data is available. Please check the input text or try again."
        return cleaned
    except Exception as exc:
        # Any failure while running the chain is reported as text
        return f"An error occurred: {exc}"

#ingr_use = analyze_data(corrected_ocr_output,db)
#print(ingr_use)

#evaluate prop blends and other sneaky stuff
def analyze_tricks(corrected_ocr_output):
    """Ask the module-level ``llm`` which marketing tricks the label shows.

    The prompt embeds a fixed guide of thirteen common tactics (proprietary
    blends, fairy dusting, under-dosing, ...) for the model to check against.

    Args:
        corrected_ocr_output: Label text after the OCR-correction step.

    Returns:
        The model's reply with surrounding whitespace stripped. If the reply
        is empty, or if running the chain raises, an error string is returned
        instead.
    """
    prompt_text = """System Message:
    "role": "system", "content": "You are a helpful assistant that evaluates the effectiveness of bodybuilding supplements"

    Caution-->First make sure the text supplied is supplement information otherwise output: The information given is not a supplement. No processing done.
    Else do the following
    User Message:
    Using the following information: {text}
    what tricks that supplements use are present
    Here is a guide you can use:

    Supplement manufacturers often use various tricks and marketing tactics to make their products seem more effective or appealing. Here are some common strategies:

 1. Proprietary Blends
   - Explanation: Companies list a mix of ingredients as a “proprietary blend” without disclosing the exact amounts of each ingredient.
   - Reason: This allows manufacturers to use minimal effective dosages of costly or effective ingredients while bulking up the blend with cheaper fillers.
   - Impact: Consumers don’t know if they’re getting an effective dose of the key ingredients.

 2. Fairy Dusting
   - Explanation: Including a small, ineffective amount of a popular or proven ingredient just to list it on the label.
   - Reason: This can make the supplement look more powerful without the cost of including effective doses.
   - Impact: Misleads consumers into thinking the product is more effective than it is.

 3. Under-Dosing
   - Explanation: Using less than the scientifically recommended dosage of an active ingredient.
   - Reason: To reduce costs while being able to claim the ingredient is present.
   - Impact: Consumers may not get the desired effect due to insufficient amounts.

 4. Over-Dosing of Cheaper Ingredients
   - Explanation: Using high amounts of inexpensive or less effective ingredients to create the appearance of a strong formula.
   - Reason: To bulk up the label and make the supplement seem packed with active substances.
   - Impact: May lead to the inclusion of unnecessary or even harmful amounts of certain components.

 5. Claims Without Evidence
   - Explanation: Making strong health or performance claims that aren't backed by reliable studies or evidence.
   - Reason: Marketing ploys designed to attract attention and boost sales.
   - Impact: Consumers may be misled into believing a product is more effective than it really is.

 6. Ingredient Sourcing and Quality
   - Explanation: Using lower-quality or less bioavailable forms of ingredients.
   - Reason: Cost-saving measures that still allow the manufacturer to list the ingredient on the label.
   - Impact: Poor-quality ingredients may not be absorbed well or utilized effectively by the body.

 7. Mislabeling and Hidden Ingredients
   - Explanation: Failing to properly label ingredients or including substances that aren't disclosed.
   - Reason: Can sometimes be done unintentionally, but in some cases, it's a deliberate attempt to hide banned or less desirable substances.
   - Impact: Safety and effectiveness can be compromised.

 8. ‘Clinically Proven’ Phrases
   - Explanation: Using phrases like "clinically proven" even when studies may be biased, poorly designed, or funded by the supplement company itself.
   - Reason: To gain consumer trust and justify higher prices.
   - Impact: Can create a false sense of reliability.

 9. Borrowed Science
   - Explanation: Claiming benefits based on studies of different forms or dosages of an ingredient, not the specific one used in the supplement.
   - Reason: To make a product appear more scientifically supported than it is.
   - Impact: Consumers may be misled into believing the product’s efficacy is proven.

 10. Unclear Serving Sizes
   - Explanation: Using serving sizes that are larger than practical (e.g., recommending multiple pills for one serving).
   - Reason: To make it seem like the product has a higher concentration of active ingredients.
   - Impact: Can be deceptive about how much of the product you actually need to take to see results.

 11. Buzzwords and Over-Promising
   - Explanation: Using terms like “all-natural,” “scientifically formulated,” or “pharmaceutical-grade” without regulation or clear meaning.
   - Reason: Marketing hype that appeals to health-conscious consumers.
   - Impact: Consumers may believe the product is higher quality or more effective than it really is.

 12. Artificial Colors and Fillers
   - Explanation: Using unnecessary additives to make supplements visually appealing.
   - Reason: To differentiate products and improve marketability.
   - Impact: These additives may not provide any health benefit and could even be harmful.

 13. Label Padding
   - Explanation: Adding common vitamins and minerals to boost the ingredient count on the label.
   - Reason: To make the product look more comprehensive.
   - Impact: Can mislead consumers into thinking they are getting more active ingredients.

    Understanding these tricks can help consumers make more informed decisions and choose supplements that are effective and safe.
    If none ,output No tricks used"""

    # Built outside the try block, exactly like the other analysis steps.
    tricks_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_text))

    try:
        reply = tricks_chain.run(text=corrected_ocr_output)
        cleaned = reply.strip() if reply else ""
        if not cleaned:
            # None, empty, or whitespace-only reply
            return "Error: No data is available. Please check the input text or try again."
        return cleaned
    except Exception as exc:
        # Any failure while running the chain is reported as text
        return f"An error occurred: {exc}"

#supp_trick = analyze_tricks(corrected_ocr_output)
#print(supp_trick)


# Classify each ingredient as overdosed, well dosed or underdosed
def analyze_dosage(corrected_ocr_output,db):
    """Ask the module-level ``llm`` to judge the dose of every ingredient.

    Args:
        corrected_ocr_output: Label text after the OCR-correction step.
        db: Reference text that is interpolated into the prompt as the
            "database".

    Returns:
        The model's reply with surrounding whitespace stripped. If the reply
        is empty, or if running the chain raises, an error string is returned
        instead.
    """
    prompt_text = """System Message:
    "role": "system", "content": "You are a helpful assistant that evaluates the effectiveness of bodybuilding supplements"
     Caution-->First make sure the text supplied is supplement information otherwise output: The information given is not a supplement. No processing done.
    User Message:
    Using the following information: {text} and the following database :{db}
    Analyze the ingredients to find out which ingredients are overdosed, well dosed, underdosed.
    If a propietary blend is present output unknown.
    """

    # Built outside the try block, exactly like the other analysis steps.
    dosage_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_text))

    try:
        reply = dosage_chain.run(text=corrected_ocr_output,db=db)
        cleaned = reply.strip() if reply else ""
        if not cleaned:
            # None, empty, or whitespace-only reply
            return "Error: No data is available. Please check the input text or try again."
        return cleaned
    except Exception as exc:
        # Any failure while running the chain is reported as text
        return f"An error occurred: {exc}"

#supp_dose = analyze_dosage(corrected_ocr_output,db)
#print(supp_dose)




# Merge the usage, dosage and tricks analyses into one tabular summary
def final_pass(ingr_use,supp_trick,supp_dose):
    """Ask the module-level ``llm`` to combine the three analyses.

    The prompt requests a table with one entry per ingredient, followed by a
    list of the detected tricks and a fixed medical-advice disclaimer.

    Args:
        ingr_use: Text describing ingredient usage.
        supp_trick: Text describing detected marketing tricks.
        supp_dose: Text describing the dosage assessment.

    Returns:
        The model's reply with surrounding whitespace stripped. If the reply
        is empty, or if running the chain raises, an error string is returned
        instead.
    """
    prompt_text = """System Message:
    "role": "system", "content": "You are a helpful assistant that evaluates the effectiveness of bodybuilding supplements"


    User Message:
    Using the following information on ingredient usage: {ingr_use} , and dosage information :{supp_dose}
    Provide a summary in TABULAR FORMAT
    The table should have a row for;
    Ingredient(dosage):eg Creatine(25 grams)
    Usage:Aids in the Convertion of ADP to ATP which increases energy
    Side effects:Weight gain due to water retention
    Level of evidence:High
    Dosage:Well dosed
    Use 5000 characters of only essential information on the supplement and all the ingredients in simple and clear words
    Make sure every ingredient is included.
    Then finish off with a summary in list form of any potential tricks;{supp_trick} that may have been present in the supplement.
    And a warning: Note: This analysis is based solely on the provided information and does not constitute medical advice. Consult a healthcare professional before using any supplement.
    """

    # Built outside the try block, exactly like the other analysis steps.
    summary_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_text))

    try:
        reply = summary_chain.run(ingr_use=ingr_use,supp_trick=supp_trick,supp_dose=supp_dose)
        cleaned = reply.strip() if reply else ""
        if not cleaned:
            # None, empty, or whitespace-only reply
            return "Error: No data is available. Please check the input text or try again."
        return cleaned
    except Exception as exc:
        # Any failure while running the chain is reported as text
        return f"An error occurred: {exc}"

#fiinalpass = final_pass(ingr_use,supp_trick,supp_dose)
#print(fiinalpass)


# Turn the combined analysis into a single rating out of 10
def rate_supp(finalpass):
    """Ask the module-level ``llm`` to score the supplement.

    The prompt spells out a weighted rubric (evidence 40%, dosage 30%,
    tricks 15%, safety 15%) plus an automatic penalty, and instructs the
    model to output only the final number.

    Args:
        finalpass: The combined analysis text to be rated.

    Returns:
        The model's reply with surrounding whitespace stripped. If the reply
        is empty, or if running the chain raises, an error string is returned
        instead. The reply is returned as text; it is not parsed as a number.
    """
    prompt_text = """System Message:
    "role": "system", "content": "You are a strict and unbiased evaluator of bodybuilding supplements."

    Caution: First verify that the provided text is supplement-related. If not, output:
    "The information given is not a supplement. No processing done."

    User Message:
    Using the following information: {text}

    Evaluate the all the ingredients on the following criteria, assigning scores and weights:

    **Scientific Evidence (40%)**
    - 5: Strong meta-analyses or multiple RCTs.
    - 4: A few well-designed studies.
    - 3: Limited studies with promising results.
    - 2: Anecdotal evidence or poor studies.
    - 1: No credible evidence.

    **Proper Dosage (30%)**
    - 5: Exactly within the recommended range.
    - 4: Slightly above/below but still effective.
    - 3: Somewhat off (e.g., 50% or 150% of proper dosage).
    - 2: Outside effective range but not harmful.
    - 1: Well below or excessive.
    - 🚨 If key ingredients are hidden in a proprietary blend, cap this score at 2.

    **Tricks Used (15%)**
    - 5: Fully transparent label, no tricks.
    - 4: Minor marketing exaggerations, but fair dosing.
    - 3: Some underdosed ingredients or partial blends.
    - 2: Heavy use of proprietary blends, deceptive claims.
    - 1: Highly misleading, hiding key dosages.

    **Safety (15%)**
    - 5: No safety concerns.
    - 4: Minor side effects but safe.
    - 3: Some risks if misused.
    - 2: Multiple reports of side effects.
    - 1: Contains potentially harmful substances.

    **🚨 Automatic Penalties:**
    - If "Tricks" ≤ 2 AND "Safety" ≤ 2, apply a 2-point penalty to the final rating.

    Now, aggregate these scores into a final rating out of 10.
    **Only output the final rating as a number. No extra text.**
    """

    # Built outside the try block, exactly like the other analysis steps.
    rating_chain = LLMChain(llm=llm, prompt=PromptTemplate.from_template(prompt_text))

    try:
        reply = rating_chain.run(text=finalpass)
        cleaned = reply.strip() if reply else ""
        if not cleaned:
            # None, empty, or whitespace-only reply
            return "Error: No data is available. Please check the input text or try again."
        return cleaned
    except Exception as exc:
        # Any failure while running the chain is reported as text
        return f"An error occurred: {exc}"

#supp_rate = rate_supp(fiinalpass)
#print(supp_rate)


In [None]:
#Validate input
def _mentions_term(lowered_text, term):
    """Return True when *term* occurs in *lowered_text* as a standalone token."""
    # OCR often breaks a phrase across lines, so allow any whitespace run
    # between the words of a multi-word term.
    body = r"\s+".join(re.escape(part) for part in term.split())
    # Letter look-arounds instead of \b: "500mg" must still count as "mg",
    # while "omg", "premium" or "administration" must not match
    # "mg" / "iu" / "admin".
    return re.search(rf"(?<![a-z]){body}(?![a-z])", lowered_text) is not None


def validate_input(extracted_text):
    """Check that OCR text looks like a supplement label before using the LLM.

    Args:
        extracted_text: Text returned by the OCR step (may be ``None``).

    Returns:
        ``(True, None)`` when the text is accepted, otherwise
        ``(False, message)`` where ``message`` explains the rejection.
    """
    # Step 1: reject missing or whitespace-only text.
    if not extracted_text or not extracted_text.strip():
        return False, "❌ Error: No valid text found in the image. Please upload a clear supplement label."

    lowered = extracted_text.lower()

    # Step 2: block phrases commonly used in prompt-injection attempts.
    # Terms are matched as whole tokens: plain substring matching rejected
    # every label carrying the usual "Food and Drug Administration"
    # disclaimer because it contains "admin".
    forbidden_words = ["ignore previous", "disregard", "system prompt", "password", "overrides", "admin", "reset"]
    if any(_mentions_term(lowered, word) for word in forbidden_words):
        return False, "⚠️ Error: Suspicious input detected. Please upload a supplement label."

    # Step 3: require evidence of a supplement label. Whole-token matching
    # matters here too: as substrings, "g", "mg" and "iu" occur inside
    # ordinary words, which made the threshold trivially easy to reach.
    supplement_keywords = ["supplement facts", "serving size", "ingredients", "per serving", "mg", "g", "iu", "capsules"]
    keyword_count = sum(1 for word in supplement_keywords if _mentions_term(lowered, word))

    if keyword_count < 3:  # Require at least 3 supplement-related terms
        return False, "⚠️ Error: Uploaded text does not appear to be a supplement label. Please try again."

    return True, None


In [None]:
# Markdown source for the FAQ tab; it is handed unchanged to gr.Markdown
# when the UI is built.
faq_content = """
### 🤔 Frequently Asked Questions (FAQ)

**1. What is Doselytics?**
Doselytics is an AI-powered supplement analyzer that evaluates the ingredients and dosages in supplements.

**2. How does it work?**
Upload a supplement label, and the AI extracts ingredients, checks dosage accuracy, and provides a rating.

**3. Is Doselytics free?**
Yes! The app is totally free and allows you to analyze supplements without any cost.

**4. Why is analysis taking so long?**
This is most likely due to having a poor internet connection.

**5. What if the OCR is inaccurate?**
Try using clearer images with better lighting, or manually edit extracted text.

**6. How is the supplement rating calculated?**
The system compares ingredient dosages with scientific recommendations and rates supplements based on effectiveness and accuracy.

**7. Does it support proprietary blends?**
Yes, but Doselytics may not always determine exact amounts in proprietary blends.

**8. Can I trust the recommendations?**
Our recommendations are based on scientific research, but always consult a healthcare professional for final decisions.

**9. How do I report incorrect results?**
You can send feedback through email.

"""

In [None]:
#Calling the functions
def analyze_label(image, progress=gr.Progress()):
    """Run the whole label-analysis pipeline on an uploaded image.

    Steps: OCR -> input validation -> OCR correction -> ingredient usage ->
    tricks -> dosage -> combined summary -> rating. Intermediate results are
    printed for debugging and progress is reported through ``progress``.

    Args:
        image: File path of the uploaded image, or ``None`` if nothing was
            uploaded.
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        A ``(markdown_text, report_path)`` tuple in every case, so callers can
        always unpack two values. When the run stops early (no image, or the
        text fails validation) ``markdown_text`` is the message to show and
        ``report_path`` is ``None``.
    """
    import tempfile  # local import: only needed for the report-path fallback

    if image is None:
        return "Please upload a supplement label.", None

    progress(0, desc="Starting Analysis...")
    time.sleep(1)

    # The upload component already supplies a file path; nothing is copied.
    image_path = image
    print("📸 Image saved:", image_path)

    # Step 1: Extract text from image
    extracted_text = extract_text_from_image(image_path)
    print("📝 Extracted Text Parsed", extracted_text)
    progress(0.2, desc="Text Extracted...")
    time.sleep(1)

    # Step 2: Validate input; on rejection surface the message with no report
    is_valid, error_message = validate_input(extracted_text)
    if not is_valid:
        return error_message, None

    # Step 3: Clean up OCR mistakes
    corrected_ocr_output = correct_ocr_errors(extracted_text)
    print("✅ Corrected OCR Output Parsed", corrected_ocr_output)
    progress(0.3, desc="Text Corrected...")
    time.sleep(1)

    # Step 4: Ingredient usage / evidence / side effects
    ingr_use = analyze_data(corrected_ocr_output, db)
    print("🔬 Ingredients Analysis Parsed", ingr_use)
    progress(0.4, desc="Analyzing Ingredients...")
    time.sleep(1)

    # Step 5: Marketing tricks and dosage assessment
    supp_trick = analyze_tricks(corrected_ocr_output)
    print("🧐 Supplement Tricks Analysis Parsed", supp_trick)

    supp_dose = analyze_dosage(corrected_ocr_output, db)
    print("💊 Supplement Dosage Analysis Parsed", supp_dose)
    progress(0.6, desc="Analyzing dosage...")
    time.sleep(1)

    # Step 6: Combined summary, then the overall rating derived from it
    finalpass = final_pass(ingr_use, supp_trick, supp_dose)
    print("📊 Final Analysis Parsed", finalpass)

    progress(0.8, desc="Preparing final output...")
    time.sleep(1)

    supp_rate = rate_supp(finalpass)
    print("⭐ Supplement Rating Parsed", supp_rate)

    progress(0.9, desc="Analysis Complete...")
    time.sleep(1)

    finalpass = finalpass.strip()  # Remove surrounding whitespace
    finalpass = finalpass.encode("utf-8", "ignore").decode("utf-8")  # Drop anything not encodable as UTF-8

    progress(1, desc="Displaying Results...")
    time.sleep(1)

    # Build the Markdown without leading indentation: lines indented by four
    # spaces are rendered as a code block, which swallowed the rating line
    # and the first row of the table. The blank line separates the rating
    # paragraph from the table that follows.
    result_output = f"⭐ Final Rating: `{supp_rate}/10`\n\n{finalpass}\n"

    # Save the report next to the notebook on Colab (/content); elsewhere
    # fall back to the system temp directory so the write does not fail.
    report_dir = "/content" if os.path.isdir("/content") else tempfile.gettempdir()
    file_path = os.path.join(report_dir, "supplement_analysis.md")
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(result_output)

    return result_output, file_path  # Return both the result and file path

def reset_ui():
    """Return the values that put the Analyze tab back to its initial state.

    The four values line up with the reset button's outputs
    ``[upload, output, download_file, download_file]``: clear the upload,
    restore the placeholder text, clear the report file, and hide the
    download component.
    """
    placeholder_markdown = """📋 **Results will appear here...**<br>
                    **....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>
                    **....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>
                    **📋Results will appear here...**"""
    hidden_download = gr.update(visible=False, interactive=False)
    return None, placeholder_markdown, None, hidden_download

def save_results_to_file(results):
    """Write *results* to ``supplement_analysis.txt`` in the working directory.

    Nothing is written when *results* is empty/``None`` or still contains the
    "Results will appear" placeholder text.

    Returns:
        A status message saying whether the file was written.
    """
    # Guard clause: bail out when there is no real analysis to persist.
    if not results or "Results will appear" in results:
        return "No results to save."

    with open("supplement_analysis.txt", "w", encoding="utf-8") as out_file:
        out_file.write(results)
    return "Results saved successfully! ✅"




In [None]:
# Creating the UI using Gradio

# Custom CSS passed to gr.Blocks(css=...). It styles the element whose
# elem_id is "results-box" (the results Markdown on the Analyze tab).
css = """
#results-box {
    max-height: 500px; /* Fixed height */
    overflow-y: auto;   /* Scrollable */
    border: 1px solid var(--border-color, #ccc);
    padding: 10px;
    background: var(--background-color, #ffffff);
    color: var(--text-color, #000000);
    font-size: 16px;
    font-weight: bold;
    border-radius: 5px;
    transition: all 0.3s ease-in-out;
}

/* Dark Mode Support */
@media (prefers-color-scheme: dark) {
    #results-box {
        background: var(--background-color-dark, #222);
        color: var(--text-color-dark, #fff);
        border-color: var(--border-color-dark, #444);
    }
}

"""
#Gradio

# Three-tab Gradio app: a static Home page, the Analyze workflow, and the FAQ.
with gr.Blocks(css=css) as app:
    with gr.Tabs():
        with gr.Tab("🏠 Home"):
            gr.Markdown("# 🏋️ Welcome to Doselytics!")
            gr.Markdown("### Get to know your Supplements with Doselytics. 📊💊")
            gr.Markdown("### Our AI-powered supplement analyzer tool. 🔍")

            # First section: Image left, text right
            with gr.Row():
                gr.Image("banner1.png", width=200, height=300, show_label=False)
                # Fixed user-facing typo: "infromed" -> "informed"
                gr.Markdown("## 💡 Want to make informed decisions with your supplements?<br> Welcome to  Doselytics <br> An AI powered supplement analyzer app")

            # Second section: Text left, image right
            with gr.Row():
                gr.Markdown("""## Why use Doselytics <br>
                - 🏋️ **Analyze Supplement using the latest science**<br>
                - 🔬 **Understand Ingredient Effectiveness**<br>
                - 📜 **Detect Misleading Claims & Dosage Issues**<br>
                - ⭐ **Get AI-Powered Ratings**<br>
                - 📊 **Make Informed Health Choices**
                """)
                gr.Image("banner2.png", width=200, height=300, show_label=False)

            # Third section: Image left, text right
            with gr.Row():
                gr.Image("banner3.png", width=200, height=300, show_label=False)
                gr.Markdown("## 🚀 Section Three: Trust science. Trust Doselytics.")


        with gr.Tab("🔍 Analyze"):
            gr.Markdown("# 🏋️ Doselytics Supplement Analyzer")
            gr.Markdown("Upload an image of your supplement label, and AI will analyze it.")

            with gr.Row():
                with gr.Column(scale=1):  # Upload section (smaller)
                    upload = gr.Image(type="filepath", label="Upload Supplement Label", width=300, height=500)
                    with gr.Row():
                        # Disabled until an image has been uploaded
                        analyze_button = gr.Button("🔍 Analyze", interactive=False)
                        reset_button = gr.Button("🔄 Reset")

                with gr.Column(scale=3):  # Results section (larger)
                    output = gr.Markdown(
                    """📋 **Results will appear here...**<br>
                    **....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>
                    **....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**....**<br>**
                    📋Results will appear here...**""",
                    elem_id="results-box"
                    )
                    download_file = gr.File(label="📥 Download Report", interactive=False, visible=False)

            # Enable the Analyze button only while an image is present
            upload.change(
                lambda x: gr.update(interactive=bool(x)),
                inputs=[upload],
                outputs=[analyze_button],
            )

            def process_and_enable_download(image, progress=gr.Progress()):
                """Run the analysis and reveal the download once a report exists.

                ``progress`` is declared on this function because it is the
                one registered as the click handler; it is forwarded to
                ``analyze_label`` so the stage updates reach the UI.
                """
                outcome = analyze_label(image, progress=progress)

                # analyze_label may hand back a bare message instead of a
                # (text, path) pair when it stops early; unpacking that
                # message directly would raise, so accept both shapes.
                if isinstance(outcome, tuple):
                    results, file_path = outcome
                else:
                    results, file_path = outcome, None

                if file_path is None:
                    # No report was produced: show the message, keep the
                    # download component hidden.
                    return results, None, gr.update(visible=False, interactive=False)
                return results, file_path, gr.update(interactive=True, visible=True)

            # download_file is listed twice on purpose: once for its value,
            # once for the visibility/interactivity update.
            analyze_button.click(fn=process_and_enable_download, inputs=[upload], outputs=[output, download_file, download_file])

            reset_button.click(fn=reset_ui, inputs=[], outputs=[upload, output, download_file, download_file])


        with gr.Tab("📖 FAQs"):
            gr.Markdown(faq_content)

app.launch(share=True, debug=True)
