In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder on the mounted Drive that holds the LoRA adapter files.
LORA_PATH = "/content/drive/MyDrive/DiagnoLens/lora_model"
LORA_PATH  # bare expression: echoes the path as the cell output

'/content/drive/MyDrive/DiagnoLens/lora_model'

In [None]:
# List the adapter folder; IPython substitutes $LORA_PATH with the Python variable's value.
!ls -la "$LORA_PATH"


total 279427
-rw------- 1 root root      1356 Nov 13 08:09 adapter_config.json
-rw------- 1 root root 268858112 Nov 13 08:02 adapter_model.safetensors
-rw------- 1 root root      4909 Nov 13 08:03 chat_template.jinja
-rw------- 1 root root       477 Nov 13 08:09 preprocessor_config.json
-rw------- 1 root root      5284 Nov 13 08:11 README.md
-rw------- 1 root root       454 Nov 13 08:10 special_tokens_map.json
-rw------- 1 root root     50814 Nov 13 08:11 tokenizer_config.json
-rw------- 1 root root  17210088 Nov 13 08:10 tokenizer.json


**Install all required packages**

---



In [None]:
!pip install transformers==4.41.2
!pip install peft
!pip install accelerate
!pip install bitsandbytes
!pip install pillow
!pip install flask
!pip install flask-cors
!pip install streamlit
!pip install pyngrok
!pip install sentencepiece



Collecting transformers==4.41.2
  Downloading transformers-4.41.2-py3-none-any.whl.metadata (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.20,>=0.19 (from transformers==4.41.2)
  Downloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.41.2-py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m113.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tokenizers-0.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m132.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers


In [None]:
import os

# Hugging Face cache folder on the mounted Drive (presumably so that downloaded
# weights are kept between runtimes -- the folder itself is all this cell sets up).
HF_CACHE = "/content/drive/MyDrive/DiagnoLens/hf_cache"
os.makedirs(HF_CACHE, exist_ok=True)

# Point both cache-related environment variables at that same folder.
os.environ.update({"HF_HOME": HF_CACHE, "TRANSFORMERS_CACHE": HF_CACHE})

HF_CACHE


'/content/drive/MyDrive/DiagnoLens/hf_cache'

# **app.py**

In [None]:
%%writefile app.py
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoProcessor, AutoModelForImageTextToText, AutoTokenizer
from peft import PeftModel
from PIL import Image
import torch, io, os

app = Flask(__name__)
CORS(app)

# --- CONFIG -------------------------------------------------------------------
# Both locations can be overridden with environment variables of the same name,
# so whoever launches this script can point it at another adapter or base model
# without rewriting the file. The defaults are the previously hard-coded values.
BASE_MODEL = os.environ.get("BASE_MODEL") or "unsloth/Llama-3.2-3B-Vision"   # recommended for Colab free
LORA_PATH = os.environ.get("LORA_PATH") or "/content/lora_model"             # default: adapter copied to runtime
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision on GPU, full precision on CPU.
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

print("DEVICE:", DEVICE)

# Fail early with an actionable message instead of an obscure loader error.
if not os.path.isdir(LORA_PATH):
    raise FileNotFoundError(
        f"LoRA adapter folder not found: {LORA_PATH!r}. "
        "Copy the adapter there or set the LORA_PATH environment variable."
    )

print("Loading processor from LORA_PATH (tokenizer/processor found in adapter folder if present)...")
processor = AutoProcessor.from_pretrained(LORA_PATH, local_files_only=True)

print("Loading base model (may download)...")
base_model = AutoModelForImageTextToText.from_pretrained(
    BASE_MODEL,
    torch_dtype=DTYPE,
    device_map="auto" if DEVICE == "cuda" else None,
)

print("Applying LoRA adapter from", LORA_PATH)
model = PeftModel.from_pretrained(base_model, LORA_PATH, torch_dtype=DTYPE)
# With device_map="auto" the weights have already been placed on the GPU by the
# loader; moving a dispatched model again is redundant and may be rejected, so
# only the plain (CPU) load path is moved explicitly.
if DEVICE != "cuda":
    model.to(DEVICE)
model.eval()

@app.route("/predict", methods=["POST"])
def predict():
    """Run the vision-language model on an uploaded image.

    Expects a multipart/form-data POST with the picture in the ``image`` field.

    Returns:
        200 ``{"diagnosis": <generated text>}`` on success.
        400 ``{"error": ...}`` when no file was sent or it cannot be read as an image.
        500 ``{"error": <exception text>}`` for any failure during inference.
    """
    if "image" not in request.files:
        return jsonify({"error":"No image uploaded"}), 400

    # A corrupt or non-image upload is a client error, not a server fault.
    try:
        f = request.files["image"]
        image = Image.open(io.BytesIO(f.read())).convert("RGB")
    except OSError:
        return jsonify({"error": "Uploaded file is not a readable image"}), 400

    try:
        # prompt — modify if you used a different chat template
        # NOTE(review): vision chat models are commonly prompted through
        # processor.apply_chat_template with an image placeholder; confirm that
        # this raw prompt matches the format the adapter was trained with.
        prompt = "Diagnose the condition shown in the image and give a short explanation."

        # Preprocess and move to device
        inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.2)

        # Decoder-only models return prompt + continuation; drop the prompt part
        # so the response contains only the newly generated text.
        generated = outputs[0]
        if not getattr(model.config, "is_encoder_decoder", False):
            generated = generated[inputs["input_ids"].shape[-1]:]

        # Try to decode with the processor; fall back to a tokenizer loaded from the adapter folder.
        try:
            diagnosis = processor.decode(generated, skip_special_tokens=True)
        except Exception:
            tokenizer = AutoTokenizer.from_pretrained(LORA_PATH, local_files_only=True)
            diagnosis = tokenizer.decode(generated, skip_special_tokens=True)

        return jsonify({"diagnosis": diagnosis.strip()})
    except Exception as e:
        # Keep the full traceback in the server log; the client only gets the message.
        app.logger.exception("Prediction failed")
        return jsonify({"error": str(e)}), 500

@app.route("/")
def home():
    """Root endpoint: answers with a fixed plain-text status line."""
    status_line = "Flask backend running"
    return status_line

# Start the server only when this file is executed as a script, not on import.
# It listens on the loopback interface, port 5000.
if __name__ == "__main__":
    app.run(port=5000, host="127.0.0.1")


Writing app.py


# **frontend.py (Streamlit UI)**

In [None]:
%%writefile frontend.py
import streamlit as st
import requests
from PIL import Image

# --- Page layout --------------------------------------------------------------
st.set_page_config(page_title="DiagnoLens", layout="centered")
st.title("DiagnoLens — AI Image Diagnosis")

# Editable so the UI can also be pointed at a remote (e.g. tunnelled) backend.
backend_url = st.text_input("Backend diagnose URL:", value="http://127.0.0.1:5000/predict")

uploaded = st.file_uploader("Upload an image", type=["jpg","jpeg","png"])

if uploaded:
    # Show a preview; an unreadable file ends this script run with a message
    # instead of an exception trace.
    try:
        image = Image.open(uploaded)
    except OSError:
        st.error("The uploaded file is not a readable image.")
        st.stop()
    # NOTE(review): newer Streamlit releases deprecate use_column_width --
    # confirm against the installed version before changing it.
    st.image(image, caption="Uploaded Image", use_column_width=True)

    if st.button("Diagnose"):
        # Forward the upload under its real name and MIME type rather than
        # labelling every file as a JPEG.
        files = {
            "image": (
                uploaded.name,
                uploaded.getvalue(),
                uploaded.type or "application/octet-stream",
            )
        }
        try:
            # Inference can be slow, hence the spinner and the generous timeout.
            with st.spinner("Sending to backend..."):
                r = requests.post(backend_url, files=files, timeout=300)
        except requests.RequestException as e:
            st.error(f"Request failed: {e}")
        else:
            if r.status_code == 200:
                # Parse the body once and tolerate a non-JSON answer.
                try:
                    payload = r.json()
                except ValueError:
                    st.error(f"Backend returned a non-JSON response: {r.text}")
                else:
                    st.subheader("Diagnosis Output")
                    if isinstance(payload, dict):
                        st.write(payload.get("diagnosis", payload))
                    else:
                        st.write(payload)
            else:
                st.error(f"Backend returned {r.status_code}: {r.text}")


Writing frontend.py


# **Start Flask in the background**

In [None]:
# app.py expects the adapter at /content/lora_model by default, but nothing in
# this notebook creates that folder. Copy it from Drive once per runtime so the
# backend finds it (LORA_PATH is the Drive location defined near the top).
import os
import shutil

LOCAL_LORA_PATH = "/content/lora_model"
if not os.path.isdir(LOCAL_LORA_PATH):
    shutil.copytree(LORA_PATH, LOCAL_LORA_PATH)

# Export the location as well; processes launched from this kernel inherit it.
os.environ["LORA_PATH"] = LOCAL_LORA_PATH

# start Flask in background
get_ipython().system_raw("nohup python3 app.py > backend.log 2>&1 &")
print("Flask started in background. Check backend.log for startup logs.")


Flask started in background. Check backend.log for startup logs.


# **Expose the Flask API through ngrok and print its public URL**

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: the ngrok auth token must never be stored in the notebook. It is
# read from the NGROK_AUTH_TOKEN environment variable, or asked for
# interactively (input hidden) when that variable is not set.
# A token that was ever saved in a shared copy of this notebook should be
# revoked in the ngrok dashboard and replaced.
_ngrok_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(_ngrok_token)
del _ngrok_token  # do not keep the secret around in the kernel namespace

flask_tunnel = ngrok.connect(5000)
print("Flask public URL:", flask_tunnel.public_url)





Flask public URL: https://08c923bece7a.ngrok-free.app


# ***Start Flask backend in background***

In [None]:
import os
import subprocess

# Export the adapter location; processes launched from this kernel inherit it.
os.environ["LORA_PATH"] = LORA_PATH

# Stop a backend launched earlier in this runtime first. app.py only binds
# port 5000 after it has loaded the model, so a second instance would load
# another copy of the model and then fail because the port is taken.
# pkill is called without a shell so the pattern cannot match its own wrapper;
# a non-zero exit status just means nothing was running.
subprocess.run(["pkill", "-f", "python3 app.py"], check=False)

get_ipython().system_raw("nohup python3 app.py > backend.log 2>&1 &")

print("Flask backend started!")


Flask backend started!


# **Create ngrok public URL for Flask**

In [None]:
from pyngrok import ngrok

# Reuse a tunnel that already forwards to port 5000 instead of opening a
# duplicate one every time this cell is run.
_open_flask_tunnels = [
    t for t in ngrok.get_tunnels()
    if str((t.config or {}).get("addr", "")).endswith(":5000")
]
flask_tunnel = _open_flask_tunnels[0] if _open_flask_tunnels else ngrok.connect(5000)
flask_tunnel


<NgrokTunnel: "https://034666c1e7fb.ngrok-free.app" -> "http://localhost:5000">

# **Start Streamlit frontend in background**

In [None]:
import subprocess
import time
from pathlib import Path

# Stop an instance left over from an earlier run of this cell so a re-run does
# not collide with it (no shell involved, so the pattern cannot match itself).
subprocess.run(["pkill", "-f", "streamlit run frontend.py"], check=False)

get_ipython().system_raw("nohup streamlit run frontend.py --server.port 8080 --server.headless true > streamlit.log 2>&1 &")

# The launch is fire-and-forget, so verify the process survived start-up
# instead of reporting success unconditionally.
# NOTE(review): the streamlit.log captured in this notebook says
# "Port 8080 is already in use". If that persists on a fresh runtime, change
# the port here and in the ngrok cell for Streamlit together.
time.sleep(5)
_alive = subprocess.run(
    ["pgrep", "-f", "streamlit run frontend.py"], capture_output=True
).returncode == 0
if not _alive:
    raise RuntimeError("Streamlit failed to start:\n" + Path("streamlit.log").read_text())

print("Streamlit running!")


Streamlit running!


# **Create ngrok public URL for Streamlit**

In [None]:
from pyngrok import ngrok

# Reuse a tunnel that already forwards to port 8080 instead of opening a
# duplicate one every time this cell is run.
_open_streamlit_tunnels = [
    t for t in ngrok.get_tunnels()
    if str((t.config or {}).get("addr", "")).endswith(":8080")
]
streamlit_tunnel = _open_streamlit_tunnels[0] if _open_streamlit_tunnels else ngrok.connect(8080)
streamlit_tunnel


<NgrokTunnel: "https://73fd1b2145d4.ngrok-free.app" -> "http://localhost:8080">

In [None]:
# Terminate every process whose command line matches "ngrok"; this should tear down the tunnels opened above.
!pkill -f ngrok


In [None]:
!ps -ef | grep ngrok


In [None]:
!ps -ef | grep streamlit

root        1858     269  0 09:52 ?        00:00:00 /bin/bash -c ps -ef | grep streamlit
root        1860    1858  0 09:52 ?        00:00:00 grep streamlit


In [None]:
# Print the log file that the Streamlit launch command redirects its output to.
!cat streamlit.log



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.

2025-11-21 09:50:41.403 Port 8080 is already in use
