In [1]:
!pip install -q -U bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m82.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m71.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m42.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
!pip install -q -U transformers accelerate peft

In [4]:
import gradio as gr
import torch
from transformers import AutoTokenizer
from peft import PeftModel
from transformers import AutoModelForCausalLM, BitsAndBytesConfig


# Hub id of the base checkpoint; both the tokenizer and the quantized model load from it.
base_model = "HuggingFaceH4/zephyr-7b-beta"

tokenizer = AutoTokenizer.from_pretrained(base_model)
# Reuse the end-of-sequence token for padding and pad on the left, so that in a
# padded batch the generated tokens directly follow the prompt.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# 4-bit bitsandbytes quantization with double quantization; matmuls run in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# trust_remote_code is deliberately NOT enabled: it lets the Hub repository run
# arbitrary Python on this machine, and this checkpoint loads with the
# library's built-in Mistral classes, so the flag is not needed.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate decide where the weights are placed
)


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [5]:
# Load LoRA adapter
# NOTE(review): this path assumes Google Drive is already mounted at
# /content/drive — no mount call is visible in this notebook; confirm.
adapter_path = "/content/drive/MyDrive/finetunedmodel"  # Upload your adapter files here

# Guard against re-running the cell: without the check, a second execution
# would wrap the already adapter-wrapped model in another PeftModel.
if not isinstance(model, PeftModel):
    model = PeftModel.from_pretrained(model, adapter_path)

# Inference only: eval mode disables dropout (including the LoRA dropout layers).
model.eval()



PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=2)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
     

In [6]:

def chat(user_message, history, max_tokens):
    """Generate the assistant's next reply and add the turn to the chat history.

    The prompt is built from the earlier turns using the ``<|user|>`` /
    ``<|assistant|>`` markers, sampled from the module-level ``model`` and
    decoded with the module-level ``tokenizer``.

    Args:
        user_message: Text entered by the user for this turn.
        history: List of ``(user, assistant)`` pairs from earlier turns.
            ``None`` is treated as an empty history. The list passed in is
            not modified.
        max_tokens: Upper bound on the number of newly generated tokens;
            converted to ``int`` because UI sliders may deliver a float.

    Returns:
        A ``(history, history)`` tuple holding the updated history twice:
        once for the chat display and once for the stored session state.
        If ``user_message`` is empty or whitespace-only, the history is
        returned unchanged and nothing is generated.
    """
    # Work on a copy so the caller's list is never mutated.
    history = list(history or [])

    # Nothing to answer: do not sample a reply to an empty prompt.
    if not user_message or not user_message.strip():
        return history, history

    prompt = "".join(
        f"<|user|>\n{user}\n<|assistant|>\n{assistant}\n"
        for user, assistant in history
    )
    prompt += f"<|user|>\n{user_message}\n<|assistant|>\n"

    # Send the inputs to wherever the model actually lives instead of
    # assuming a "cuda" device exists.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    # The returned sequence starts with the prompt tokens; decode only what
    # was generated after them. This avoids recovering the reply by splitting
    # the full text on a marker, which breaks when the marker reappears.
    generated_tokens = outputs[0][prompt_length:]
    reply = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # If the model keeps going and starts writing the next user turn itself,
    # keep only its own answer.
    reply = reply.split("<|user|>")[0].strip()

    history.append((user_message, reply))
    return history, history


def _reset_conversation():
    """Return empty values for the chat display and the stored history."""
    return [], []


# Page layout: a wide column with the conversation and its controls, and a
# narrow column with the generation settings and a short model summary.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown("# 🌍 Zephyr-7B Geospatial Chatbot")
    gr.Markdown(
        "Ask about GIS, shapefiles, raster/vector data, "
        "and geospatial processing techniques."
    )

    with gr.Row():
        # Left: conversation view, query box and action buttons.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Geospatial Assistant",
                height=400,
            )
            user_input = gr.Textbox(
                label="Your Query",
                placeholder="Ask a geospatial question...",
            )
            submit = gr.Button("Send")
            clear = gr.Button("Clear Chat")

        # Right: settings panel.
        with gr.Column(scale=1):
            gr.Markdown("## ⚙️ Settings")
            max_tokens = gr.Slider(
                minimum=50,
                maximum=512,
                value=256,
                step=16,
                label="Max Response Tokens",
            )
            gr.Markdown(
                "**Model**: `zephyr-7b-beta` + LoRA\n"
                "**Device**: GPU (quantized 4-bit)\n"
                "**Adapter**: Custom-trained"
            )

    # Conversation history kept between events, starting empty.
    state = gr.State(value=[])

    # "Send" runs the model and refreshes both the display and the stored history;
    # "Clear Chat" empties both.
    submit.click(
        fn=chat,
        inputs=[user_input, state, max_tokens],
        outputs=[chatbot, state],
    )
    clear.click(fn=_reset_conversation, outputs=[chatbot, state])

# share=True additionally requests a public share link for the running app.
demo.launch(share=True)


  chatbot = gr.Chatbot(label="Geospatial Assistant", height=400)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c628427608751ba235.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


