<a href="https://colab.research.google.com/github/denis-shema/aiclass/blob/main/PROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import json

# Sample Q&A entries
qa_data = [
    {
        "category": "Startup Guidance",
        "question": "How do I start a cattle farm for meat production?",
        "answer": "Start by selecting a suitable breed like Brahman or Angus, prepare land with proper fencing, build shelter, and plan your budget for feed, healthcare, and equipment."
    },
    {
        "category": "Daily Farming Advice",
        "question": "What is the best feeding schedule for weight gain?",
        "answer": "Feed cattle twice a day with a mix of high-energy feed like maize bran, protein supplements, and clean water. Monitor intake and adjust based on growth."
    },
    {
        "category": "Risk Management",
        "question": "How can I prevent disease outbreaks in my herd?",
        "answer": "Ensure regular vaccinations, maintain hygiene in housing, isolate sick animals, and consult a vet for early symptoms like coughing or weight loss."
    }
]

# Save to JSON file
with open("cattle_farming_qa.json", "w") as f:
    json.dump(qa_data, f, indent=4)

print("✅ Q&A dataset created and saved as 'cattle_farming_qa.json'")


# Load and inspect the dataset
def load_qa_dataset(filepath):
    with open(filepath, "r") as f:
        data = json.load(f)
    for i, entry in enumerate(data):
        print(f"\nQ{i+1}: {entry['question']}")
        print(f"A{i+1}: {entry['answer']}")
        print(f"Category: {entry['category']}")

# Run inspection
load_qa_dataset("cattle_farming_qa.json")







✅ Q&A dataset created and saved as 'cattle_farming_qa.json'

Q1: How do I start a cattle farm for meat production?
A1: Start by selecting a suitable breed like Brahman or Angus, prepare land with proper fencing, build shelter, and plan your budget for feed, healthcare, and equipment.
Category: Startup Guidance

Q2: What is the best feeding schedule for weight gain?
A2: Feed cattle twice a day with a mix of high-energy feed like maize bran, protein supplements, and clean water. Monitor intake and adjust based on growth.
Category: Daily Farming Advice

Q3: How can I prevent disease outbreaks in my herd?
A3: Ensure regular vaccinations, maintain hygiene in housing, isolate sick animals, and consult a vet for early symptoms like coughing or weight loss.
Category: Risk Management


In [None]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m48.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m77.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.50.0


In [None]:
import streamlit as st
import json

# Load Q&A dataset
with open("cattle_farming_qa.json", "r") as f:
    qa_data = json.load(f)

# Search function
def find_answer(user_question):
    for entry in qa_data:
        if user_question.lower() in entry["question"].lower():
            return entry["answer"]
    return "Sorry, I don't have an answer for that yet."

# Streamlit UI
st.set_page_config(page_title="Smart Livestock Production Tracker", layout="centered")
st.title("🐄 Smart Livestock Production Tracker")
st.subheader("Ask your farming question below:")

user_question = st.text_input("Type your question here")

if st.button("Get Advice"):
    if user_question.strip():
        answer = find_answer(user_question)
        st.markdown(f"**Advice:** {answer}")
    else:
        st.warning("Please enter a question first.")


2025-09-26 06:24:38.649 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-09-26 06:24:38.669 Session state does not function when running a script without `streamlit run`


In [None]:
!streamlit run livestock_tracker_app.py


Usage: streamlit run [OPTIONS] TARGET [ARGS]...
Try 'streamlit run --help' for help.

Error: Invalid value: File does not exist: livestock_tracker_app.py


In [None]:
!pip install flask sentence-transformers torch



In [None]:
from flask import Flask, request, jsonify, render_template
from sentence_transformers import SentenceTransformer, util
import torch
import json

app = Flask(__name__)

# Load Q&A data
with open("cattle_farming_qa.json", "r") as f:
    qa_data = json.load(f)

# Load model and encode questions
model = SentenceTransformer('all-MiniLM-L6-v2')
stored_questions = [entry["question"] for entry in qa_data]
stored_embeddings = model.encode(stored_questions, convert_to_tensor=True)

# Cattle-only filter
def is_cattle_related(question):
    keywords = [
        "cow", "cattle", "milk", "beef", "pasture", "tick", "mastitis", "calf",
        "bull", "heifer", "grazing", "ruminant", "livestock", "hay", "fodder",
        "hoof", "disease", "feed", "drench", "branding", "castration"
    ]
    return any(word in question.lower() for word in keywords)

# Semantic search
def find_answer(user_question):
    user_embedding = model.encode(user_question, convert_to_tensor=True)
    scores = util.cos_sim(user_embedding, stored_embeddings)[0]
    best_idx = scores.argmax().item()
    best_score = scores[best_idx].item()
    return qa_data[best_idx]["answer"] if best_score > 0.5 else "Sorry, I don't have an answer for that yet."

# Routes
@app.route("/")
def home():
    return render_template("index.html")

@app.route("/get_advice", methods=["POST"])
def get_advice():
    data = request.get_json()
    question = data.get("question", "")
    if not is_cattle_related(question):
        return jsonify({"answer": ""})  # No response for non-cattle questions
    answer = find_answer(question)
    return jsonify({"answer": answer})

if __name__ == "__main__":
    app.run(debug=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with watchdog (inotify)


In [None]:
# Save this as livestock_tracker_app.py
import streamlit as st
import json

# Load Q&A dataset
with open("cattle_farming_qa.json", "r") as f:
    qa_data = json.load(f)

def find_answer(user_question):
    for entry in qa_data:
        if user_question.lower() in entry["question"].lower():
            return entry["answer"]
    return "Sorry, I don't have an answer for that yet."

st.set_page_config(page_title="Smart Livestock Production Tracker", layout="centered")
st.title(" Smart Livestock Production Tracker")
st.subheader("Ask your farming question below:")

user_question = st.text_input("Type your question here")

if st.button("Get Advice"):
    if user_question.strip():
        answer = find_answer(user_question)
        st.markdown(f"**Advice:** {answer}")
    else:
        st.warning("Please enter a question first.")



In [None]:
!pip install streamlit



In [None]:
!pip install fuzzywuzzy[speedup]


Collecting fuzzywuzzy[speedup]
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting python-levenshtein>=0.12 (from fuzzywuzzy[speedup])
  Downloading python_levenshtein-0.27.1-py3-none-any.whl.metadata (3.7 kB)
Collecting Levenshtein==0.27.1 (from python-levenshtein>=0.12->fuzzywuzzy[speedup])
  Downloading levenshtein-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.27.1->python-levenshtein>=0.12->fuzzywuzzy[speedup])
  Downloading rapidfuzz-3.14.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading python_levenshtein-0.27.1-py3-none-any.whl (9.4 kB)
Downloading levenshtein-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (159 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.9/159.9 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
D

In [None]:
from fuzzywuzzy import fuzz
import json

# Load dataset
with open("cattle_farming_qa.json", "r") as f:
    qa_data = json.load(f)

# Fuzzy match function
def find_best_match(user_question, threshold=70):
    best_score = 0
    best_answer = "Sorry, I don't have an answer for that yet."

    for entry in qa_data:
        score = fuzz.partial_ratio(user_question.lower(), entry["question"].lower())
        if score > best_score and score >= threshold:
            best_score = score
            best_answer = entry["answer"]

    return best_answer, best_score

In [None]:
# Example usage of the fuzzy match function and printing the score
user_question = "How do I start a farm?"
answer, score = find_best_match(user_question)
print(f"Question: {user_question}")
print(f"Best Answer: {answer}")
print(f"Best match score: {score}")

Question: How do I start a farm?
Best Answer: Start by selecting a suitable breed like Brahman or Angus, prepare land with proper fencing, build shelter, and plan your budget for feed, healthcare, and equipment.
Best match score: 82


updating the logic

In [None]:
if st.button("Get Advice"):
    if user_question.strip():
        answer = find_best_match(user_question)
        st.markdown(f"**Advice:** {answer}")
    else:
        st.warning("Please enter a question first.")




In [None]:
!pip install sentence-transformers




In [None]:
from sentence_transformers import SentenceTransformer, util
import json

# Load model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Load Q&A dataset
with open("cattle_farming_qa.json", "r") as f:
    qa_data = json.load(f)

# Create embeddings for stored questions
stored_questions = [entry["question"] for entry in qa_data]
stored_embeddings = model.encode(stored_questions, convert_to_tensor=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [38]:
def find_semantic_match(user_question):
    user_embedding = model.encode(user_question, convert_to_tensor=True)
    scores = util.cos_sim(user_embedding, stored_embeddings)[0]
    best_idx = scores.argmax().item()
    best_score = scores[best_idx].item()

    if best_score > 0.5:  # You can adjust this threshold
        return qa_data[best_idx]["answer"]
    else:
        return "Sorry, I couldn't find a good match for that question."


In [None]:
import streamlit as st
import json
from sentence_transformers import SentenceTransformer, util

# Load model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Load Q&A dataset
with open("cattle_farming_qa.json", "r") as f:
    qa_data = json.load(f)

# Create embeddings for stored questions
stored_questions = [entry["question"] for entry in qa_data]
stored_embeddings = model.encode(stored_questions, convert_to_tensor=True)

# Semantic match function
def find_semantic_match(user_question, filters=None):
    filtered_data = qa_data

    # Apply filters if provided
    if filters:
        for key, value in filters.items():
            filtered_data = [entry for entry in filtered_data if entry.get(key) == value]

    # If no matches after filtering, fallback to full dataset
    if not filtered_data:
        filtered_data = qa_data

    filtered_questions = [entry["question"] for entry in filtered_data]
    filtered_embeddings = model.encode(filtered_questions, convert_to_tensor=True)

    user_embedding = model.encode(user_question, convert_to_tensor=True)
    scores = util.cos_sim(user_embedding, filtered_embeddings)[0]
    best_idx = scores.argmax().item()
    best_score = scores[best_idx].item()

    if best_score > 0.5:
        return filtered_data[best_idx]["answer"]
    else:
        return "Sorry, I couldn't find a good match for that question."

# Streamlit UI
st.set_page_config(page_title="Smart Livestock Production Tracker", layout="centered")
st.title("🐄 Smart Livestock Production Tracker")
st.subheader("Ask your farming question below:")

user_question = st.text_input("Type your question here")

# Optional filters
season = st.selectbox("Season", ["None", "dry", "rainy"])
weather = st.selectbox("Weather Condition", ["None", "rain", "heat", "cold"])
disease = st.text_input("Disease Concern (optional)")

# Build filters dictionary
filters = {}
if season != "None":
    filters["season"] = season
if weather != "None":
    filters["weather"] = weather
if disease.strip():
    filters["disease"] = disease.strip().lower()

if st.button("Get Advice"):
    if user_question.strip():
        answer = find_semantic_match(user_question, filters)
        st.markdown(f"**Advice:** {answer}")
    else:
        st.warning("Please enter a question first.")




In [39]:
import json

# New Q&A entries to add
new_entries = [
    {
        "category": "Feeding",
        "question": "What feed improves milk production?",
        "answer": "High-protein legumes like lucerne and groundnut hay boost milk yield.",
        "season": "rainy"
    },
    {
        "category": "Shelter",
        "question": "How do I protect cattle during cold nights?",
        "answer": "Use dry bedding, windbreaks, and ensure shelter is insulated.",
        "weather": "cold"
    },
    {
        "category": "Health",
        "question": "What are signs of tick-borne diseases in cattle?",
        "answer": "Look for fever, weakness, swollen lymph nodes, and reduced milk production.",
        "disease": "tick-borne"
    },
    {
        "category": "Climate",
        "question": "What should I do during heavy rains?",
        "answer": "Ensure drainage in cattle shelters, raise feed storage, and monitor for hoof infections.",
        "weather": "rain"
    }
]

# Load existing dataset
with open("cattle_farming_qa.json", "r+") as f:
    try:
        data = json.load(f)
    except json.JSONDecodeError:
        print("Error: JSON file is empty or corrupted.")
        data = []

    # Add new entries
    data.extend(new_entries)

    # Validate entries
    def validate_entries(entries):
        for i, entry in enumerate(entries):
            if "question" not in entry or "answer" not in entry:
                print(f"Entry {i} is missing required fields.")
            elif not entry["question"].strip() or not entry["answer"].strip():
                print(f"Entry {i} has empty values.")
            else:
                print(f"Entry {i} is valid.")

    validate_entries(data)

    # Save updated dataset
    f.seek(0)
    json.dump(data, f, indent=2)
    f.truncate()

print("Dataset updated and validated successfully.")


Entry 0 is valid.
Entry 1 is valid.
Entry 2 is valid.
Entry 3 is valid.
Entry 4 is valid.
Entry 5 is valid.
Entry 6 is valid.
Entry 7 is valid.
Entry 8 is valid.
Entry 9 is valid.
Entry 10 is valid.
Dataset updated and validated successfully.


In [None]:
import json

# Load and validate original dataset
with open("cattle_farming_qa.json", "r") as f:
    try:
        data = json.load(f)
    except json.JSONDecodeError:
        print("Error: JSON file is empty or corrupted.")
        data = []

# Validate entries
def validate_entries(entries):
    for i, entry in enumerate(entries):
        if "question" not in entry or "answer" not in entry:
            print(f"Entry {i} is missing required fields.")
        elif not entry["question"].strip() or not entry["answer"].strip():
            print(f"Entry {i} has empty values.")
        else:
            print(f"Entry {i} is valid.")

validate_entries(data)

# Convert to prompt-completion format
formatted = []
for entry in data:
    prompt = f"### Question: {entry['question']}\n### Answer:"
    completion = " " + entry["answer"]
    formatted.append({"prompt": prompt, "completion": completion})

# Save formatted dataset
with open("formatted_qa.json", "w") as f:
    json.dump(formatted, f, indent=2)

print("Formatted dataset saved for fine-tuning.")


Entry 0 is valid.
Entry 1 is valid.
Entry 2 is valid.
Entry 3 is valid.
Entry 4 is valid.
Entry 5 is valid.
Entry 6 is valid.
Formatted dataset saved for fine-tuning.


finetuning the model

In [1]:
!pip install unsloth[colab] -q

  Preparing metadata (setup.py) ... [?25l[?25hdone
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py bdist_wheel[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for xformers (setup.py) ... [?25lerror
[31m  ERROR: Failed building wheel for xformers[0m[31m
[0m[31mERROR: ERROR: Failed to build installable wheels for some pyproject.toml based projects (xformers)[0m[31m
[0m[?25h

Now that the necessary libraries are installed, we can load the model and fine-tune it using the prepared dataset.

In [2]:
!pip install unsloth trl datasets torch



In [6]:
from unsloth import FastLanguageModel
from trl import SFTTrainer
from datasets import load_dataset
import torch
from transformers import TrainingArguments

# Load model and tokenizer with correct model name
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/tinyllama-chat",  # corrected model name
    max_seq_length=2048,
    dtype=torch.float16,
    load_in_4bit=True,
    device_map="auto"
)

# Add LoRA adapters for fine-tuning
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "embed_tokens", "lm_head"
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=True,
    random_state=3407,
    use_rslora=False,
    loftq_config=None
)

# Load your formatted dataset
dataset = load_dataset("json", data_files="formatted_qa.json", split="train")

# Combine prompt and completion into a single text field
dataset = dataset.map(lambda x: {
    "text": x["prompt"] + x["completion"]
})

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    max_steps=3,  # increase for longer training
    learning_rate=2e-4,)
from unsloth import FastLanguageModel
from trl import SFTTrainer
from datasets import load_dataset
import torch
from transformers import TrainingArguments

# Load model and tokenizer with correct model name
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/tinyllama-chat",  # corrected model name
    max_seq_length=2048,
    dtype=torch.float16,
    load_in_4bit=True,
    device_map="auto"
)

# Add LoRA adapters for fine-tuning
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "embed_tokens", "lm_head"
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=True,
    random_state=3407,
    use_rslora=False,
    loftq_config=None
)

# Load your formatted dataset
dataset = load_dataset("json", data_files="formatted_qa.json", split="train")

# Combine prompt and completion into a single text field
dataset = dataset.map(lambda x: {
    "text": x["prompt"] + x["completion"]
})

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    max_steps=3,  # increase for longer training
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    report_to="none"
)

# Fine-tune the model
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    args=training_args
)

# Start training
trainer.train()

# Save the fine-tuned model
model.save_pretrained("tinyllama_finetuned")
tokenizer.save_pretrained("tinyllama_finetuned")



🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.9.7: Fast Llama patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/762M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth 2025.9.7 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.


Unsloth: Training embed_tokens in mixed precision to save VRAM
Unsloth: Training lm_head in mixed precision to save VRAM
==((====))==  Unsloth 2025.9.7: Fast Llama patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Training embed_tokens in mixed precision to save VRAM
Unsloth: Training lm_head in mixed precision to save VRAM


Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/7 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 7 | Num Epochs = 3 | Total steps = 3
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 143,687,680 of 1,243,736,064 (11.55% trained)


Step,Training Loss
1,2.8788
2,2.0457
3,1.6117


('tinyllama_finetuned/tokenizer_config.json',
 'tinyllama_finetuned/special_tokens_map.json',
 'tinyllama_finetuned/chat_template.jinja',
 'tinyllama_finetuned/tokenizer.model',
 'tinyllama_finetuned/added_tokens.json',
 'tinyllama_finetuned/tokenizer.json')

In [7]:
from transformers import pipeline
from unsloth import FastLanguageModel
import torch

# Load the fine-tuned model using FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen2.5-7B", # Load from saved directory
    max_seq_length=2048,
    dtype=torch.float16,
    load_in_4bit=True, # Load model in 4-bit precision
    device_map="auto"
)

# Create the pipeline from the loaded model and tokenizer
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

response = pipe("### Question: What feed improves milk production?\n### Answer:", max_new_tokens=100)
print(response[0]["generated_text"])

==((====))==  Unsloth 2025.9.7: Fast Qwen2 patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

Device set to use cuda:0


### Question: What feed improves milk production?
### Answer: Alfalfa
### Question: What feed improves milk production?
### Answer: Alfalfa
### Question: What feed improves milk production?
### Answer: Alfalfa
### Question: What feed improves milk production?
### Answer: Alfalfa
### Question: What feed improves milk production?
### Answer: Alfalfa
### Question: What feed improves milk production?
### Answer: Alfalfa
### Question: What feed improves milk production?
### Answer: Alfalfa
### Question: What feed improves milk


In [19]:
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import json
from unsloth import FastLanguageModel
import torch

# # Load fine-tuned model using FastLanguageModel
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name="tinyllama_finetuned",  # Your saved directory
#     max_seq_length=2048,
#     dtype=torch.float16,
#     load_in_4bit=True,
#     device_map="auto",  # Automatically place layers on GPU/CPU
#     llm_int8_enable_fp32_cpu_offload=True  # Allow CPU fallback for layers that don't fit in GPU
# )


# Create the pipeline from the loaded model and tokenizer
model_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


# Load semantic search model
semantic_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Load Q&A dataset
with open("cattle_farming_qa.json", "r") as f:
    qa_data = json.load(f)

# Prepare semantic embeddings
stored_questions = [entry["question"] for entry in qa_data]
stored_embeddings = semantic_model.encode(stored_questions, convert_to_tensor=True)

# Semantic search function
def find_semantic_match(user_question):
    user_embedding = semantic_model.encode(user_question, convert_to_tensor=True)
    scores = util.cos_sim(user_embedding, stored_embeddings)[0]
    best_idx = scores.argmax().item()
    best_score = scores[best_idx].item()

    if best_score > 0.5:
        return qa_data[best_idx]["answer"]
    else:
        return "Sorry, I couldn't find a good match for that question."

# Test questions
test_questions = [
    "How do I fatten cows quickly?",
    "Best feed during dry season?",
    "How to prevent tick-borne diseases?",
    "What should I do during heavy rains?",
    "How to protect cattle from cold nights?"
]

# Run comparison
for question in test_questions:
    prompt = f"### Question: {question}\n### Answer:"
    model_response = model_pipe(prompt, max_new_tokens=100)[0]["generated_text"]
    semantic_response = find_semantic_match(question)

    print(f"Question: {question}")
    print("Model Answer:")
    print(model_response.strip())
    print("Semantic Search Answer:")
    print(semantic_response.strip())
    print("-" * 60)

Device set to use cuda:0


Question: How do I fatten cows quickly?
Model Answer:
### Question: How do I fatten cows quickly?
### Answer: Make sure they have enough food and water, and keep them clean.
Semantic Search Answer:
Use dry bedding, windbreaks, and ensure shelter is insulated.
------------------------------------------------------------
Question: Best feed during dry season?
Model Answer:
### Question: Best feed during dry season?
### Answer: The best feed for goats during the dry season is a combination of browse and fodder. Browse includes leaves, twigs, and other plant parts, while fodder includes grasses, grains, and other crop residues. These feeds should be provided in equal proportions to ensure that the goats receive the necessary nutrients to maintain their health and productivity.

### Question: How to manage goats during the dry season?
### Answer: To manage goats during the dry season, it is important to provide them with adequate water and feed
Semantic Search Answer:
Feed cattle twice a da

In [20]:
import streamlit as st
import json
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Load semantic search model
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

# Load fine-tuned model
# This should be done outside of the find_semantic_match function to avoid reloading every time
try:
    model_pipe = pipeline("text-generation", model="mistral_finetuned", tokenizer="mistral_finetuned")
    model_loaded = True
except Exception as e:
    st.warning(f"Could not load fine-tuned model: {e}")
    model_loaded = False
    model_pipe = None


# Load Q&A dataset based on domain
def load_qa_data(domain):
    qa_file = f"{domain.lower()}_farming_qa.json"
    try:
        with open(qa_file, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        st.error(f"Q&A file not found for {domain} farming.")
        return []

# Semantic search function
def find_semantic_match(user_question, qa_data, filters=None):
    filtered_data = qa_data

    if filters:
        for key, value in filters.items():
            filtered_data = [entry for entry in filtered_data if entry.get(key) == value]

    if not filtered_data:
        filtered_data = qa_data

    filtered_questions = [entry["question"] for entry in filtered_data]
    # Handle empty filtered_questions to avoid errors
    if not filtered_questions:
        return "Sorry, I couldn't find a good match for that question with the applied filters."

    filtered_embeddings = semantic_model.encode(filtered_questions, convert_to_tensor=True)

    user_embedding = semantic_model.encode(user_question, convert_to_tensor=True)
    scores = util.cos_sim(user_embedding, filtered_embeddings)[0]
    best_idx = scores.argmax().item()
    best_score = scores[best_idx].item()

    if best_score > 0.5:
        return filtered_data[best_idx]["answer"]
    else:
        return "Sorry, I couldn't find a good match for that question."


# Streamlit UI
st.set_page_config(page_title="Smart Livestock Production Tracker", layout="centered")
st.title("🐄 Smart Livestock Production Tracker")
st.subheader("Ask your farming question below:")

# Domain selection
domain = st.selectbox("Choose farming domain", ["Cattle", "Poultry"])

# Load Q&A data for the selected domain
qa_data = load_qa_data(domain)

user_question = st.text_input("Type your question here")

# Optional filters
season = st.selectbox("Season", ["None", "dry", "rainy"])
weather = st.selectbox("Weather Condition", ["None", "rain", "heat", "cold"])
disease = st.text_input("Disease Concern (optional)")

# Build filters dictionary
filters = {}
if season != "None":
    filters["season"] = season
if weather != "None":
    filters["weather"] = weather
if disease.strip():
    filters["disease"] = disease.strip().lower()

use_model = st.checkbox("Use fine-tuned model instead of search", value=model_loaded, disabled=not model_loaded)


if st.button("Get Advice"):
    if user_question.strip() and qa_data:
        if use_model and model_loaded:
            prompt = f"### Domain: {domain}\n### Question: {user_question}\n### Answer:"
            answer = model_pipe(prompt, max_new_tokens=100)[0]["generated_text"]
        else:
            answer = find_semantic_match(user_question, qa_data, filters)
        st.markdown(f"**Advice:** {answer.strip()}")
    else:
        st.warning("Please enter a question first or ensure Q&A data is loaded.")

2025-09-26 08:56:25.804 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-09-26 08:56:25.821 Session state does not function when running a script without `streamlit run`


In [21]:
import streamlit as st
import json
import datetime
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Load semantic search model
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

# Load fine-tuned model
try:
    model_pipe = pipeline("text-generation", model="mistral_finetuned", tokenizer="mistral_finetuned")
    model_loaded = True
except Exception as e:
    st.warning(f"Could not load fine-tuned model: {e}")
    model_loaded = False
    model_pipe = None

# Load Q&A dataset based on domain
def load_qa_data(domain):
    qa_file = f"{domain.lower()}_farming_qa.json"
    try:
        with open(qa_file, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        st.error(f"Q&A file not found for {domain} farming.")
        return []

# Prepare semantic embeddings
# This needs to be done after loading qa_data based on domain
# stored_questions = [entry["question"] for entry in qa_data]
# stored_embeddings = semantic_model.encode(stored_questions, convert_to_tensor=True)

# Semantic search function
def find_semantic_match(user_question, qa_data, filters=None):
    filtered_data = qa_data

    if filters:
        for key, value in filters.items():
            filtered_data = [entry for entry in filtered_data if entry.get(key) == value]

    if not filtered_data:
        filtered_data = qa_data

    filtered_questions = [entry["question"] for entry in filtered_data]
    # Handle empty filtered_questions to avoid errors
    if not filtered_questions:
        return "Sorry, I couldn't find a good match for that question with the applied filters."

    filtered_embeddings = semantic_model.encode(filtered_questions, convert_to_tensor=True)

    user_embedding = semantic_model.encode(user_question, convert_to_tensor=True)
    scores = util.cos_sim(user_embedding, filtered_embeddings)[0]
    best_idx = scores.argmax().item()
    best_score = scores[best_idx].item()

    if best_score > 0.5:
        return filtered_data[best_idx]["answer"]
    else:
        return "Sorry, I couldn't find a good match for that question."

# Feedback logger
def log_feedback(question, answer, feedback, method, domain):
    entry = {
        "timestamp": str(datetime.datetime.now()),
        "question": question,
        "answer": answer,
        "feedback": feedback,
        "method": method,
        "domain": domain
    }
    with open("feedback_log.json", "a") as f:
        f.write(json.dumps(entry) + "\n")

# Usage logger
def log_usage(question, answer, method, domain):
    entry = {
        "timestamp": str(datetime.datetime.now()),
        "question": question,
        "answer": answer,
        "method": method,
        "domain": domain
    }
    with open("usage_log.json", "a") as f:
        f.write(json.dumps(entry) + "\n")

# Streamlit UI
st.set_page_config(page_title="Smart Livestock Production Tracker", layout="centered")
st.title("Smart Livestock Production Tracker")
st.subheader("Ask your farming question below:")

# Domain selection
domain = st.selectbox("Choose farming domain", ["Cattle", "Poultry"])

# Load Q&A data for the selected domain
qa_data = load_qa_data(domain)

user_question = st.text_input("Type your question here")

season = st.selectbox("Season", ["None", "dry", "rainy"])
weather = st.selectbox("Weather Condition", ["None", "rain", "heat", "cold"])
disease = st.text_input("Disease Concern (optional)")

filters = {}
if season != "None":
    filters["season"] = season
if weather != "None":
    filters["weather"] = weather
if disease.strip():
    filters["disease"] = disease.strip().lower()

use_model = st.checkbox("Use fine-tuned model instead of search", value=model_loaded, disabled=not model_loaded)

if st.button("Get Advice"):
    if user_question.strip() and qa_data:
        if use_model and model_loaded:
            prompt = f"### Domain: {domain}\n### Question: {user_question}\n### Answer:"
            answer = model_pipe(prompt, max_new_tokens=100)[0]["generated_text"]
            method = "model"
        else:
            answer = find_semantic_match(user_question, qa_data, filters)
            method = "semantic"

        st.markdown(f"**Advice:** {answer.strip()}")
        log_usage(user_question, answer.strip(), method, domain)

        if st.button("👍 Helpful"):
            log_feedback(user_question, answer.strip(), "helpful", method, domain)
            st.success("Thanks for your feedback!")

        if st.button("👎 Not Helpful"):
            log_feedback(user_question, answer.strip(), "not helpful", method, domain)
            st.info("Thanks for letting us know.")
    else:
        st.warning("Please enter a question first or ensure Q&A data is loaded.")



In [24]:
import streamlit as st
import json
import datetime
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Load semantic search model
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

# Load fine-tuned model (load outside the button click for better performance)
try:
    model_pipe = pipeline("text-generation", model="tinyllama_finetuned", tokenizer="tinyllama_finetuned")
    model_loaded = True
except Exception as e:
    st.warning(f"Could not load fine-tuned model: {e}")
    model_loaded = False
    model_pipe = None

# Load Q&A dataset
with open("cattle_farming_qa.json", "r") as f:
    qa_data = json.load(f)

# Prepare semantic embeddings
stored_questions = [entry["question"] for entry in qa_data]
stored_embeddings = semantic_model.encode(stored_questions, convert_to_tensor=True)

# Semantic search function
def find_semantic_match(user_question, filters=None):
    filtered_data = qa_data

    if filters:
        for key, value in filters.items():
            filtered_data = [entry for entry in filtered_data if entry.get(key) == value]

    if not filtered_data:
        filtered_data = qa_data

    filtered_questions = [entry["question"] for entry in filtered_data]
    filtered_embeddings = semantic_model.encode(filtered_questions, convert_to_tensor=True)

    user_embedding = semantic_model.encode(user_question, convert_to_tensor=True)
    scores = util.cos_sim(user_embedding, filtered_embeddings)[0]
    best_idx = scores.argmax().item()
    best_score = scores[best_idx].item()

    if best_score > 0.5:
        return filtered_data[best_idx]["answer"]
    else:
        return "Sorry, I couldn't find a good match for that question."

# Feedback logger
def log_feedback(question, answer, feedback, method, domain):
    entry = {
        "timestamp": str(datetime.datetime.now()),
        "question": question,
        "answer": answer,
        "feedback": feedback,
        "method": method,
        "domain": domain # Add domain to logging
    }
    with open("feedback_log.json", "a") as f:
        f.write(json.dumps(entry) + "\n")

# Usage logger
def log_usage(question, answer, method, domain):
    entry = {
        "timestamp": str(datetime.datetime.now()),
        "question": question,
        "answer": answer,
        "method": method,
        "domain": domain # Add domain to logging
    }
    with open("usage_log.json", "a") as f:
        f.write(json.dumps(entry) + "\n")

# Streamlit UI
st.set_page_config(page_title="Smart Livestock Production Tracker", layout="centered")
st.title("Smart Livestock Production Tracker")
st.subheader("Ask your farming question below:")

# Domain selection (assuming cattle for now, will add poultry later)
domain = "Cattle" # Hardcode cattle for now

user_question = st.text_input("Type your question here")

season = st.selectbox("Season", ["None", "dry", "rainy"])
weather = st.selectbox("Weather Condition", ["None", "rain", "heat", "cold"])
disease = st.text_input("Disease Concern (optional)")

filters = {}
if season != "None":
    filters["season"] = season
if weather != "None":
    filters["weather"] = weather
if disease.strip():
    filters["disease"] = disease.strip().lower()

use_model = st.checkbox("Use fine-tuned model instead of search", value=model_loaded, disabled=not model_loaded)

if st.button("Get Advice"):
    if user_question.strip():
        if use_model and model_loaded:
            prompt = f"### Question: {user_question}\n### Answer:"
            answer = model_pipe(prompt, max_new_tokens=100)[0]["generated_text"]
            method = "model"
        else:
            answer = find_semantic_match(user_question, filters)
            method = "semantic"

        st.markdown(f"**Advice:** {answer.strip()}")
        log_usage(user_question, answer.strip(), method, domain) # Log with domain

        if st.button("👍 Helpful"):
            log_feedback(user_question, answer.strip(), "helpful", method, domain) # Log with domain
            st.success("Thanks for your feedback!")

        if st.button("👎 Not Helpful"):
            log_feedback(user_question, answer.strip(), "not helpful", method, domain) # Log with domain
            st.info("Thanks for letting us know.")
    else:
        st.warning("Please enter a question first.")



In [37]:
domain = st.selectbox("Choose farming domain", ["Cattle", "Poultry"])
qa_file = "cattle_farming_qa.json" if domain == "Cattle" else "poultry_farming_qa.json"

if use_model:
    prompt = f"### Domain: {domain}\n### Question: {user_question}\n### Answer:"
    answer = model_pipe(prompt, max_new_tokens=100)[0]["generated_text"]
else:
    answer = find_semantic_match(user_question)

#log_feedback(user_question, answer.strip(), "helpful", domain)




In [28]:
def is_cattle_related(question):
    keywords = [
        "cow", "cattle", "milk", "beef", "pasture", "tick", "mastitis", "calf",
        "bull", "heifer", "grazing", "ruminant", "livestock", "hay", "fodder",
        "hoof", "disease", "feed", "drench", "branding", "castration"
    ]
    return any(word in question.lower() for word in keywords)


if st.button("Get Advice"):
    if user_question.strip():
        # domain is hardcoded as "Cattle" in cell ZM6CQrKa58Y5
        domain = "Cattle"
        if not is_cattle_related(user_question):
            st.warning("This system only answers cattle-related questions.")
            st.stop()

        # Proceed with semantic or model response
        if use_model:
            prompt = f"### Domain: {domain}\n### Question: {user_question}\n### Answer:"
            answer = model_pipe(prompt, max_new_tokens=100)[0]["generated_text"]
            method = "model"
        else:
            # qa_data and filters are defined in cell ZM6CQrKa58Y5
            answer = find_semantic_match(user_question, qa_data, filters)
            method = "semantic"

        st.markdown(f"**Advice:** {answer.strip()}")
        log_usage(user_question, answer.strip(), method, domain)

        if st.button("👍 Helpful"):
            log_feedback(user_question, answer.strip(), "helpful", method, domain)
            st.success("Thanks for your feedback!")

        if st.button("👎 Not Helpful"):
            log_feedback(user_question, answer.strip(), "not helpful", method, domain)
            st.info("Thanks for letting us know.")
    else:
        st.warning("Please enter a question first.")



In [43]:
import streamlit as st
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import json
from datetime import datetime
from unsloth import FastLanguageModel
import torch

# Load models
semantic_model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")

# Load fine-tuned model using FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="tinyllama_finetuned", # Load from saved directory
    max_seq_length=2048,
    dtype=torch.float16,
    load_in_4bit=True, # Load model in 4-bit precision
    device_map="auto"
)

# Create the pipeline from the loaded model and tokenizer
model_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


# UI
st.title("Smart Livestock Advisory System")
domain = st.selectbox("Choose farming domain", ["Cattle", "Poultry"])
use_model = st.checkbox("Use fine-tuned model")
user_question = st.text_input("Ask your question")

# Load domain-specific data
qa_file = "cattle_farming_qa.json" if domain == "Cattle" else "poultry_farming_qa.json"
with open(qa_file, "r") as f:
    qa_data = json.load(f)

stored_questions = [entry["question"] for entry in qa_data]
stored_embeddings = semantic_model.encode(stored_questions, convert_to_tensor=True)

def find_semantic_match(question):
    query_embedding = semantic_model.encode(question, convert_to_tensor=True)
    scores = util.cos_sim(query_embedding, stored_embeddings)[0]
    best_idx = scores.argmax().item()
    best_score = scores[best_idx].item()
    return qa_data[best_idx]["answer"] if best_score > 0.5 else "Sorry, I couldn't find a good match."

def log_feedback(question, answer, rating, domain):
    feedback = {
        "question": question,
        "answer": answer,
        "rating": rating,
        "domain": domain,
        "timestamp": datetime.now().isoformat()
    }
    with open("feedback_log.json", "a") as f:
        f.write(json.dumps(feedback) + "\n")

# Generate answer
if user_question:
    if use_model:
        prompt = f"### Domain: {domain}\n### Question: {user_question}\n### Answer:"
        response = model_pipe(prompt, max_new_tokens=100)
        answer = response[0].get("generated_text", "").strip()
    else:
        answer = find_semantic_match(user_question)

    st.markdown(f"**Answer:** {answer}")
    log_feedback(user_question, answer, "helpful", domain)

==((====))==  Unsloth 2025.9.7: Fast Llama patching. Transformers: 4.55.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Device set to use cuda:0


In [49]:
!pip show streamlit


Name: streamlit
Version: 1.50.0
Summary: A faster way to build and share data apps
Home-page: https://streamlit.io
Author: Snowflake Inc
Author-email: hello@streamlit.io
License: Apache License 2.0
Location: /usr/local/lib/python3.12/dist-packages
Requires: altair, blinker, cachetools, click, gitpython, numpy, packaging, pandas, pillow, protobuf, pyarrow, pydeck, requests, tenacity, toml, tornado, typing-extensions, watchdog
Required-by: 
