🐄 T5 Model for Cattle Price Prediction

📦 Installation

In [None]:
# sentencepiece is required by the slow T5Tokenizer loaded in the tokenization cell.
!pip install -q transformers datasets accelerate sentencepiece

📁 Dataset Loading and Preparation

In [None]:
import json
import pandas as pd

# Load the raw training examples. JSON is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default.
with open("/kaggle/input/dataset-cattle/cattle_training_dataset.json", encoding="utf-8") as f:
    data = json.load(f)

# Flatten every example into the text-to-text pair the model is trained on:
#   input_text  -> "sex: <sex>, age: <age>, health: <flag1>;<flag2>;..."
#   target_text -> the example's "output" value rendered as a string
records = [
    {
        "input_text": (
            f"sex: {item['input']['sex']}, "
            f"age: {item['input']['age']}, "
            f"health: {';'.join(item['input']['health'])}"
        ),
        "target_text": str(item["output"]),
    }
    for item in data
]

df = pd.DataFrame(records)
df.head()

In [4]:
from datasets import Dataset

# Hold out 10% of the examples for evaluation. A fixed seed makes the shuffle
# (and therefore the train/test membership) reproducible across notebook runs.
dataset = Dataset.from_pandas(df)
dataset = dataset.train_test_split(test_size=0.1, seed=42)

🧹 Tokenization and Preprocessing

In [None]:
from transformers import T5Tokenizer

model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)


def preprocess(example):
    """Tokenize one example into encoder inputs and decoder labels.

    Args:
        example: A dataset row with ``input_text`` and ``target_text`` strings.

    Returns:
        The tokenizer output for ``input_text`` (truncated to 128 tokens) with
        an added ``labels`` entry holding the token ids of ``target_text``
        (truncated to 10 tokens).
    """
    # No padding here on purpose: DataCollatorForSeq2Seq pads each batch at
    # training time and fills label padding with -100, which the loss ignores.
    # Padding labels to max_length with the pad token id would make the model
    # train on predicting padding.
    model_input = tokenizer(example["input_text"], truncation=True, max_length=128)
    # `text_target` replaces the deprecated `as_target_tokenizer()` context manager.
    labels = tokenizer(text_target=example["target_text"], truncation=True, max_length=10)
    model_input["labels"] = labels["input_ids"]
    return model_input


tokenized = dataset.map(preprocess)

🧠 Training the Model

In [None]:
from transformers import T5ForConditionalGeneration, TrainingArguments, Trainer, DataCollatorForSeq2Seq

model = T5ForConditionalGeneration.from_pretrained(model_name)

training_args = TrainingArguments(
    output_dir="./results",
    # NOTE: do_eval on its own does not schedule evaluation during training;
    # the held-out split is evaluated explicitly after training below.
    do_eval=True,
    num_train_epochs=10,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_steps=10,
    save_steps=500,
    disable_tqdm=False,         # keep the progress bar visible in the notebook
    report_to="none",           # do not report to external trackers such as W&B
)

# Pads inputs and labels per batch.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

# Score the fine-tuned model on the held-out split; without this call the
# eval_dataset passed to the Trainer is never used.
eval_metrics = trainer.evaluate()
print(eval_metrics)


💾 Save and Reload Model

In [None]:
# Write the fine-tuned weights and the tokenizer files side by side so the
# directory can later be loaded with from_pretrained().
EXPORT_DIR = "/kaggle/working/t5-livestock"
model.save_pretrained(EXPORT_DIR)
tokenizer.save_pretrained(EXPORT_DIR)

🔮 Inference Function

In [None]:
def format_input(data: dict) -> str:
    """Convert a structured input dict into the T5 input format string.

    Args:
        data: Mapping with optional ``sex``, ``age`` and ``health`` entries.
            ``health`` may be a list of condition strings, a single string,
            or missing/``None``.

    Returns:
        ``"sex: <sex>, age: <age>, health: <a;b;c>"``. Missing ``sex``/``age``
        fall back to ``"Unknown"``; missing health yields an empty list part.
    """
    sex = data.get("sex", "Unknown")
    age = data.get("age", "Unknown")

    health = data.get("health") or []
    if isinstance(health, str):
        # A bare string is a single condition; joining it directly would
        # iterate its characters ("healthy" -> "h;e;a;l;t;h;y").
        health = [health]
    # str() keeps join from raising on non-string entries.
    health_text = ";".join(str(condition) for condition in health)

    return f"sex: {sex}, age: {age}, health: {health_text}"

import torch

def predict_price_from_json(input_data: dict) -> str:
    """Predict a price string for one animal with the fine-tuned T5 model.

    Args:
        input_data: JSON-like dict accepted by ``format_input``.

    Returns:
        The decoded model output with special tokens removed.
    """
    input_text = format_input(input_data)

    # Detect device (CPU or CUDA)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Move model to the correct device
    model.to(device)
    # Training leaves the model in train mode; switch to eval mode so dropout
    # is disabled and repeated calls give the same prediction.
    model.eval()

    # Tokenize and move inputs to the same device as the model
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Generate prediction and decode the first (only) sequence
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example JSON input
example_json = {
    "sex": "Male",
    "age": "2Y",
    "health": ["healthy"],
}

# Run one prediction end to end; the label's emoji was mis-encoded and is restored here.
predicted_price = predict_price_from_json(example_json)
print("💰 Predicted price:", predicted_price)


🚀 Upload to Hugging Face Hub

In [None]:
# python-dotenv provides the `dotenv` module imported by the login cell.
!pip install -q huggingface_hub python-dotenv

In [None]:
from huggingface_hub import HfApi, HfFolder
from dotenv import load_dotenv
import os

# Load environment variables from .env; the token is read from HF_TOKEN and
# never hard-coded in the notebook.
load_dotenv()
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    # Fail early with an actionable message instead of passing None to save_token.
    raise RuntimeError("HF_TOKEN is not set; add it to your .env file or environment.")

# Save it to the Hugging Face config
HfFolder.save_token(hf_token)

# Optional: test auth
api = HfApi()
user = api.whoami()
print("✅ Logged in as:", user["name"])


In [None]:
from huggingface_hub import notebook_login
# Authenticate interactively, then publish the weights and the tokenizer
# files to the same Hub repository.
notebook_login()

HUB_REPO_NAME = "t5-cattle-price"
model.push_to_hub(HUB_REPO_NAME)
tokenizer.push_to_hub(HUB_REPO_NAME)


🌐 Flask API for Deployment

In [None]:
from flask import Flask, request, jsonify
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the tokenizer and model a single time at start-up so every request
# reuses the same objects.
HUB_CHECKPOINT = "your-username/t5-cattle-price"
tokenizer = T5Tokenizer.from_pretrained(HUB_CHECKPOINT)
model = T5ForConditionalGeneration.from_pretrained(HUB_CHECKPOINT)

def format_input(data):
    """Build the T5 prompt string for one request payload.

    Args:
        data: Mapping with optional ``sex``, ``age`` and ``health`` entries.
            ``health`` may be a list of condition strings, a single string,
            or missing/``None``.

    Returns:
        ``"sex: <sex>, age: <age>, health: <a;b;c>"``. Missing ``sex``/``age``
        fall back to ``"Unknown"``; missing health yields an empty list part.
    """
    sex = data.get("sex", "Unknown")
    age = data.get("age", "Unknown")

    health = data.get("health") or []
    if isinstance(health, str):
        # A client may send a bare string; joining it directly would iterate
        # its characters ("healthy" -> "h;e;a;l;t;h;y").
        health = [health]
    # str() keeps join from raising on non-string entries in the payload.
    health_text = ";".join(str(condition) for condition in health)

    return f"sex: {sex}, age: {age}, health: {health_text}"

def predict_price(data):
    """Return the model's decoded prediction for one request payload.

    The payload is rendered to a prompt with ``format_input``, tokenized,
    passed through ``model.generate`` and the first generated sequence is
    decoded with special tokens stripped.
    """
    prompt = format_input(data)
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    generated = model.generate(**encoded)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

app = Flask(__name__)

# Fields every prediction request must carry.
_REQUIRED_FIELDS = ("sex", "age", "health")


@app.route("/predict", methods=["POST"])
def predict():
    """Handle ``POST /predict``: validate the JSON body and return a price.

    Returns:
        ``{"price": <prediction>}`` on success, or ``{"error": <message>}``
        with status 400 when the body is not a JSON object or lacks a
        required field.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # client gets the JSON error below rather than a failure further down.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    missing = [field for field in _REQUIRED_FIELDS if field not in data]
    if missing:
        return jsonify({"error": f"Missing required field(s): {', '.join(missing)}"}), 400

    predicted_price = predict_price(data)
    return jsonify({"price": predicted_price})

if __name__ == "__main__":
    import os

    # The debugger exposed by debug mode allows arbitrary code execution, so it
    # stays off unless explicitly requested through FLASK_DEBUG.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes"}
    app.run(debug=debug_enabled)
