In [1]:
import unsloth
import transformers
import trl

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-08-31 22:33:42.474342: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-31 22:33:42.485594: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1756659822.499761    9344 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1756659822.504318    9344 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1756659822.515447    9344 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:


import unsloth
from unsloth import FastModel
import torch
import os

fourbit_models = [
    # 4bit dynamic quants for superior accuracy and low memory use
    "unsloth/gemma-3n-E4B-it-unsloth-bnb-4bit",
    "unsloth/gemma-3n-E2B-it-unsloth-bnb-4bit",
    "unsloth/gemma-3n-E4B-unsloth-bnb-4bit",
    "unsloth/gemma-3n-E2B-unsloth-bnb-4bit",
    "unsloth/gemma-3-1b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-4b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-12b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",
    "unsloth/gemma-3-27b-it-unsloth-bnb-4bit",
]   

# model, tokenizer = FastModel.from_pretrained(
#     model_name = "unsloth/gemma-3n-E2B-it",
#     dtype = None, 
#     max_seq_length = 1024, 
#     load_in_4bit = True,  
#     full_finetuning = False
# )


model, tokenizer = FastModel.from_pretrained(
    model_name="unsloth/gemma-3-1b-it",
    dtype=torch.float16,  # Use a lower precision for computation
    max_seq_length=1024,
    load_in_4bit=True,
    full_finetuning=False,
    # llm_int8_enable_fp32_cpu_offload=True,
    # device_map="auto"  # Automatically assigns layers to available devices
)






==((====))==  Unsloth 2025.8.10: Fast Gemma3 patching. Transformers: 4.56.0.
   \\   /|    NVIDIA GeForce RTX 3050 Laptop GPU. Num GPUs = 1. Max memory: 3.68 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.6. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


In [3]:
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from unsloth import FastModel
# import torch

# # Load the model and tokenizer manually
# model_name = "unsloth/gemma-3n-E2B-it"
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",  # Automatically assign layers to available devices
#     torch_dtype=torch.float16,  # Use float16 for reduced memory usage
#     load_in_4bit=True,  # Load the model in 4-bit precision
#     trust_remote_code=True  # Trust the remote code for model loading
# )
# tokenizer = AutoTokenizer.from_pretrained(model_name)

# # Convert the model into a FastModel forz optimized inference
# fast_model = FastModel.from_model(model)

# # Now you can use fast_model and tokenizer for inference


In [4]:
# !pip install unsloth

# !pip install --no-deps --upgrade transformers
# !pip install --no-deps --upgrade timm

In [5]:
# pip install unsloth_zoo

In [6]:
# !pip install peft

from peft import LoraConfig, get_peft_model
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# # Reduce VRAM usage
# torch.cuda.empty_cache()


In [7]:
from transformers import TextStreamer
import gc

def do_gemma_3n_inference(model, tokenizer, messages, max_new_tokens = 128):
    
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt = True, 
        tokenize = True,
        return_dict = True,
        return_tensors = "pt",
    ).to("cuda")
    
    output_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=1.0,
        top_p=0.95,
        top_k=64,
        do_sample=True
    )

    generated_text = tokenizer.decode(
        output_ids[0][inputs['input_ids'].shape[-1]:], 
        skip_special_tokens=True
    )
    
    # Cleanup to reduce VRAM usage
    del inputs
    torch.cuda.empty_cache()
    gc.collect()

    return generated_text

In [8]:
import torch
import gc

import torch._dynamo
torch._dynamo.config.suppress_errors = True
torch._dynamo.config.cache_size_limit = 512

In [9]:
def create_unified_training_data(df1, df2):
    """Combine both datasets into a unified training format"""
    
    print("🔄 Creating unified training dataset...")
    
    unified_data = []
    
    # Process Dataset 1 (Detailed)
    print("Processing detailed dataset...")
    for _, row in df1.iterrows():
        # Basic recommendation
        basic_prompt = (
            f"N: {row['Nitrogen']}, P: {row['Phosphorus']}, K: {row['Potassium']}, "
            f"temperature: {row['Temperature']:.2f}, humidity: {row['Humidity']:.2f}, "
            f"pH: {row['pH_Value']:.2f}, rainfall: {row['Rainfall']:.2f}"
        )
        basic_response = f"Recommended crop: {row['Crop']}"
        
        # Detailed recommendation with soil type and variety
        detailed_prompt = (
            f"N: {row['Nitrogen']}, P: {row['Phosphorus']}, K: {row['Potassium']}, "
            f"temperature: {row['Temperature']:.2f}, humidity: {row['Humidity']:.2f}, "
            f"pH: {row['pH_Value']:.2f}, rainfall: {row['Rainfall']:.2f}, "
            f"soil_type: {row['Soil_Type']}"
        )
        detailed_response = (
            f"Recommended crop: {row['Crop']}. "
            f"Suitable soil type: {row['Soil_Type']}. "
            f"Recommended variety: {row['Variety']}. "
            f"This combination is optimal for the given soil and climate conditions."
        )
        
        # Add both variations
        unified_data.append({"prompt": basic_prompt, "response": basic_response, "source": "dataset1_basic"})
        unified_data.append({"prompt": detailed_prompt, "response": detailed_response, "source": "dataset1_detailed"})
    
    # Process Dataset 2 (Simple)
    print("Processing simple dataset...")
    for _, row in df2.iterrows():
        prompt = (
            f"N: {row['N']}, P: {row['P']}, K: {row['K']}, "
            f"temperature: {row['temperature']:.2f}, humidity: {row['humidity']:.2f}, "
            f"pH: {row['ph']:.2f}, rainfall: {row['rainfall']:.2f}"
        )
        response = f"Recommended crop: {row['label']}"
        
        unified_data.append({"prompt": prompt, "response": response, "source": "dataset2"})
    
    print(f"✅ Created {len(unified_data)} training examples")
    return unified_data

In [10]:
import pandas as pd
df2 = pd.read_csv('Crop_recommendation.csv')
df1  = pd.read_csv('sensor_Crop_Dataset.csv')
unified_data = create_unified_training_data(df1, df2)

🔄 Creating unified training dataset...
Processing detailed dataset...
Processing simple dataset...
✅ Created 42200 training examples


In [11]:
df1.head()

Unnamed: 0,Nitrogen,Phosphorus,Potassium,Temperature,Humidity,pH_Value,Rainfall,Crop,Soil_Type,Variety
0,69.074766,53.954402,88.067625,17.261834,72.941652,4.631301,302.842639,Wheat,Clay,Soft Red
1,107.329352,70.102134,32.081067,21.846116,99.361954,4.761658,94.693847,Tomato,Clay,Beefsteak
2,130.634624,67.204533,28.294252,33.246895,81.506836,6.566007,83.563685,Sugarcane,Clay,Co 86032
3,15.169301,87.493181,14.336679,14.396289,59.274465,6.296297,31.508836,Sugarcane,Silt,Co 0238
4,21.881965,89.269712,38.833885,16.773218,51.191584,8.268274,295.193482,Maize,Sandy,Sweet


In [12]:
# unified_data

In [15]:
import json
    
    
print("🚀 UNIFIED MODEL TRAINING")
print("="*40)

# Create unified dataset
unified_data = create_unified_training_data(df1, df2)

# Save as JSONL
with open("unified_crop_training.jsonl", "w") as f:
    for item in unified_data:
        f.write(json.dumps(item) + "\n")

# Load with datasets
from datasets import load_dataset
dataset = load_dataset("json", data_files="unified_crop_training.jsonl")["train"]

# Format for training
def formatting_prompts_func(examples):
    prompts = examples["prompt"]
    responses = examples["response"]
    texts = [
        tokenizer.apply_chat_template(
            [{"role": "user", "content": p}, {"role": "assistant", "content": r}],
            tokenize=False,
            add_generation_prompt=False
        ).removeprefix("<bos>")
        for p, r in zip(prompts, responses)
    ]
    return {"text": texts}

formatted_dataset = dataset.map(formatting_prompts_func, batched=True)

# Train the model
#     from peft import LoraConfig, get_peft_model    
#     model = FastModel.get_peft_model(
#     model,
#     finetune_vision_layers     = False, # Turn off for just text!
#     finetune_language_layers   = True,  # Should leave on!
#     finetune_attention_modules = True,  # Attention good for GRPO
#     finetune_mlp_modules       = True,  # Should leave on always!

#     r = 8,           # Larger = higher accuracy, but might overfit
#     lora_alpha = 8,  # Recommended alpha == r at least
#     lora_dropout = 0,
#     bias = "none",
#     random_state = 3407,
# )
from peft import LoraConfig, get_peft_model
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

from trl import SFTTrainer, SFTConfig

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    args=SFTConfig(
        dataset_text_field="text",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=10,  # Increased for larger dataset
        max_steps=120,    # Increased for more data
        learning_rate=2e-4,
        logging_steps=1,
        optim="paged_adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        report_to="none",
    ),
)

print(f"Training on {len(formatted_dataset)} examples...")
trainer.train()
    
   

🚀 UNIFIED MODEL TRAINING
🔄 Creating unified training dataset...
Processing detailed dataset...
Processing simple dataset...
✅ Created 42200 training examples


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/42200 [00:00<?, ? examples/s]



Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"] (num_proc=16):   0%|          | 0/42200 [00:00<?, ? examples/s]

Training on 42200 examples...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 42,200 | Num Epochs = 1 | Total steps = 120
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 1,490,944 of 1,001,376,896 (0.15% trained)


Step,Training Loss,entropy
1,4.0563,0
2,4.3498,No Log
3,4.376,No Log
4,4.0049,No Log
5,3.9555,No Log
6,3.8403,No Log
7,3.9196,No Log
8,3.6325,No Log
9,3.4494,No Log
10,3.2465,No Log


TrainOutput(global_step=120, training_loss=1.8107136756181716, metrics={'train_runtime': 121.9869, 'train_samples_per_second': 3.935, 'train_steps_per_second': 0.984, 'total_flos': 383510264610816.0, 'train_loss': 1.8107136756181716, 'epoch': 0.011374407582938388})

In [20]:
trainer.model.save_pretrained("./unified_crop_model")
trainer.processing_class.save_pretrained("./unified_crop_model")

('./unified_crop_model/tokenizer_config.json',
 './unified_crop_model/special_tokens_map.json',
 './unified_crop_model/chat_template.jinja',
 './unified_crop_model/tokenizer.model',
 './unified_crop_model/added_tokens.json',
 './unified_crop_model/tokenizer.json')

In [25]:
 
torch.cuda.empty_cache()
gc.collect()
from unsloth.chat_templates import get_chat_template

messages = [
    {
        "role": "system",
        "content": [{"type": "text", "text": "You are a helpful assistant that recommends crops based on soil and climate."}]
    },
    {
        "role": "user",
        "content": [{"type": "text", "text": "N: 80, P: 38, K: 67, temperature: 32.01, humidity: 58, pH: 6.90, rainfall: 302.93"}]
    }
]

inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True, 
    return_tensors = "pt",
    tokenize = True,
    return_dict = True,
).to("cuda")
outputs = model.generate(
    **inputs,
    max_new_tokens = 128, 
    # Recommended Gemma-3 settings!
    temperature = 1.0, top_p = 0.95, top_k = 64,
)
tokenizer.batch_decode(outputs)

AttributeError: 'SlidingWindowLayer' object has no attribute 'max_batch_size'