In [3]:
!pip install chromadb crewai crewai['tools'] datasets langchain-google-genai matplotlib pandas pydantic scikit-learn seaborn torch transformers trl tqdm unsloth




In [4]:
from crewai.tools import BaseTool
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset
import os
import json


class FTTool(BaseTool):
    """CrewAI tool that fine-tunes a HuggingFace model with the `Trainer` API.

    The tool loads a tokenizer and a task-specific model class, loads a dataset
    (local JSON file or a dataset name), tokenizes one text column, trains, and
    saves the model and tokenizer. Every outcome, including failures, is
    returned as a JSON string so the caller always gets a structured reply.
    """

    name: str = "FTTool"
    description: str = (
        "Fine-tunes a HuggingFace model for a given task type. "
        "Requires: model_name, dataset_name_or_path, task_type (causal_lm, sequence_classification, token_classification, etc.), "
        "and optional training arguments."
    )

    def _run(
        self, model_name: str, dataset_name_or_path: str, task_type: str, **kwargs
    ) -> str:
        """Fine-tune `model_name` on a dataset and save the result.

        Args:
            model_name: Identifier passed to `from_pretrained` for both the
                tokenizer and the model.
            dataset_name_or_path: If this path exists on disk it is loaded with
                the "json" builder; otherwise it is passed to `load_dataset`
                as a dataset name.
            task_type: One of "causal_lm", "sequence_classification",
                "token_classification".
            **kwargs: Optional overrides: learning_rate, num_epochs,
                train_batch_size, eval_batch_size, max_length,
                evaluation_strategy, weight_decay, logging_steps, report_to,
                seed (validation split seed, default 42), text_column
                (default "text"), output_dir
                (default "./output/fine_tuning_results").

        Returns:
            A JSON string. On success it holds "status", "output_dir" and
            "message"; on any failure it holds "status": "error" and "message".
        """
        import inspect

        def error_json(message: str) -> str:
            # Single place that shapes the error payload.
            return json.dumps({"status": "error", "message": message}, indent=2)

        try:
            model_loader_map = {
                "causal_lm": AutoModelForCausalLM,
                "sequence_classification": AutoModelForSequenceClassification,
                "token_classification": AutoModelForTokenClassification,
            }
            if task_type not in model_loader_map:
                return error_json(
                    f"Unsupported task_type '{task_type}'. Supported: {list(model_loader_map.keys())}"
                )

            tokenizer = AutoTokenizer.from_pretrained(model_name)
            # padding="max_length" is used for every task type below, so the
            # pad-token fallback must not be limited to causal LM.
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            model = model_loader_map[task_type].from_pretrained(model_name)
            # Keep the model config in sync with the tokenizer's padding token.
            if (
                getattr(model.config, "pad_token_id", None) is None
                and tokenizer.pad_token_id is not None
            ):
                model.config.pad_token_id = tokenizer.pad_token_id

            # Load dataset
            if os.path.exists(dataset_name_or_path):
                dataset = load_dataset("json", data_files=dataset_name_or_path)
            else:
                dataset = load_dataset(dataset_name_or_path)

            if "train" not in dataset:
                return error_json(
                    f"Dataset '{dataset_name_or_path}' has no 'train' split; found: {list(dataset.keys())}"
                )

            # Ensure validation split exists
            if "validation" not in dataset:
                if "test" in dataset:
                    dataset["validation"] = dataset["test"]
                else:
                    # Seeded so repeated runs evaluate on the same hold-out rows.
                    split_dataset = dataset["train"].train_test_split(
                        test_size=0.1, seed=kwargs.get("seed", 42)
                    )
                    dataset["train"] = split_dataset["train"]
                    dataset["validation"] = split_dataset["test"]

            text_column = kwargs.get("text_column", "text")
            if text_column not in dataset["train"].column_names:
                return error_json(
                    f"Column '{text_column}' not found in dataset; available columns: {dataset['train'].column_names}"
                )

            max_len = kwargs.get("max_length", 512 if task_type == "causal_lm" else 256)

            def tokenize_function(examples):
                return tokenizer(
                    examples[text_column],
                    truncation=True,
                    padding="max_length",
                    max_length=max_len,
                )

            def tokenize_split(split):
                # For causal LM the labels come from the data collator, so the
                # raw columns (including any classification label) are dropped
                # to keep them out of the model inputs.
                drop = split.column_names if task_type == "causal_lm" else None
                return split.map(tokenize_function, batched=True, remove_columns=drop)

            # Only the splits the Trainer actually consumes are tokenized.
            tokenized_datasets = {
                split_name: tokenize_split(dataset[split_name])
                for split_name in ("train", "validation")
                if split_name in dataset
            }

            # Adjust evaluation strategy
            eval_strategy = kwargs.get(
                "evaluation_strategy",
                "epoch" if "validation" in tokenized_datasets else "no",
            )

            output_dir = kwargs.get("output_dir", "./output/fine_tuning_results")
            train_args = TrainingArguments(
                output_dir=output_dir,
                eval_strategy=eval_strategy,
                learning_rate=kwargs.get("learning_rate", 5e-5),
                per_device_train_batch_size=kwargs.get("train_batch_size", 4),
                per_device_eval_batch_size=kwargs.get("eval_batch_size", 4),
                num_train_epochs=kwargs.get("num_epochs", 3),
                weight_decay=kwargs.get("weight_decay", 0.01),
                logging_dir=os.path.join(output_dir, "logs"),
                logging_steps=kwargs.get("logging_steps", 50),
                save_strategy="epoch",
                report_to=kwargs.get("report_to", "none"),
                push_to_hub=False,
            )

            data_collator = None
            if task_type == "causal_lm":
                # Without labels the model returns no loss; this collator
                # builds them from input_ids for next-token prediction.
                from transformers import DataCollatorForLanguageModeling

                data_collator = DataCollatorForLanguageModeling(
                    tokenizer=tokenizer, mlm=False
                )

            trainer_kwargs = {
                "model": model,
                "args": train_args,
                "train_dataset": tokenized_datasets["train"],
                "eval_dataset": tokenized_datasets.get("validation"),
                "data_collator": data_collator,
            }
            # Newer transformers releases take the tokenizer as
            # `processing_class`; older ones only know `tokenizer`.
            trainer_params = inspect.signature(Trainer.__init__).parameters
            tokenizer_arg = (
                "processing_class" if "processing_class" in trainer_params else "tokenizer"
            )
            trainer_kwargs[tokenizer_arg] = tokenizer

            trainer = Trainer(**trainer_kwargs)
            trainer.train()

            model.save_pretrained(output_dir)
            tokenizer.save_pretrained(output_dir)

            return json.dumps(
                {
                    "status": "success",
                    "output_dir": output_dir,
                    "message": f"Fine-tuned {model_name} for {task_type} saved at {output_dir}",
                },
                indent=2,
            )

        except Exception as e:
            # Deliberate catch-all: the calling agent expects a JSON reply
            # rather than a raised exception.
            return error_json(str(e))






from crewai import Agent, Task, Crew, LLM
from dotenv import load_dotenv
import re
import os
import json
import pandas as pd
from getpass import getpass

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Placeholder value only; presumably a dependency expects this variable to
# exist even though the model used here is Gemini — TODO confirm.
# setdefault keeps a real key if the user already has one configured.
os.environ.setdefault("OPENAI_API_KEY", "dummy")

# Do not overwrite a key that came from the environment or .env with a
# literal string. Prompt for it (without echo) only when it is missing.
if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Enter your GEMINI_API_KEY: ")

# LLM
llm = LLM(
    model="gemini/gemini-2.5-flash"
)


# TOOLS
ft_tool = FTTool()


# INPUT
# Surrounding whitespace is stripped so a stray space or newline does not end
# up inside the model id, task type, or dataset path.
ft_model_name = input("Enter the name of the model: ").strip()
ft_task_type = input("Enter task type (causal_lm, sequence_classification, token_classification): ").strip()
ft_dataset = input("Enter dataset name or path: ").strip()


# AGENT
# The single agent of this crew. It uses the module-level `llm`, and its
# role/goal/backstory texts tell it to run fine-tuning only through FTTool and
# to return the tool's JSON unchanged.
ft_agent = Agent(
    llm=llm,
    verbose=True,
    role="HuggingFace Fine-Tuning Expert",
    goal="""Act as a top-tier HuggingFace trainer capable of orchestrating the entire fine-tuning pipeline.
        Guide the user to select a valid HuggingFace task type ('causal_lm', 'sequence_classification', or 'token_classification'),
        validate all provided inputs including model_name, dataset_name_or_path, and training arguments,
        auto-correct obvious parameter mistakes, dynamically load the correct transformer model class,
        prepare datasets with optimal tokenization strategies for the task type,
        configure sensible but high-performing defaults for TrainingArguments while honoring user overrides,
        and execute fine-tuning exclusively using the FTTool.
        Return only the exact JSON object provided by FTTool — no extra commentary, formatting, or explanation.""",
    backstory="""You are an elite machine learning architect with deep expertise in HuggingFace Transformers,
        specializing in maximizing training efficiency and model performance.
        Over the years, you’ve orchestrated countless fine-tuning runs across NLP tasks,
        balancing precision engineering with rapid prototyping.
        You are obsessive about parameter validation, preventing wasted GPU cycles,
        and ensuring reproducibility. Your workflow is surgical:
        prompt, validate, adapt, execute, and deliver — with zero noise in the output.""",
)


# TASK
# The prompt refers to the tool by its real name (FTTool), and the expected
# output lists exactly the keys FTTool returns (status, output_dir, message),
# so the agent is not asked for fields the tool never produces.
ft_task_transformer = Task(
    description=f"""
        Prompt the user to choose the HuggingFace task type they want to perform (options: "causal_lm", "sequence_classification", or "token_classification").
        Once the task type is selected, automatically collect and validate all required parameters:
        a. model_name (string, valid HuggingFace model ID or local path)
        b. dataset_name_or_path (string, valid HuggingFace dataset name or local file path)
        c. task_type (string, must be one of the supported task types)
        d. optional training arguments (learning_rate, num_epochs, train_batch_size, eval_batch_size, max_length, evaluation_strategy, weight_decay, logging_steps).

        Fine-tune the HuggingFace model {ft_model_name} for the task {ft_task_type} using the dataset {ft_dataset}.
        Use the FTTool to configure and run training. Pass model_name='{ft_model_name}', dataset_name_or_path='{ft_dataset}', task_type='{ft_task_type}', and any additional training arguments.

        After collecting and validating inputs:
        1. Dynamically load the correct model class based on task_type:
            a. causal_lm -> AutoModelForCausalLM
            b. sequence_classification -> AutoModelForSequenceClassification
            c. token_classification -> AutoModelForTokenClassification
        2. Load and tokenize the dataset according to task_type, applying appropriate truncation, padding, and max_length rules.
        3. Automatically configure TrainingArguments with sensible defaults, allowing overrides from user-supplied training arguments.
        4. Run fine-tuning using the FTTool, passing model_name, dataset_name_or_path, task_type, and all additional arguments to `_run()`.
        5. On completion, return **only** the JSON string returned by FTTool containing:
            a. "status" (success or error)
            b. "output_dir" (model save path)
            c. "message" (summary of fine-tuning)
        Do not include any additional text, explanations, or formatting outside the returned JSON.""",
    expected_output="""A single JSON object returned by the FTTool containing exactly these keys:
        status (string) – the final training status ('success' or 'error'), message (string) – a summary of the fine-tuning run or the error that occurred, and output_dir (string, present on success) – the path where the fine-tuned model is stored.
        No extra text, explanations, or formatting outside of the JSON object.""",
    tools=[ft_tool],
    agent=ft_agent,
)


# CREW
# One agent and one task, with verbose logging switched on.
crew = Crew(tasks=[ft_task_transformer], agents=[ft_agent], verbose=True)


# KICKOFF
# Runs the crew; whatever kickoff() returns is kept in `result`.
result = crew.kickoff()


Enter the name of the model: distilbert-base-uncased
Enter task type (causal_lm, sequence_classification, token_classification): sequence_classification
Enter dataset name or path: imdb


Output()

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,0.2507,0.452197
2,0.2661,0.523511
3,0.0429,0.575472


Output()

In [12]:
from transformers import pipeline

# Directory the fine-tuning step saved the model and tokenizer to.
model_path = "./output/fine_tuning_results"

# Reload both artifacts from disk; the two loads are independent of each other.
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

In [13]:
# Wrap the reloaded model and tokenizer in a text-classification pipeline.
classifier = pipeline(task="text-classification", model=model, tokenizer=tokenizer)

# Two hand-written reviews, one negative and one positive, as a quick sanity check.
text1 = "Thug life is one of the worst movie of all time"
text2 = "It's the best movie i have ever seen in my life"

# Each call returns a list of {'label', 'score'} dicts, printed one result per line.
result1, result2 = classifier(text1), classifier(text2)
print(result1, result2, sep="\n")

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'label': 'LABEL_0', 'score': 0.9996126294136047}]
[{'label': 'LABEL_1', 'score': 0.9997625946998596}]
