In [None]:
!pip install transformers datasets accelerate peft flask pyngrok torch


Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Coll

In [None]:
import inspect

from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling

# Training pairs live in a CSV uploaded to the runtime; the tokenize step
# further down reads its "input_text" and "target_text" columns.
CSV_PATH = '/content/custom_dataset.csv'
dataset = load_dataset('csv', data_files=CSV_PATH)

# Tokenizer for the "gpt2" checkpoint. The end-of-sequence token is reused as
# the padding token so that fixed-length (padded) batches can be built.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

def tokenize(batch, max_length=128):
    """Convert a batch of (input_text, target_text) rows into causal-LM features.

    A decoder-only model learns to continue a sequence, so the prompt and its
    answer must be part of the SAME token sequence. The previous version
    tokenized the two columns separately and used the answer's ids as labels
    for the prompt's ids, which pairs unrelated positions; in addition, the
    notebook's DataCollatorForLanguageModeling(mlm=False) rebuilds labels from
    input_ids, so the answers were never trained on at all.

    Args:
        batch: Mapping with "input_text" and "target_text" lists of equal
            length (the shape `datasets.Dataset.map(..., batched=True)` passes).
        max_length: Fixed length every example is padded/truncated to.

    Returns:
        The tokenizer output for "<input> <target><eos>" with an added
        "labels" entry: a copy of input_ids where padded positions are -100
        so they are ignored by the loss.
    """
    def _as_text(value):
        # Empty CSV cells arrive as None; numeric cells arrive as numbers.
        return "" if value is None else str(value).strip()

    eos = tokenizer.eos_token
    texts = [
        f"{_as_text(prompt)} {_as_text(answer)}{eos}"
        for prompt, answer in zip(batch["input_text"], batch["target_text"])
    ]

    features = tokenizer(texts, padding="max_length", truncation=True, max_length=max_length)

    # Mask via attention_mask rather than by comparing with the pad id: the pad
    # token is the EOS token here, and the real EOS must stay a training target.
    features["labels"] = [
        [token_id if attended else -100 for token_id, attended in zip(ids, mask)]
        for ids, mask in zip(features["input_ids"], features["attention_mask"])
    ]
    return features

tokenized_dataset = dataset.map(tokenize, batched=True, remove_columns=["input_text", "target_text"])

# Hold out rows for evaluation. Scoring rows that are also trained on (as the
# earlier "shuffle the train split and take two" approach did) only measures
# memorisation, not generalisation.
split_dataset = tokenized_dataset["train"].train_test_split(test_size=0.2, seed=42)

# Load GPT-2 model
base_model = AutoModelForCausalLM.from_pretrained("gpt2")

# LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1
)

# Apply LoRA: only the adapter weights are trained, the base model is wrapped.
model = get_peft_model(base_model, lora_config)

# The evaluation-schedule argument is called `eval_strategy` in newer
# transformers releases and `evaluation_strategy` in older ones. The install
# cell does not pin a version, so use whichever name this release accepts.
_eval_strategy_arg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./fine_tuned_gpt2",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    logging_steps=5,
    save_strategy="epoch",
    learning_rate=2e-4,
    weight_decay=0.01,
    # fp16 mixed precision needs a GPU; requesting it unconditionally makes the
    # cell fail on a CPU-only runtime.
    fp16=torch.cuda.is_available(),
    save_total_limit=2,
    **{_eval_strategy_arg: "epoch"},
)

# With mlm=False the collator builds causal-LM labels from input_ids.
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

# Same story for the Trainer: `tokenizer=` was renamed to `processing_class=`.
_tokenizer_arg = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=split_dataset["train"],
    eval_dataset=split_dataset["test"],
    data_collator=data_collator,
    **{_tokenizer_arg: tokenizer},
)

# Start fine-tuning
trainer.train()

# Save the LoRA adapter and the tokenizer; the serving cell loads both paths.
model.save_pretrained("./gpt2_lora")
tokenizer.save_pretrained("./gpt2_tokenizer")


Generating train split: 0 examples [00:00, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

  trainer = Trainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mhrishikesh1773[0m ([33mhrishikesh1773-university-of-missouri-kansas-city[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Epoch,Training Loss,Validation Loss
1,No log,5.151693
2,4.407900,5.11221
3,4.407900,5.094381


('./gpt2_tokenizer/tokenizer_config.json',
 './gpt2_tokenizer/special_tokens_map.json',
 './gpt2_tokenizer/vocab.json',
 './gpt2_tokenizer/merges.txt',
 './gpt2_tokenizer/added_tokens.json',
 './gpt2_tokenizer/tokenizer.json')

In [None]:
!pip install flask pyngrok transformers peft torch

from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from pyngrok import ngrok
import torch

import os
from getpass import getpass

app = Flask(__name__)

# ngrok setup. The auth token is a credential and must not be stored in the
# notebook: read it from the NGROK_AUTHTOKEN environment variable, or prompt
# for it (input is not echoed). The token that used to be hard-coded here has
# been exposed in the notebook's history and should be revoked in the ngrok
# dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)

# Load the tokenizer saved by the training cell; EOS doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained("./gpt2_tokenizer")
tokenizer.pad_token = tokenizer.eos_token

# Rebuild the fine-tuned model: base GPT-2 weights plus the saved LoRA adapter.
base_model = AutoModelForCausalLM.from_pretrained("gpt2")
model = PeftModel.from_pretrained(base_model, "./gpt2_lora")
model.eval()  # inference mode (disables dropout)

@app.route('/query', methods=['POST'])
def query():
    """Generate a completion for the prompt in the POSTed JSON body.

    Request body: ``{"prompt": "<text>"}``.

    Returns:
        200 with ``{"response": "<generated continuation>"}`` on success, or
        400 with ``{"error": "..."}`` when the body is not a JSON object or has
        no non-empty string ``prompt`` (previously this raised and produced a
        500).
    """
    payload = request.get_json(silent=True)
    prompt = payload.get("prompt") if isinstance(payload, dict) else None
    if not isinstance(prompt, str) or not prompt.strip():
        return jsonify({"error": "JSON body must contain a non-empty string field 'prompt'."}), 400
    prompt = prompt.strip()

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=128
    )
    prompt_token_count = inputs.input_ids.shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            # Budget for NEW tokens only. `max_length` counts the prompt too,
            # so a prompt truncated to 128 tokens left nothing to generate.
            max_new_tokens=128,
            # temperature / top_p only take effect when sampling is enabled.
            do_sample=True,
            temperature=0.7,              # balanced creativity
            top_p=0.9,                    # nucleus sampling cutoff
            num_beams=3,                  # improves coherent sentence completion
            no_repeat_ngram_size=2,       # prevents phrase repetition
            early_stopping=True,
            # Explicit pad id silences the per-request "Setting pad_token_id" warning.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Drop the prompt by token position instead of by string prefix: decoding
    # does not always reproduce the prompt text exactly, so a prefix match can
    # silently fail and echo the prompt back.
    generated_ids = outputs[0][prompt_token_count:]
    response_text = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    return jsonify({"response": response_text})

if __name__ == "__main__":
    # Open the ngrok tunnel first so the public address is printed before
    # app.run() starts serving requests.
    port = 5000
    tunnel = ngrok.connect(port)
    public_url = tunnel.public_url
    print(f"🚀 API publicly accessible at: {public_url}/query")
    app.run(host="0.0.0.0", port=port, debug=False)





🚀 API publicly accessible at: https://5917-35-197-73-203.ngrok-free.app/query
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
INFO:werkzeug:127.0.0.1 - - [07/Mar/2025 21:00:21] "POST /query HTTP/1.1" 200 -
