## 1. Install Dependencies

In [2]:
!pip install -q transformers peft datasets bitsandbytes accelerate

## 2. Imports

In [3]:
import json
import re
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorWithPadding
from peft import LoraConfig, get_peft_model
import torch

## 3. Load & Preprocess Chat Data

In [4]:
# Example: Slack JSON export (replace 'slack_export.json' with your file)
# Slack/Teams exports usually look like:
# [
#   {"user": "Alice", "text": "Hello Bob", "ts": "1680000000.000000"},
#   {"user": "Bob", "text": "Hey Alice!", "ts": "1680000001.000000"}
# ]
#
# Upload your export file to Colab: Runtime → Files → Upload
CHAT_FILE = "/content/slack_export.json"  # change this

with open(CHAT_FILE, "r") as f:
    raw_data = json.load(f)

# Remove system messages and empty texts
cleaned = [
    msg for msg in raw_data
    if msg.get("text") and not msg["text"].startswith("<@") and "joined" not in msg["text"].lower()
]

# Convert into conversation turns (instruction → response)
pairs = []
for i in range(len(cleaned) - 1):
    current = cleaned[i]
    nxt = cleaned[i + 1]
    if current["user"] != nxt["user"]:  # only consecutive different speakers
        instruction = f"{current['user']}: {current['text']}\n{nxt['user']}:"
        output = nxt["text"]
        pairs.append({
            "instruction": "Respond in the style of our team chat",
            "input": instruction,
            "output": output
        })

print(f"Prepared {len(pairs)} conversation pairs.")

Prepared 810 conversation pairs.


## 4. Create Dataset

In [5]:
dataset = Dataset.from_list(pairs)

## 5. Tokenization Function

In [6]:
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_fn(example):
    prompt = f"{example['instruction']}\n{example['input']}"
    labels = example['output']
    text = f"{prompt}\n{labels}"
    tokenized = tokenizer(text, padding='max_length', truncation=True, max_length=512)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_ds = dataset.map(tokenize_fn)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

Map:   0%|          | 0/810 [00:00<?, ? examples/s]

## 6. Load Model with QLoRA

In [7]:
from peft import LoraConfig

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    #load_in_4bit=True,
    device_map="auto"
)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj","v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

## 7. Training

In [8]:
training_args = TrainingArguments(
    output_dir="tinyllama-slack",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=200,
    save_total_limit=2
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds
)

trainer.train()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mshahriar-arpon[0m ([33mshahriar-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,12.5161
20,1.7947
30,0.2367
40,0.1514
50,0.0964
60,0.0576
70,0.0307
80,0.0204
90,0.0183
100,0.0174


TrainOutput(global_step=306, training_loss=0.4988592133364257, metrics={'train_runtime': 844.8842, 'train_samples_per_second': 2.876, 'train_steps_per_second': 0.362, 'total_flos': 7739410627952640.0, 'train_loss': 0.4988592133364257, 'epoch': 3.0})

## 8. Save LoRA Adapters

In [9]:
model.save_pretrained("tinyllama-slack-lora")
tokenizer.save_pretrained("tinyllama-slack-lora")
print("✅ Fine-tuning complete. Adapters saved to 'tinyllama-slack-lora'.")

✅ Fine-tuning complete. Adapters saved to 'tinyllama-slack-lora'.


## 9. Inference Example

In [15]:
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16
)
ft_model = PeftModel.from_pretrained(base_model, "tinyllama-slack-lora")

prompt = "You are answering a question like a conversation.\n Dana: We need to update the docs. \nAlice:"
inputs = tokenizer(prompt, return_tensors="pt").to(ft_model.device)
outputs = ft_model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

You are answering a question like a conversation.
 Dana: We need to update the docs. 
Alice:
The API is returning a 200 status code.
