In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the Kaggle input directory,
# then print them one per line (same traversal order as os.walk).
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_file_paths:
    print(input_path)


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/stories/preprocessed.md


In [3]:
!pip install -U bitsandbytes


Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.2


In [4]:
import os

# Runtime switches, applied before torch is imported:
#   CUDA_LAUNCH_BLOCKING=1 -> force synchronous CUDA operations for better tracebacks
#   WANDB_DISABLED=true    -> disable W&B
os.environ.update(
    {
        "CUDA_LAUNCH_BLOCKING": "1",
        "WANDB_DISABLED": "true",
    }
)

import torch

# Optional (left disabled): turn off Flash / SDPA attention kernels if
# device-side assert errors show up. Either via environment variables set
# before importing torch:
#   PYTORCH_SDP_DISALLOW_FLASH_ATTENTION=1
#   PYTORCH_SDP_DISALLOW_MEM_EFFICIENT_ATTENTION=1
#   PYTORCH_SDP_DISALLOW_MATH_FALLBACK=1
# or via the torch backend toggles:
#   torch.backends.cuda.enable_flash_sdp(False)
#   torch.backends.cuda.enable_math_sdp(False)
#   torch.backends.cuda.enable_mem_efficient_sdp(False)

print("Environment variables set up. Next, let's import libraries...")


Environment variables set up. Next, let's import libraries...


In [None]:
import torch
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    DataCollatorForLanguageModeling, 
    Trainer, 
    TrainingArguments, 
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, TaskType
from datasets import Dataset
# Disable all forms of SDPA / flash attention:
# torch.backends.cuda.enable_flash_sdp(False)
# torch.backends.cuda.enable_math_sdp(False)
# torch.backends.cuda.enable_mem_efficient_sdp(False)
import sys

print("Imports successful.")

# Record the interpreter and framework versions alongside the run output.
print(f"Python version: {sys.version}")
print(f"Torch version: {torch.__version__}")


In [None]:
model_checkpoint = "sarvamai/sarvam-1"

# SECURITY: never commit an access token in a notebook. The token is read from
# the HF_TOKEN environment variable instead; when it is unset, None is passed
# and the library falls back to whatever credentials are already configured.
# NOTE(review): the token that used to be hardcoded here has been exposed and
# should be revoked. On Kaggle, populate HF_TOKEN from the notebook's secrets
# before running this cell.
hf_token = os.environ.get("HF_TOKEN")

print("Loading tokenizer from:", model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(
    model_checkpoint,
    token=hf_token,
    trust_remote_code=True
)

# Batching needs a padding token; add a dedicated one only if the checkpoint
# does not already define it.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# vocab_size excludes tokens added after loading; len(tokenizer) includes them
# and is the value used later when sizing the embedding matrix.
print("Tokenizer loaded. Vocabulary size:", tokenizer.vocab_size)
print("Total tokens including added special tokens:", len(tokenizer))


In [None]:
# Memory budget passed to the model loader: key 0 is the GPU, "cpu" is host RAM.
max_memory = {0: "3GiB", "cpu": "15GiB"}

# 8-bit loading with the fp32 CPU-offload switch turned on.
quantization_options = dict(
    load_in_8bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
bnb_config = BitsAndBytesConfig(**quantization_options)

print("BitsAndBytes config for 8-bit + CPU offload created.")


In [None]:
print("Loading model from:", model_checkpoint)

# Load the causal LM with the 8-bit quantization config; device_map="auto"
# together with max_memory decides which modules go to the GPU and which to CPU.
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_checkpoint,
        token=hf_token,
        trust_remote_code=True,
        device_map="auto",
        quantization_config=bnb_config,
        max_memory=max_memory
    )
    print("Model loaded successfully.")
except Exception as e:
    # Print a short, readable message, then re-raise so the cell still fails.
    print("Error loading model:", e)
    raise

# The previous closing message claimed that no SDPA kernels would be used, but
# nothing in this notebook disables them (all such toggles are commented out).
# Report the actual module placement instead, which is what matters for the
# GPU/CPU split requested above.
# NOTE(review): if SDPA really must be avoided, pass attn_implementation="eager"
# to from_pretrained — confirm the remote model code supports it.
print("Device map:", getattr(model, "hf_device_map", None))



In [None]:
# Only grow the embedding matrix when the tokenizer holds more tokens than the
# model has embedding rows (e.g. after '[PAD]' was added). Resizing
# unconditionally would shrink the matrix whenever the checkpoint ships more
# rows than len(tokenizer).
embedding_rows = model.get_input_embeddings().weight.shape[0]
if len(tokenizer) > embedding_rows:
    print("Resizing token embeddings to:", len(tokenizer))
    model.resize_token_embeddings(len(tokenizer))
else:
    print(
        "Token embeddings already cover the tokenizer; no resize needed:",
        embedding_rows, ">=", len(tokenizer),
    )

print("Applying LoRA config...")
# Low-rank adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)

try:
    model = get_peft_model(model, lora_config)
    print("LoRA applied successfully.")
except Exception as e:
    # Print a short, readable message, then re-raise so the cell still fails.
    print("Error applying LoRA:", e)
    raise


In [None]:
from datasets import Dataset

def load_and_prepare_data(file_path, marker="START_OF_STORY"):
    """
    Load a preprocessed text/markdown file and split it into individual stories.

    The file is read as UTF-8 and split on every occurrence of ``marker``.
    Each piece is stripped of surrounding whitespace, and pieces that are
    empty after stripping are dropped. Each remaining piece becomes one
    training example.

    Args:
        file_path: Path of the file to read.
        marker: Delimiter string separating stories. Defaults to
            "START_OF_STORY", the marker used by the preprocessed file.

    Returns:
        list[str]: The stories in file order; empty if the file contains no
        non-whitespace text.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If ``marker`` is an empty string (raised by ``str.split``).
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = f.read()
    # Strip each chunk once, then keep only the non-empty ones.
    stripped_chunks = (chunk.strip() for chunk in data.split(marker))
    return [story for story in stripped_chunks if story]


preprocessed_file = "/kaggle/input/stories/preprocessed.md"
stories = load_and_prepare_data(preprocessed_file)
print(f"Number of stories: {len(stories)}")

# Fail fast with a clear message: an empty story list would otherwise only
# surface later as an IndexError when the first tokenized row is displayed.
if not stories:
    raise ValueError(
        f"No stories found in {preprocessed_file}; "
        "expected text delimited by 'START_OF_STORY'."
    )

# One dataset row per story, stored in a single "text" column.
dataset = Dataset.from_dict({"text": stories})


def tokenize_function(examples):
    """Tokenize the "text" entries of a batch, truncating to at most 512 tokens."""
    batch_texts = examples["text"]
    encoded_batch = tokenizer(batch_texts, truncation=True, max_length=512)
    return encoded_batch

print("Tokenizing dataset...")
# Tokenize in batches and drop the raw "text" column from the result.
map_options = {"batched": True, "remove_columns": ["text"]}
tokenized_dataset = dataset.map(tokenize_function, **map_options)

first_tokenized_row = tokenized_dataset[0]
print("Sample tokenized row:", first_tokenized_row)

# Data collator for language modelling with masked-LM mode switched off.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)


In [None]:
output_dir = "finetuned_sarvam"
print("Output dir:", output_dir)

# Training hyperparameters.
# - Batch size 1 with 4 accumulation steps gives an effective batch of 4 per device.
# - report_to="none" turns off experiment-tracker reporting explicitly, instead
#   of relying only on the deprecated WANDB_DISABLED environment variable.
# - The deprecated no_cuda=False argument is dropped; False is its default, so
#   device selection is unchanged.
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,
    weight_decay=0.01,
    save_steps=10,
    logging_steps=5,
    fp16=False,  # 8-bit means we don't rely on half-precision
    gradient_checkpointing=False,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

print("Trainer created.")


In [None]:
print("Starting training...")
try:
    trainer.train()
except Exception as train_error:
    # Show a short message first, then let the original exception propagate.
    print("Error during training:", train_error)
    raise
else:
    print("Training finished!")


In [None]:
# Write the model first, then the tokenizer, into the same output directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("./my_finetuned_model")


In [None]:
!zip -r my_finetuned_model.zip ./my_finetuned_model


In [None]:
!zip -r my_finetuned_model.zip ./my_finetuned_model
