In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig

In [9]:
MODEL_ID = "facebook/opt-125m"

model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

In [None]:
# print(model)

lora_config = LoraConfig(
    r=16,
    task_type="CAUSAL_LM")

# Define the vectors you want to tune (you might have to explicitly specify the layers)
# For example, you can specify the layers like this:
# lora_config.target_modules = ["q_proj", "v_proj"]  # Example for a model with these layers
# Note: The actual layer names depend on the model architecture.


# What other types of task_types are supported in LoraConfig constructor?
# Answer: The `task_type` can be "CAUSAL_LM", "SEQ_2_SEQ_LM", "TEXT_CLASSIFICATION", "TOKEN_CLASSIFICATION", etc.
# Here we are using "CAUSAL_LM" for a causal language model like OPT
## How do the modules change based on the task_type?
# The modules that are targeted for LoRA adaptation can vary based on the task type.
# For example, in a text classification task, you might target different layers than in a causal language modeling task.

# By what fraction does LoRA reduce the number of trainable parameters?
# Answer: LoRA can significantly reduce the number of trainable parameters, often by a factor of 10x or more, depending on the model size and the rank `r` specified in the configuration.
# This is because LoRA introduces low-rank matrices that are much smaller than the original weight matrices.


model.add_adapter(adapter_name="lora_adapter_1", adapter_config=lora_config)

model


OPTForCausalLM(
  (model): OPTModel(
    (decoder): OPTDecoder(
      (embed_tokens): Embedding(50272, 768, padding_idx=1)
      (embed_positions): OPTLearnedPositionalEmbedding(2050, 768)
      (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (layers): ModuleList(
        (0-11): 12 x OPTDecoderLayer(
          (self_attn): OPTSdpaAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): lora.Linear(
              (base_layer): Linear(in_features=768, out_features=768, bias=True)
              (lora_dropout): ModuleDict(
                (lora_adapter_1): Identity()
              )
              (lora_A): ModuleDict(
                (lora_adapter_1): Linear(in_features=768, out_features=16, bias=False)
              )
              (lora_B): ModuleDict(
                (lora_adapter_1): Linear(in_features=16, out_features=768, bias=False)
              )
              (lora_embedding_A): ParameterDict()
  

In [4]:
from peft import get_peft_model


lora_config = LoraConfig(
    r=16,
    task_type="CAUSAL_LM",
    # target_modules=['k_proj']
)
peft_model = get_peft_model(model, lora_config, adapter_name="spanish_adapter")
peft_model.add_adapter(
    adapter_name="french_adapter",
    peft_config=lora_config
)
peft_model




PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): OPTForCausalLM(
      (model): OPTModel(
        (decoder): OPTDecoder(
          (embed_tokens): Embedding(50272, 768, padding_idx=1)
          (embed_positions): lora.Embedding(
            (base_layer): OPTLearnedPositionalEmbedding(2050, 768)
            (lora_dropout): ModuleDict(
              (lora_adapter_1): Identity()
            )
            (lora_A): ModuleDict()
            (lora_B): ModuleDict()
            (lora_embedding_A): ParameterDict(  (lora_adapter_1): Parameter containing: [torch.FloatTensor of size 16x2050])
            (lora_embedding_B): ParameterDict(  (lora_adapter_1): Parameter containing: [torch.FloatTensor of size 768x16])
            (lora_magnitude_vector): ModuleDict()
          )
          (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (layers): ModuleList(
            (0-11): 12 x OPTDecoderLayer(
              (self_attn): OPTSdpaAttention(
          

In [5]:
from datasets import load_dataset

spanish_dataset = load_dataset('andreamorgar/spanish_poetry', split='train')
french_dataset = load_dataset('Abirate/french_book_reviews', split='train')


In [6]:
french_dataset[0]


{'book_title': 'Le Démon de la Colline aux Loups',
 'author': 'Dimitri Rouchon-Borie',
 'reader_review': 'Ce n\'est pas le premier roman à aborder les thèmes lourds de l\'inceste et de l\'enfance martyre, mais il le fait avec une audace et un brio incomparables qui rendent ce livre marquant dans une vie de lecteur. On y sent à quel point l\'auteur n\'a pas cherché à "faire quelque chose", on ne sent jamais l\'intention, on sent juste l\'urgence, incandescente, à raconter la vérité d\'un homme maltraité par la vie au point de dire à la nuit «\xa0 tu ne me feras pas peur j\'ai plus de noir que toi dans mon enfance\xa0».',
 'rating': 5.0,
 'label': 1}

In [7]:
def preprocess_spanish_data(examples):
    return examples
        .filter(lambda x: x['content'] != None)
        .map(lambda x: tokenizer(x['content'], max_length=128, truncation=True, padding='max_length'),
        remove_columns=spanish_dataset.column_names
    )

def preprocess_french_data(examples):
    return examples
        .filter(lambda x : x['reader_review'] != None)
        .map(lambda x: tokenizer(x['reader_review'], max_length=128, truncation=True, padding='max_length'),
        remove_columns=french_dataset.column_names
    )

preprocessed_spanish_data = preprocess_spanish_data(spanish_dataset)
preprocessed_french_data = preprocess_french_data(french_dataset)

IndentationError: unexpected indent (3261040518.py, line 3)

In [None]:
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False)
    

In [None]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./spanish_model",
    learning_rate=2e-5,
    weight_decay=0.01
)

peft_model.set_adapter("spanish_adapter")

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=preprocessed_spanish_data,
    data_collator=data_collator
)

trainer.train()
    

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
500,3.721
1000,3.6014
1500,3.5804


TrainOutput(global_step=1926, training_loss=3.623957902347806, metrics={'train_runtime': 537.0508, 'train_samples_per_second': 28.662, 'train_steps_per_second': 3.586, 'total_flos': 1019462653181952.0, 'train_loss': 3.623957902347806, 'epoch': 3.0})

In [None]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./french_model",
    learning_rate=2e-5,
    weight_decay=0.01
)

peft_model.set_adapter("french_adapter")

trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=preprocessed_french_data,
    data_collator=data_collator
)

trainer.train()
    

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
500,3.5756
1000,3.5267
1500,3.4999
2000,3.4886
2500,3.4926
3000,3.4884
3500,3.4822


TrainOutput(global_step=3624, training_loss=3.50653835742942, metrics={'train_runtime': 1017.8681, 'train_samples_per_second': 28.465, 'train_steps_per_second': 3.56, 'total_flos': 1918918398836736.0, 'train_loss': 3.50653835742942, 'epoch': 3.0})

In [None]:
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

quantized_base_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

peft_model = get_peft_model(quantized_base_model, lora_config, adapter_name="spanish_adapter")
peft_model.add_adapter(
    adapter_name="french_adapter",
    peft_config=lora_config
)

In [None]:
def generate_text(prompt, model):
    inputs = tokenizer(prompt, return_tensors="pt")
    output = model.generate(**inputs, max_length=128)
    return tokenizer.decode(output[0], skip_special_tokens=True)




In [None]:
model_id = 'meta-llama/Meta-Llama-3-8B'

from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(model_id)
model


In [None]:
from transformers import AutoTokenizer
from data_loading import DataLoader
from model_loading import ModelLoader
from data_processing import DataCollatorForSupervisedDataset, DataProcessor

GROUP_SIZE_RATIO = 1/4

data = DataLoader.get_data()
print(f"Loaded {len(data)} examples from the dataset.")
print(f"First example: {data[0]}")

model_loader = ModelLoader(group_size_ratio=GROUP_SIZE_RATIO)
model, tokenizer = model_loader.load_and_prepare_model()
print("Model and tokenizer loaded and prepared.")

data_collator = DataCollatorForSupervisedDataset(tokenizer, GROUP_SIZE_RATIO)

data_processor = DataProcessor(tokenizer)
tokenized_data = data_processor.transform(data)
    

In [1]:
from data_loading import DataLoader
from model_loading import ModelLoader
from data_processing import DataCollatorForSupervisedDataset, DataProcessor

GROUP_SIZE_RATIO = 1/4

data = DataLoader.get_data()
model_loader = ModelLoader(group_size_ratio=GROUP_SIZE_RATIO)
model_id = model_loader.model_id
config = model_loader.get_config(model_id)
scaling_factor = model_loader.scaling_factor

tokenizer = model_loader.load_tokenizer(model_id, scaling_factor, config)

data_processor = DataProcessor(tokenizer)

Repo card metadata block was not found. Setting CardData to empty.


In [2]:
tokenized_data = data_processor.transform(data.take(200))
tokenized_data[0]

Transforming dataset...
Dataset transformed.


{'file': None,
 'output': 'Answer: The paper mentions several directions for improvement:\n\n1. Sampling more video frames to capture high-frequency motion information better. The current model may still miss some fine-grained details due to sparse sampling.\n\n2. Using more powerful pre-trained models that specifically model objects and actions. The current patch-level features have limitations in complex object-level reasoning. \n\n3. Combining different pre-trained models to achieve more general reasoning ability. Each model has its own strengths and weaknesses, so an ensemble may perform better.\n\n4. Handling longer videos that are more commonly seen in real-world applications. The current model focuses on efficiency but may still struggle with hours-long videos.\n\n5. Exploring spatial selection modules to reduce computation for high-resolution videos. The current model only uses region selection, but spatial selection may be useful for very complex videos.\n\nIn summary, samplin

In [12]:
d = {'a': [1, 2, 3], 'b': [4, 5, 6]}
for i in d:
    print(i, d[i])

a [1, 2, 3]
b [4, 5, 6]


In [9]:
import torch
torch.tensor([float(-12.342314)]*10) 


tensor([-12.3423, -12.3423, -12.3423, -12.3423, -12.3423, -12.3423, -12.3423,
        -12.3423, -12.3423, -12.3423])