In [0]:
%pip install --upgrade transformers accelerate peft ai2-olmo bitsandbytes mlflow pynvml
dbutils.library.restartPython()

[43mNote: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.[0m
Collecting transformers
  Using cached transformers-4.39.3-py3-none-any.whl (8.8 MB)
Collecting accelerate
  Using cached accelerate-0.29.2-py3-none-any.whl (297 kB)
Collecting peft
  Using cached peft-0.10.0-py3-none-any.whl (199 kB)
Collecting ai2-olmo
  Using cached ai2_olmo-0.2.5-py3-none-any.whl (118 kB)
Collecting bitsandbytes
  Using cached bitsandbytes-0.43.0-py3-none-manylinux_2_24_x86_64.whl (102.2 MB)
Collecting mlflow
  Using cached mlflow-2.11.3-py3-none-any.whl (19.7 MB)
Collecting pynvml
  Using cached pynvml-11.5.0-py3-none-any.whl (53 kB)
Collecting safetensors>=0.4.1
  Using cached safetensors-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Collecting huggingface-hub<1.0,>=0.19.3
  Using cached huggingface_hub-0.22.2-py3-none-any.whl (388 kB)
Collecting rich
  Using cached rich-13.7.1-py3-none-any.whl (240 kB)
Collecting cached-pat

In [0]:
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    r=16,
    target_modules=["att_proj", "ff_proj"],
    task_type=TaskType.CAUSAL_LM,
    lora_alpha=32,
    lora_dropout=0.05
)

2024-04-09 19:41:44.772182: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-09 19:41:44.772244: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-09 19:41:44.772268: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-09 19:41:44.779373: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Unexpected internal error when monkey patching `Tr

In [0]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import hf_olmo

tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-1B")

In [0]:

model = AutoModelForCausalLM.from_pretrained("allenai/OLMo-1B",
                                             trust_remote_code=True,
                                             cache_dir = "/Volumes/daniel_liden/datasets/h2o_rag",
                                             device_map="auto",
                                             load_in_8bit=True)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
Some weights of OLMoForCausalLM were not initialized from the model checkpoint at allenai/OLMo-1B and are newly initialized: ['model.transformer.ff_out.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [0]:
model.model

Olmo(
  (transformer): ModuleDict(
    (wte): Embedding(50304, 2048)
    (emb_drop): Dropout(p=0.0, inplace=False)
    (ln_f): LayerNorm()
    (blocks): ModuleList(
      (0-15): 16 x OlmoSequentialBlock(
        (dropout): Dropout(p=0.0, inplace=False)
        (act): SwiGLU()
        (attn_out): Linear8bitLt(in_features=2048, out_features=2048, bias=False)
        (ff_out): Linear8bitLt(in_features=8192, out_features=2048, bias=False)
        (rotary_emb): RotaryEmbedding()
        (attn_norm): LayerNorm()
        (ff_norm): LayerNorm()
        (att_proj): Linear8bitLt(in_features=2048, out_features=6144, bias=False)
        (ff_proj): Linear8bitLt(in_features=2048, out_features=16384, bias=False)
      )
    )
    (ff_out): Embedding(50304, 2048)
  )
)

In [0]:
from peft import get_peft_model

lora_model = get_peft_model(model, lora_config)
lora_model.print_trainable_parameters()

trainable params: 6,815,744 || all params: 1,183,580,160 || trainable%: 0.5758582502768549


In [0]:
model.add_adapter(lora_config)

In [0]:
from datasets import Dataset, DatasetDict, load_dataset

# Load the WikiText-2 dataset
wikitext = load_dataset("wikitext", "wikitext-2-raw-v1")

# Tokenize the dataset
def tokenize_function(example):
    # Split the example into individual lines
    lines = example["text"].split("\n")
    
    # Remove empty lines and lines starting with ' ='
    filtered_lines = [line for line in lines if line.strip() and not line.startswith(' =')]
    
    # Join the filtered lines back into a single string
    text = "\n".join(filtered_lines)
    
    input_text = "Replace all es or Es with 3s in the following text.\n\n### Input:\n" + text + "\n\n### Output:\n"
    output_text = text.replace("e", "3").replace("E", "3") + "<|endoftext|>"
    
    return tokenizer(input_text + output_text, truncation=True, max_length=128)

# Tokenize the train and validation splits
tokenized_train = wikitext["train"].map(tokenize_function, num_proc=4, remove_columns=["text"])
tokenized_validation = wikitext["validation"].map(tokenize_function, num_proc=4, remove_columns=["text"])

# Shuffle the datasets
tokenized_train = tokenized_train.shuffle(seed=42)
tokenized_validation = tokenized_validation.shuffle(seed=42)

# Select the desired number of examples
train_dataset = tokenized_train.select(range(8000))
eval_dataset = tokenized_validation.select(range(2000))

# Create a DatasetDict with the selected subsets
dataset_dict = DatasetDict({
    "train": train_dataset,
    "eval": eval_dataset
})



In [0]:
dataset_dict['train']

Dataset({
    features: ['input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 8000
})

In [0]:
from transformers import DataCollatorForLanguageModeling, TrainingArguments, Trainer

# Define the data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="/Volumes/daniel_liden/datasets/h2o_rag/output",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    learning_rate=1e-4,
    weight_decay=0.01,
    logging_steps=1,
    save_steps=500,
    save_total_limit=2,
    evaluation_strategy="steps",
    eval_steps=50,
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_dict['train'],
    eval_dataset=dataset_dict['eval'],
    data_collator=data_collator,
)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [0]:
import mlflow

# Start training and track with MLflow
with mlflow.start_run(log_system_metrics=True):
    trainer.evaluate() # eval before starting tuning
    trainer.train()
    mlflow.log_params(training_args.to_dict())

2024/04/09 19:41:55 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.




Step,Training Loss,Validation Loss
50,1.5771,1.766602
100,1.8257,1.725586
150,1.6582,1.712891
200,1.6655,1.704102
250,1.9663,1.694336
300,1.5964,1.697266
350,1.6777,1.700195
400,1.7029,1.696289
450,1.1433,1.694336
500,1.6677,1.692383




config.json:   0%|          | 0.00/1.32k [00:00<?, ?B/s]

2024/04/09 20:02:32 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/04/09 20:02:32 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


In [0]:
def generate(input_text, max_new_tokens=100):
    # Create the prompt template
    prompt_template = "Replace all es or Es with 3s in the following text.\n\n### Input:\n{input_text}\n\n### Output:\n"
    
    # Format the prompt with the input text
    formatted_prompt = prompt_template.format(input_text=input_text)
    
    # Tokenize the formatted prompt
    input_ids = tokenizer(formatted_prompt, return_tensors="pt").input_ids.to(model.device)
    
    # Generate the output using the trained model
    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        eos_token_id=tokenizer.eos_token_id,
        repetition_penalty=1.1,
    )
    
    # Decode the generated output
    generated_text = tokenizer.batch_decode(gen_tokens, skip_special_tokens=False)[0]
    
    # Extract the generated output after "### Output:"
    generated_output = generated_text.split("### Output:")[-1].strip()
    
    return generated_output

# Example usage
example_text = """
The quick brown fox jumps over the lazy dog.
Elephants are the largest land mammals on Earth.
The Earth revolves around the Sun, which is a star.
"""

# Generate the output using the trained model
generated_output = generate(example_text)

print("Generated Output:")
print(generated_output)

[0;31m---------------------------------------------------------------------------[0m
[0;31mTypeError[0m                                 Traceback (most recent call last)
File [0;32m<command-422193997890608>, line 35[0m
[1;32m     28[0m example_text [38;5;241m=[39m [38;5;124m"""[39m
[1;32m     29[0m [38;5;124mThe quick brown fox jumps over the lazy dog.[39m
[1;32m     30[0m [38;5;124mElephants are the largest land mammals on Earth.[39m
[1;32m     31[0m [38;5;124mThe Earth revolves around the Sun, which is a star.[39m
[1;32m     32[0m [38;5;124m"""[39m
[1;32m     34[0m [38;5;66;03m# Generate the output using the trained model[39;00m
[0;32m---> 35[0m generated_output [38;5;241m=[39m generate(example_text)
[1;32m     37[0m [38;5;28mprint[39m([38;5;124m"[39m[38;5;124mGenerated Output:[39m[38;5;124m"[39m)
[1;32m     38[0m [38;5;28mprint[39m(generated_output)

File [0;32m<command-422193997890608>, line 12[0m, in [0;36mgenerate[0;34m(input_