In [1]:
!pip install transformers datasets peft accelerate bitsandbytes 

Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Using cached fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Using cached fsspec-2024.9.0-py3-none-any.whl (179 kB)
Installing collected packages: fsspec
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2024.10.0
    Uninstalling fsspec-2024.10.0:
      Successfully uninstalled fsspec-2024.10.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
xformers 0.0.28.post3 requires torch==2.5.1, but you have torch 2.1.2 which is incompatible.[0m[31m
[0mSuccessfully installed fsspec-2024.9.0


In [None]:
from huggingface_hub.hf_api import HfFolder
HfFolder.save_token("YOUR_HUGGINGFACE_TOKEN")

In [None]:
import os
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from datasets import Dataset
from peft import LoraConfig, get_peft_model
import random
import copy

# Set environment variable to manage memory fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Directories for saving model and offloading
save_dir = os.path.expanduser("llama_pubmed/")
offload_dir = os.path.expanduser("llama_offload/")
os.makedirs(save_dir, exist_ok=True)
os.makedirs(offload_dir, exist_ok=True)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
tokenizer.pad_token = tokenizer.eos_token

quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Load the model
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B",
    device_map="auto",
    offload_folder=offload_dir,
    quantization_config=quantization_config
)

# Configure LoRA
lora_config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)

# Load dataset from CSV (replace with your dataset path)
file_path = "processed_trials.csv"  # Update with your file path
import pandas as pd
df = pd.read_csv(file_path)

# Convert dataset to Hugging Face Dataset format
ds = Dataset.from_dict({'input': df["detailed_description"].tolist(), 'output': df["brief_summary"].tolist()})


def preprocess_function(examples):
    # Combine input and output for causal LM tasks
    combined_texts = [
        f"{input_text}\n\n###\n\nSummary: {output_text}"
        for input_text, output_text in zip(examples["input"], examples["output"])
    ]

    # Tokenize combined texts
    tokenized = tokenizer(
        combined_texts,
        truncation=True,
        padding="max_length",
        max_length=1024,  # Adjust max length if needed
    )

    return tokenized

# Apply preprocessing
tokenized_datasets = ds.map(preprocess_function, batched=True, remove_columns=["input", "output"])
tokenized_datasets.set_format("torch")

# Split the dataset into train and eval datasets
train_size = int(0.8 * len(tokenized_datasets))
train_dataset = tokenized_datasets.select(range(train_size))
eval_dataset = tokenized_datasets.select(range(train_size, len(tokenized_datasets)))

# Data collator
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Training arguments
training_args = TrainingArguments(
    output_dir=save_dir,
    eval_strategy="steps",
    eval_steps=500,
    logging_steps=500,
    save_steps=1000,
    save_total_limit=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    fp16=torch.cuda.is_available(),
    report_to="none",
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

# Train the model
trainer.train()

# Evaluate the model
eval_results = trainer.evaluate()
print(f"Evaluation results: {eval_results}")

# Save the fine-tuned model and tokenizer
model.save_pretrained(os.path.join(save_dir, "fine-tuned-llama-lora"))
tokenizer.save_pretrained(os.path.join(save_dir, "fine-tuned-llama-lora"))

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Map:   0%|          | 0/6497 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss,Validation Loss
500,1.7894,1.752174
1000,1.7228,1.738235
1500,1.7107,1.735267
2000,1.6863,1.732622
2500,1.7049,1.733505
3000,1.6743,1.735021
3500,1.6648,1.73396


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the base model
base_model_name = "meta-llama/Meta-Llama-3-8B"  # Replace with your base model
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)

# Load the LoRA weights
fine_tuned_model_dir = "llama_pubmed/fine-tuned-llama-lora"
model = PeftModel.from_pretrained(base_model, fine_tuned_model_dir)

# Ensure the model is in evaluation mode
model.eval()


# Define a function to generate text based on an input prompt
def generate_summary(text, max_length=512, temperature=0.7, top_p=0.9):
    """
    Generates a summary based on the input text using Meta-Llama.
    Parameters:
    - text: The input text for summarization.
    - max_length: Maximum length of the generated summary.
    - temperature: Controls the randomness of predictions (lower = more deterministic).
    - top_p: Nucleus sampling probability (focuses on top probabilities).
    """
    with torch.no_grad():  # Disable gradient computation for faster inference
        # Use a structured prompt with clear instructions and examples
        prompt = f"""{text}\n\n###\n\nSummary:"""
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,  # Enable sampling for variability
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

# Example text for summarization
example_text = """Endosymbiotic hypothesis in hypothetical alien plastids. this happened earlier today.

so i am a canadian and recently came back from a vacation. 
earlier today, i was in orlando purchasing a sub from a well-known sub chain. i usually get a cold cut w/lettuce, tomato and house sauce.

so after ordering my usual combo, i see the guy spray red sauce all over my sub. i say "nononono i asked for house sauce"

"oh okay i thought you asked for hot sauce. wait...what is house sauce?"

i had no idea they didn't serve house sauce in us."""
print("Generating summary...")
summary = generate_summary(example_text)
print(f"Summary:\n{summary}")

In [None]:
# Example text for summarization
example_text = """this happened earlier today.

so i am a canadian and recently came back from a vacation. 
earlier today, i was in orlando purchasing a sub from a well-known sub chain. i usually get a cold cut w/lettuce, tomato and house sauce.

so after ordering my usual combo, i see the guy spray red sauce all over my sub. i say "nononono i asked for house sauce"

"oh okay i thought you asked for hot sauce. wait...what is house sauce?"

i had no idea they didn't serve house sauce in us."""
print("Generating summary...")
summary = generate_summary(example_text)
print(f"Summary:\n{summary}")