<a href="https://colab.research.google.com/github/abhg86/LLM/blob/main/papier/Pythia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

In [2]:
import torch
import torch.nn as nn
from torch.optim import Adam
from tqdm import tqdm
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

import datasets

from transformers import GPTNeoXForCausalLM, pipeline, AutoTokenizer
from transformers import pipeline as transformers_pipeline

import json
import numpy as np
import pandas as pd

In [3]:

# pipeline = pipeline(
#     "text-generation",
#     model = "EleutherAI/pythia-160m-deduped",
#     revision="step143000",
#     cache_dir="./pythia-160m-deduped/step143000"
#     )

# model = pipeline.model
# tokenizer = pipeline.tokenizer
# # model = GPTNeoXForCausalLM.from_pretrained(
# #   "EleutherAI/pythia-70m-deduped",
# #   revision="step143000",
# #   cache_dir="./pythia-70m-deduped/step143000",
# # )
# # tokenizer = AutoTokenizer.from_pretrained(
# #     "EleutherAI/pythia-70m-deduped",
# #     revision="step143000",
# #     cache_dir="./pythia-70m-deduped/step143000",
# #     )

# inputs = tokenizer("Paris is the capital of", return_tensors="pt")
# tokens = model.generate(**inputs, max_length = 50)
# tokenizer.decode(tokens[0])

In [89]:
class Steer(nn.Module):
  """Additive steering wrapper around a language-model output head.

  The module computes ``lm_head(x + epsilon * delta)`` where ``delta`` is
  ``sum_k projector2(projector1(x) * steer_values[:, k])``. Both projectors are
  shared by all steer dimensions, so the steer values combine additively.

  Args:
    lm_head: callable (typically an ``nn.Module``) mapping hidden states to logits.
    embed_dim: size of the last axis of the hidden states.
    num_steers: number of steer dimensions; sets the shape of the initial
      all-zero ``steer_values``.
    rank: width of the intermediate projection.
    init_var: accepted for interface compatibility; not used by this
      implementation (the projectors keep ``nn.Linear``'s default init).
    epsilon: scale applied to ``delta`` before it is added to ``x``.
  """

  def __init__(self, lm_head, embed_dim, num_steers=2, rank=1000, init_var=1e-2, epsilon=1e-3):
    super().__init__()
    self.projector1 = nn.Linear(embed_dim, rank)
    self.projector2 = nn.Linear(rank, embed_dim)
    self.lm_head = lm_head
    self.rank = rank
    self.epsilon = epsilon
    self.num_steers = num_steers
    self.embed_dim = embed_dim
    # Non-persistent buffer: follows the module across `.to(device)` calls but
    # stays out of `state_dict()`, exactly like the plain attribute it replaces.
    self.register_buffer("steer_values", torch.zeros(num_steers), persistent=False)

  @property
  def weight(self):
    """List ``[projector1.weight, projector2.weight]``.

    Exposed as an attribute (not a call), matching how instances behaved after
    construction when ``__init__`` overwrote the method with its return value.
    """
    return [self.projector1.weight, self.projector2.weight]

  def set_values(self, steer_values):
    """Store the steer values used by subsequent ``forward`` calls.

    Args:
      steer_values: tensor or nested sequence of numbers, shaped
        ``(num_steers,)`` (shared by the whole batch) or ``(batch, num_steers)``
        (``batch`` may be 1 to broadcast).
    """
    values = torch.as_tensor(steer_values)
    if not values.is_floating_point():
      values = values.float()
    self.steer_values = values

  def forward(self, x):
    """Return ``lm_head`` applied to the steered hidden states.

    Args:
      x: hidden states shaped ``(batch, ..., embed_dim)``.

    Raises:
      ValueError: if the stored steer values are not 1-D/2-D or their leading
        size is neither 1 nor ``x.shape[0]``.
    """
    values = self.steer_values.to(device=x.device, dtype=x.dtype)
    if values.dim() == 1:
      values = values.unsqueeze(0)  # one set of values shared by every example
    if values.dim() != 2:
      raise ValueError(
          f"steer_values must be 1-D or 2-D, got shape {tuple(self.steer_values.shape)}")
    if values.shape[0] not in (1, x.shape[0]):
      raise ValueError(
          f"steer_values has {values.shape[0]} rows but the batch has {x.shape[0]} examples")

    # (batch|1, num_steers, 1, ..., 1): broadcasts against the projection below.
    scale = values.reshape(values.shape[0], values.shape[1], *([1] * (x.dim() - 1)))
    # Insert a steer axis at position 1, scale each steer's copy, project back
    # to embed_dim and sum the steer axis away so delta has the shape of x.
    steered = self.projector1(x).unsqueeze(1) * scale
    delta = self.projector2(steered).sum(dim=1)
    return self.lm_head(x + self.epsilon * delta)

  def regularization_term(self):
    """Sum of the norms of the two projector weight matrices."""
    return torch.norm(self.projector1.weight) + torch.norm(self.projector2.weight)

  def state_dict(self, destination=None, prefix='', keep_vars=False):
    """Standard state dict plus nested ``projector1`` / ``projector2`` entries.

    The nested entries are the projectors' own state dicts; ``load_state_dict``
    reads them. Arguments are forwarded by keyword because passing them
    positionally to ``nn.Module.state_dict`` is deprecated.
    """
    state_dict_ = super().state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars)
    state_dict_[prefix + 'projector1'] = self.projector1.state_dict()
    state_dict_[prefix + 'projector2'] = self.projector2.state_dict()
    return state_dict_

  def load_state_dict(self, state_dict, strict=True):
    """Restore the projector weights.

    Args:
      state_dict: either the nested format written by ``state_dict`` (keys
        ``"projector1"`` and ``"projector2"``), or a flat ``nn.Module`` state
        dict, which is handed to the default loader.
      strict: only used for the flat format.
    """
    if "projector1" in state_dict and "projector2" in state_dict:
      self.projector1.load_state_dict(state_dict["projector1"])
      self.projector2.load_state_dict(state_dict["projector2"])
      return None
    return super().load_state_dict(state_dict, strict=strict)


In [90]:
def train(dataloader, model, steer, tokenizer, n_steps=1000, lr=1e-2, training_steer=0, num_steers=2, max_length=256, regularization=1e-6, save_path="train.pt"):
    """Train with a causal-LM loss plus a norm penalty on the steer projectors.

    Args:
        dataloader: iterable of batches; each batch is a mapping with a "text"
            entry (list of strings) and a "label" entry (numbers). It is
            re-iterated whenever it is exhausted before ``n_steps`` is reached.
        model: callable as ``model(input_ids=..., attention_mask=...,
            position_ids=..., labels=...)`` returning an object with ``.loss``.
        steer: module providing ``set_values``, ``regularization_term``,
            ``state_dict`` and ``parameters``.
        tokenizer: callable returning "input_ids" and "attention_mask" tensors.
        n_steps: number of optimizer steps.
        lr: Adam learning rate.
        training_steer: index of the steer dimension that receives the stance.
        num_steers: width of the steer-value tensor handed to ``steer``.
        max_length: truncation length passed to the tokenizer.
        regularization: weight of ``steer.regularization_term()`` in the loss.
        save_path: where ``[steer.state_dict(), n_steps]`` is written at the end.

    Raises:
        ValueError: if ``training_steer`` is outside ``[0, num_steers)`` or the
            dataloader yields no batches.
    """
    if not 0 <= training_steer < num_steers:
        raise ValueError(
            f"training_steer={training_steer} is outside [0, {num_steers})")

    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)
    steer.to(device)  # no-op when steer is already a submodule of model

    print("number of training steps:", n_steps)
    start_step = 0

    # Optimize every trainable parameter of the model and of the steer module,
    # de-duplicated in case the steer module is registered inside the model.
    trainable, seen_ids = [], set()
    for param in list(model.parameters()) + list(steer.parameters()):
        if param.requires_grad and id(param) not in seen_ids:
            seen_ids.add(id(param))
            trainable.append(param)
    optimizer = Adam(trainable, lr=lr)

    data_iter = iter(dataloader)
    for step_i in tqdm(range(start_step, n_steps)):
        batch = next(data_iter, None)
        if batch is None:
            # Epoch finished: start over from the beginning of the data.
            data_iter = iter(dataloader)
            batch = next(data_iter, None)
            if batch is None:
                raise ValueError("dataloader yielded no batches")

        batch_text = batch["text"]
        cur_batch_size = len(batch_text)

        # label / 2 - 1: for labels 0..4 (presumably the five SST-5 classes —
        # confirm against the dataset) this gives a stance in [-1, 1].
        batch_stance = torch.as_tensor(batch["label"], dtype=torch.float32, device=device) / 2 - 1
        # One row per example; only the steer dimension being trained is non-zero.
        steer_values = torch.zeros(cur_batch_size, num_steers, device=device)
        steer_values[:, training_steer] = batch_stance
        steer.set_values(steer_values)

        tokenized = tokenizer(batch_text, padding=True, max_length=max_length, truncation=True, return_tensors="pt")
        input_ids = tokenized["input_ids"].to(device)
        attention_mask = tokenized["attention_mask"].to(device)
        # Exclude padded positions from the loss. The mask is used instead of the
        # pad token id because the pad token may coincide with a real token (EOS).
        labels = input_ids.masked_fill(attention_mask == 0, -100)

        position_ids = torch.arange(0, input_ids.shape[1], dtype=torch.long, device=device)
        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)

        optimizer.zero_grad()
        output = model(input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids, labels=labels)
        total_loss = output.loss + regularization * steer.regularization_term()
        total_loss.backward()
        optimizer.step()

    torch.save([
        steer.state_dict(),
        max(n_steps, start_step)
    ], save_path)



In [6]:
# Load the "train" split of the SetFit/sst5 dataset and serve it in shuffled
# batches of 32 examples.
dataset_train = datasets.load_dataset("SetFit/sst5")["train"]
dataloader_train = DataLoader(dataset_train, batch_size=32, shuffle=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/421 [00:00<?, ?B/s]

Repo card metadata block was not found. Setting CardData to empty.


train.jsonl:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

dev.jsonl:   0%|          | 0.00/171k [00:00<?, ?B/s]

test.jsonl:   0%|          | 0.00/343k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8544 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1101 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2210 [00:00<?, ? examples/s]

In [26]:
# Training-set size divided by 32, i.e. how many size-32 batches one pass over it takes.
print(len(dataset_train)/32)

267.0


In [7]:
# Build the Pythia-160m text-generation pipeline (checkpoint step143000), on GPU
# when one is available.
# The factory is called through its `transformers_pipeline` alias: this cell
# rebinds the name `pipeline` to the resulting object, so calling `pipeline(...)`
# here would fail as soon as the cell is run a second time.
pipeline = transformers_pipeline(
    "text-generation",
    model = "EleutherAI/pythia-160m-deduped",
    revision="step143000",
    cache_dir="./pythia-160m-deduped/step143000",
    device= "cuda:0" if torch.cuda.is_available() else "cpu"
    )


config.json:   0%|          | 0.00/569 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

In [91]:

# Reuse the model and tokenizer held by the generation pipeline.
model = pipeline.model
tokenizer = pipeline.tokenizer
# Use the end-of-sequence token as padding token, in the tokenizer and the model config.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
model.config.pad_token_id = model.config.eos_token_id

# Freeze every parameter the model has at this point.
for param in model.parameters():
  param.requires_grad = False

# Wrap the model's `embed_out` module in a Steer module. Steer is built after the
# freeze loop above, so the parameters it creates itself are not frozen by it.
steer = Steer(model.embed_out, model.config.hidden_size)
# NOTE(review): the output projection read above is `model.embed_out`; an
# attribute named `lm_head` does not appear to be consulted by
# GPTNeoXForCausalLM's forward pass, so this assignment likely only registers
# `steer` as a submodule (making its parameters visible to `model.parameters()`)
# without routing hidden states through it. If steering is meant to affect the
# logits, `model.embed_out = steer` looks like the intended wiring — confirm, and
# check that Steer.forward accepts (batch, seq, hidden) inputs before switching.
model.lm_head = steer

# vocab_size = len(tokenizer)
# model.resize_token_embeddings(vocab_size)



In [92]:
# Run training with train()'s default hyper-parameters.
train(dataloader_train, model, steer, tokenizer)

number of training steps: 1000


100%|██████████| 1000/1000 [02:08<00:00,  7.75it/s]
  state_dict_ = super().state_dict(destination, prefix, keep_vars)


In [31]:
# Save the steer module's state dict under its own file name.
torch.save(steer.state_dict(), "model_eps1_1000.pt")

  state_dict_ = super().state_dict(destination, prefix, keep_vars)


In [10]:
# load = torch.load("train.pt")[0]
# steer.load_state_dict(load)

In [93]:
def generate(prompt_data, tokenizer, model, steer, steer_values=[-5,1],
             prompt_num=10, prompt_length=20, num_beams=1, num_beam_groups=1,
             do_sample=True, temperature=1, top_p=0.9):
    """Generate ``prompt_num`` continuations for every prompt, under fixed steer values.

    Args:
        prompt_data: list of dicts with the prompt text at ``["prompt"]["text"]``.
            Each dict is modified in place: its "generations" entry is replaced
            by a list of ``{"text": continuation}`` dicts.
        tokenizer: tokenizer providing ``__call__``, ``decode`` and ``eos_token_id``.
        model: model providing ``generate``; it is moved to the GPU when one is
            available and put in eval mode.
        steer: object whose ``set_values`` receives the steer values as a
            ``(1, len(steer_values))`` float tensor.
        steer_values: sequence of numbers (not modified).
        prompt_num: number of continuations per prompt.
        prompt_length: number of tokens to generate after the prompt (passed as
            both the minimum and the maximum total length).
        num_beams, num_beam_groups, do_sample, temperature, top_p: forwarded to
            ``model.generate``.

    Returns:
        The same ``prompt_data`` list that was passed in.
    """
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)
    model.eval()

    # The steer values do not depend on the prompt: convert and install them once.
    steer_tensor = torch.tensor([float(value) for value in steer_values], device=device)
    steer.set_values(steer_tensor[None])
    # steer.epsilon = 10

    for _prompt in tqdm(prompt_data):
        _prompt["generations"] = []
        prompt_text = _prompt["prompt"]["text"]
        # Tokenize once per prompt; every sample below reuses the same inputs.
        inputs = tokenizer(prompt_text, return_tensors="pt").to(device)
        token_length = inputs["input_ids"].shape[1]
        total_length = token_length + prompt_length

        for _i in range(prompt_num):
            with torch.no_grad():
                output = model.generate(
                    **inputs,
                    max_length=total_length,
                    min_length=total_length,
                    num_beams=num_beams,
                    num_beam_groups=num_beam_groups,
                    do_sample=do_sample,
                    temperature=temperature,
                    top_p=top_p,
                    pad_token_id=tokenizer.eos_token_id
                )
            # Decode only the newly generated ids. Cutting the decoded string at
            # len(prompt_text) is wrong whenever decoding the prompt's tokens
            # does not reproduce the prompt character for character.
            continuation = tokenizer.decode(output[0][token_length:], skip_special_tokens=True)
            _prompt["generations"].append({
                "text": continuation
            })
    return prompt_data

In [94]:
sentiment = "neutral"

# Requires prompts from the paper's github repo (https://github.com/Glaciohound/LM-Steer/blob/main/data/prompts/sentiment_prompts-10k/positive_prompts.jsonl#L2498)
# The file holds one JSON record per line; only the first 100 records are kept.
with open(sentiment + "_prompts.jsonl", "r") as prompt_file:
  prompt_data = [json.loads(line) for line in prompt_file][:100]

# steer.epsilon = 0.5
# Five continuations per prompt, generated with steer values [5, 1].
generated_data = generate(
    prompt_data,
    tokenizer,
    model,
    steer,
    steer_values=[5,1],
    prompt_num=5,
)

100%|██████████| 100/100 [01:53<00:00,  1.13s/it]


In [79]:
# Spot check: print one randomly chosen record. The NumPy random state is not
# seeded in this cell, so the pick can differ between runs.
print(generated_data[np.random.randint(0,len(generated_data))])

{'md5_hash': '79141643c821ada148722cdda8e23ae1', 'prompt': {'text': 'So we sold 73 million'}, 'continuation': {'text': 'shares in General Motors.'}, 'num_positive': 12, 'generations': [{'text': ' people of which 50,000 have had it; 50 million had it now; and over 100,'}, {'text': ' worth of the books we have, plus the company has been worth over 1 billion dollars.\n\n'}, {'text': ", which is an insane number.\n\nOne thing I've been curious about is how it works"}, {'text': ' more products to raise the bill.\n\nThe bill in effect in 2011 cost taxpayers over $100'}, {'text': ' of their old cars and new trucks in October.\n\nThe best part of these new vehicles was'}]}


In [80]:
def eval(classifier, generations_df, sentiment_file=None, max_length=256):
    """Score the sentiment of generated continuations, prompt by prompt.

    For every row, each generation is appended to the prompt text and the
    resulting sentences are classified. Per prompt, two numbers are recorded:
    the fraction of sentences labelled "POSITIVE", and the mean score, where a
    "NEGATIVE" prediction contributes ``1 - score`` and any other label
    contributes ``score``. Prompts whose classification raised ``IndexError``,
    or that have no generations, are recorded as NaN and ignored by the
    summary statistics.

    NOTE(review): this function shadows the built-in ``eval`` in the notebook
    namespace.

    Args:
        classifier: callable taking a list of sentences and a ``max_length``
            keyword and returning one ``{"label", "score"}`` dict per sentence.
        generations_df: DataFrame with a "prompt" column (dicts with a "text"
            key) and a "generations" column (lists of dicts with a "text" key).
        sentiment_file: optional path; when given, every prediction is written
            to it as one JSON object per line.
        max_length: currently not forwarded; the classifier is always called
            with ``max_length=512``.

    Returns:
        Tuple ``(mean accuracy, std accuracy, mean score, std score)``.
    """
    # score generations and write to sentiment.jsonl
    if sentiment_file is not None:
        print("writing outputs to ", str(sentiment_file))
    fo = open(sentiment_file, 'w') if sentiment_file is not None else None

    accuracies = []
    scores = []
    try:
        for i, row in tqdm(generations_df.iterrows(), total=len(generations_df.index), desc='Scoring generation sentiments'):
            prompt = row.prompt['text']
            sentences_for_prompt = [f"{prompt}{gen['text']}" for gen in row['generations']]

            failed = False
            if not sentences_for_prompt:
                predictions_for_prompt = []
            else:
                try:
                    predictions_for_prompt = classifier(sentences_for_prompt,
                                                        max_length=512)
                except IndexError: # sometimes the generation is too long?
                    print("exception occured, please check")
                    predictions_for_prompt = [{'label': "", 'score': float('nan')}] * len(sentences_for_prompt)
                    failed = True

            if failed or not predictions_for_prompt:
                # Nothing was measured for this prompt: record NaN for both
                # metrics rather than a misleading 0% positive.
                accuracies.append(float('nan'))
                scores.append(float('nan'))
            else:
                score = 0
                positive_proportion = 0
                for prediction in predictions_for_prompt:
                    score += prediction["score"] if prediction["label"] != "NEGATIVE" else 1 - prediction["score"]
                    positive_proportion += float(prediction["label"] == "POSITIVE")
                scores.append(score / len(predictions_for_prompt))
                accuracies.append(positive_proportion / len(predictions_for_prompt))

            if fo is not None:
                for res in predictions_for_prompt:
                    fo.write(json.dumps(res) + '\n')
    finally:
        # Close the output file even when scoring is interrupted.
        if fo is not None:
            fo.close()

    print(accuracies)
    # NaN-aware statistics throughout, so the stds match the means.
    return np.nanmean(accuracies), np.nanstd(accuracies), np.nanmean(scores), np.nanstd(scores)


In [36]:
# Use the first CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Sentiment-analysis pipeline with no model specified, i.e. the library's default
# model for this task.
classifier = transformers_pipeline('sentiment-analysis', device=device)
# classifier = pipeline(model='siebert/sentiment-roberta-large-english')

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [95]:
# eval returns (mean accuracy, std of accuracy, mean score, std of score), where
# the per-prompt accuracy is the fraction of generations labelled POSITIVE.
accuracy, std, score, stdd = eval(classifier, pd.DataFrame(generated_data))
print("\n accuracy : ", accuracy)
print("std : ", std)
print("score : ", score)
print("stdd : ", stdd)

writing outputs to  None


Scoring generation sentiments: 100%|██████████| 100/100 [00:02<00:00, 42.47it/s]

[0.8, 0.2, 0.6, 0.4, 0.2, 0.0, 0.4, 0.4, 0.4, 0.8, 0.8, 0.8, 0.8, 0.8, 0.4, 0.8, 0.4, 0.8, 0.8, 0.2, 0.8, 0.4, 0.0, 0.6, 0.6, 0.2, 0.8, 0.4, 0.8, 0.4, 1.0, 0.4, 0.0, 0.4, 0.4, 0.6, 0.4, 0.8, 0.6, 0.2, 0.8, 0.4, 1.0, 0.8, 0.6, 0.4, 0.4, 1.0, 0.4, 0.6, 0.8, 0.6, 0.8, 0.2, 1.0, 0.6, 0.6, 0.4, 0.8, 0.4, 0.2, 0.6, 0.4, 0.4, 0.6, 0.4, 0.8, 0.8, 0.6, 1.0, 0.2, 0.2, 0.0, 0.6, 0.4, 0.6, 0.4, 0.2, 0.6, 0.6, 0.2, 0.0, 0.8, 0.4, 0.8, 0.8, 0.6, 0.8, 0.6, 0.4, 0.6, 1.0, 0.2, 0.6, 0.2, 0.8, 0.4, 0.4, 0.4, 0.0]

 accuracy :  0.5279999999999999
std :  0.2615645235883491
score :  0.5362575963735581
stdd :  0.23716268391292686



