In [1]:
%env TOKENIZERS_PARLLELISM=false
%env WANDB_PROJECT=O4

SAMPLES = 100
PHASES = 5

env: TOKENIZERS_PARLLELISM=false
env: WANDB_PROJECT=O4


In [2]:
import gym
import compiler_gym

import pandas as pd
from torch import nn

import compiler_gym  # imports the CompilerGym environments
import gym
import pandas as pd
from tokenizers import BertWordPieceTokenizer, ByteLevelBPETokenizer
from tokenizers.processors import BertProcessing
from transformers import (
    AutoModelForMaskedLM,
    AutoModelForPreTraining,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BertTokenizerFast,
    DataCollatorWithPadding,
    PreTrainedTokenizerFast,
    RobertaForSequenceClassification,
    RobertaTokenizerFast,
    Trainer,
    TrainingArguments,
)

from tokenizers.processors import BertProcessing
from datasets import Dataset
from compiler_gym.envs import LlvmEnv

In [3]:
# Create the gym environment registered under the id "llvm-ic-v0"
# (the registration comes from importing compiler_gym).
# NOTE(review): "ic" presumably stands for an instruction-count reward — confirm
# against the CompilerGym documentation.
env = gym.make("llvm-ic-v0")

In [4]:
# Tokenizer and model both start from the CodeBERT MLM checkpoint; the model is
# loaded with a sequence-classification head configured with num_labels=1.
tokenizer = RobertaTokenizerFast.from_pretrained("microsoft/codebert-base-mlm")
model = RobertaForSequenceClassification.from_pretrained(
    "microsoft/codebert-base-mlm",
    num_labels=1,
)

Some weights of the model checkpoint at microsoft/codebert-base-mlm were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'roberta.pooler.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base-mlm and are new

## Dataset

In [17]:
def prepare_opt_dataset(tokenizer, env: LlvmEnv, samples=SAMPLES, max_phases=PHASES):
    """Roll out random actions in ``env`` and return a tokenized dataset.

    For each of ``samples`` episodes the environment is reset and up to
    ``max_phases`` randomly sampled actions are applied. Before every step the
    name of the previously sampled action is concatenated with the current
    ``"Ir"`` observation to form the ``text`` column; the action sampled for the
    step is stored in the ``label`` column. An episode stops early (without
    recording that step) as soon as the environment reports ``done``.

    Args:
        tokenizer: Callable tokenizer applied to batches of ``text`` strings
            with ``padding=True`` and ``truncation=True``.
        env: CompilerGym LLVM environment to sample from. It is reset and
            stepped by this function.
        samples: Number of episodes to roll out.
        max_phases: Maximum number of steps taken per episode.

    Returns:
        The tokenized ``Dataset`` in torch format exposing the columns
        ``input_ids``, ``label`` and ``attention_mask``.
    """
    # The column names have to agree with what `preprocess` reads ("text") and
    # with what `set_format` selects ("label"); otherwise the map/format calls
    # below fail on a missing column.
    env_samples = {"text": [], "label": []}
    action = env.action_space.sample()

    for _ in range(samples):
        env.reset()
        for _ in range(max_phases):
            # NOTE(review): the text pairs the *previous* action (carried over
            # from the last step, even across resets) with the IR seen before
            # the new action is applied — confirm this pairing is intended.
            text = env.action_space.to_string(action) + env.observation["Ir"]
            action = env.action_space.sample()
            # Only the termination flag of the step result is used.
            # NOTE(review): the step reward is discarded and the sampled action
            # is stored as the label — confirm this is the intended target.
            _, _, done, _ = env.step(action)
            if done:
                break
            env_samples["text"].append(text)
            env_samples["label"].append(action)

    dataset = Dataset.from_dict(env_samples)

    def preprocess(example):
        """Tokenize one batch of rows by their ``text`` column."""
        return tokenizer(example["text"], padding=True, truncation=True)

    tokenized = dataset.map(preprocess, batched=True)
    tokenized.set_format(type="torch", columns=["input_ids", "label", "attention_mask"])

    return tokenized

In [18]:
# Prepare the training and evaluation datasets from random rollouts of `env`.
# Both calls override the SAMPLES / PHASES defaults of prepare_opt_dataset.
training_dataset = prepare_opt_dataset(tokenizer, env, samples=100, max_phases=20)
eval_dataset = prepare_opt_dataset(tokenizer, env, samples=20, max_phases=20)

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

## Model Training

In [6]:
# Load a tokenizer and a num_labels=1 sequence-classification model from the
# local directory "../results/cost" (relative to the notebook's working directory).
# NOTE(review): presumably a previously trained cost model — confirm where this
# checkpoint is produced.
cost_tokenizer = RobertaTokenizerFast.from_pretrained("../results/cost")
cost_model = RobertaForSequenceClassification.from_pretrained("../results/cost", num_labels=1)

In [15]:
# Smoke test: run the cost model once on a toy string and display its output.
inputs = cost_tokenizer("hello world", return_tensors='pt')
cost_model(**inputs)
# To inspect the raw encoding instead, evaluate: cost_tokenizer("hello world")

SequenceClassifierOutput(loss=None, logits=tensor([[0.1173]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [14]:

class PolicyTrainer(Trainer):
    """Trainer whose loss is the mean squared error between logits and labels."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the MSE loss between the model's logits and ``inputs["labels"]``.

        Args:
            model: The model to run the forward pass with.
            inputs: Mapping of model inputs for one batch; must contain a
                ``"labels"`` entry. It is not modified.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                only the loss.
            num_items_in_batch: Unused. Accepted so that Trainer versions which
                pass this keyword to ``compute_loss`` keep working.

        Returns:
            The loss tensor, or a ``(loss, outputs)`` tuple when
            ``return_outputs`` is true.

        Raises:
            KeyError: If ``inputs`` has no ``"labels"`` entry.
        """
        labels = inputs["labels"]
        # Forward without the labels so the model does not also compute its own
        # built-in loss; build a new dict to leave the caller's batch intact.
        model_inputs = {key: value for key, value in inputs.items() if key != "labels"}
        outputs = model(**model_inputs)

        # Bring logits and labels to the same (batch, num_labels) layout.
        num_labels = self.model.config.num_labels
        logits = outputs.get('logits').view(-1, num_labels)
        targets = labels.float().view(-1, num_labels)

        loss = nn.MSELoss()(logits, targets)
        return (loss, outputs) if return_outputs else loss

In [None]:
# Batches are padded by the collator, which is more efficient for batched padding.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Run configuration: where results go, how the run is reported, and the
# optimisation hyper-parameters.
training_args = TrainingArguments(
    output_dir="results",
    run_name="codebert-llvm-ic",
    report_to="wandb",
    hub_model_id="codebert-llvm-ic",
    # push_to_hub=True,
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
)

# Wire the model, data and collator into the custom-loss trainer.
trainer = PolicyTrainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    tokenizer=tokenizer,
    train_dataset=training_dataset,
    eval_dataset=eval_dataset,
)

trainer.train()