In [1]:
import transformers
import peft
import pandas as pd

In [2]:
def load_model(model: str):
    # load model
    m = transformers.AutoModelForCausalLM.from_pretrained(
        model,
        device_map='auto',
        trust_remote_code=False,
        revision='main'
    )
    
    return m

m = load_model("TheBloke/Mistral-7B-Instruct-v0.2-GPTQ")


Some weights of the model checkpoint at TheBloke/Mistral-7B-Instruct-v0.2-GPTQ were not used when initializing MistralForCausalLM: ['model.layers.0.mlp.down_proj.bias', 'model.layers.0.mlp.gate_proj.bias', 'model.layers.0.mlp.up_proj.bias', 'model.layers.0.self_attn.k_proj.bias', 'model.layers.0.self_attn.o_proj.bias', 'model.layers.0.self_attn.q_proj.bias', 'model.layers.0.self_attn.v_proj.bias', 'model.layers.1.mlp.down_proj.bias', 'model.layers.1.mlp.gate_proj.bias', 'model.layers.1.mlp.up_proj.bias', 'model.layers.1.self_attn.k_proj.bias', 'model.layers.1.self_attn.o_proj.bias', 'model.layers.1.self_attn.q_proj.bias', 'model.layers.1.self_attn.v_proj.bias', 'model.layers.10.mlp.down_proj.bias', 'model.layers.10.mlp.gate_proj.bias', 'model.layers.10.mlp.up_proj.bias', 'model.layers.10.self_attn.k_proj.bias', 'model.layers.10.self_attn.o_proj.bias', 'model.layers.10.self_attn.q_proj.bias', 'model.layers.10.self_attn.v_proj.bias', 'model.layers.11.mlp.down_proj.bias', 'model.layers.11

In [4]:
def load_and_prepare_dataset(m):
    df = pd.read_csv('data/data_openai_api_with_mask.csv')
    df = df[['text', 'ethical_us']]
    instruction = ''
    df['data'] = df.apply((lambda row: f'''<s>[INST] {instruction} \n{row['text']} \n[/INST] {row['ethical_us']}</s>'''), axis=1)
    t = transformers.AutoTokenizer.from_pretrained(m, use_fast=True)

    for i, values in enumerate(df['data']):
        t.truncation_side = 'left'
        tokenized_inputs = t(
            values,
            return_tensors='np',
            truncation=True,
            max_length=512
        )
        df['data'][i] = tokenized_inputs

    t.pad_token = t.eos_token
    data_collator = transformers.DataCollatorForLanguageModeling(t, mlm=False)

    return df['data'], data_collator, t


tokenized_data, data_collator, t = load_and_prepare_dataset(m)

OSError: Incorrect path_or_model_id: 'MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (rotary_emb): MistralRotaryEmbedding()
          (k_proj): QuantLinear()
          (o_proj): QuantLinear()
          (q_proj): QuantLinear()
          (v_proj): QuantLinear()
        )
        (mlp): MistralMLP(
          (act_fn): SiLU()
          (down_proj): QuantLinear()
          (gate_proj): QuantLinear()
          (up_proj): QuantLinear()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm()
  )
  (lm_head): Linear(in_features=4096, out_features=32000, bias=False)
)'. Please provide either the path to a local folder or the repo_id of a model on the Hub.

In [None]:
def train_model(model, lr, batch_size, num_epochs, tokenized_data, collator):
    model.train() # training state
    model.gradient_checkpoint_enable()
    model = peft.prepare_model_for_kbit_training(model) # turn into qlora

    # lora config
    config = peft.LoraConfig(
        r=8,
        lora_alpha=32,
        target_modules=["q_proj"],
        lora_dropout=.05,
        bias="none",
        task_type="CAUSAL_LLM"
    )

    model = peft.get_peft_model(model, config) # model in lora style
    

    training_args = transformers.TrainingArguments(
        output_dir= "../model",
        learning_rate=lr,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=num_epochs,
        weight_decay=0.01,
        logging_strategy="epoch",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        fp16=True,
        optim="paged_adamw_8bit",

    )

    trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_data["train"],
    # eval_dataset=tokenized_data["test"],
    args=training_args,
    data_collator=collator
    )

    # train model
    model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
    trainer.train()

    # renable warnings
    model.config.use_cache = True

    return model

final_model = train_model(m, 1e-4, 4, 10, tokenized_data, data_collator)