# pip — install dependencies and download the SentencePiece training corpus

In [None]:
!pip install torchdata
!pip install 'portalocker>=2.0.0'
!pip install sentencepiece
!wget https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt

Collecting portalocker>=2.0.0
  Downloading portalocker-2.7.0-py2.py3-none-any.whl (15 kB)
Installing collected packages: portalocker
Successfully installed portalocker-2.7.0
Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99
--2023-07-31 12:01:47--  https://raw.githubusercontent.com/google/sentencepiece/master/data/botchan.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 278779 (272K) [text/plain]
Saving to: ‘botchan.txt’


2023-07-31 12:01:47 (22.5 MB/s) - ‘botchan.tx

In [None]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/peft.git git+https://github.com/huggingface/transformers.git

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m518.9/518.9 kB[0m [31m42.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m24.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml

# func — imports and text-normalisation helpers

In [None]:
import math
import os
from tempfile import TemporaryDirectory
from typing import Tuple

import torch
from torch import nn, Tensor
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import dataset
from torchtext.datasets import WikiText2
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
import torch.optim as optim
import numpy as np
from random import *

import sentencepiece as spm
from sentencepiece import SentencePieceProcessor

from typing import List
from typing import Optional, Tuple

import unicodedata
import re


In [None]:
def unicodeToAscii(s):
    """Strip combining marks from ``s`` after NFD decomposition.

    Characters whose Unicode category is ``Mn`` (non-spacing mark) are
    dropped; everything else is kept in order.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)


def normalizeString(s):
    """Lower-case, de-accent and simplify ``s`` to letters plus ``. ! ?``.

    A space is inserted before each ``.``, ``!`` or ``?``, every run of other
    non-letter characters becomes a single space, and the result is trimmed.
    """
    text = unicodeToAscii(s.lower().strip())
    rewrites = (
        (r"([.!?])", r" \1"),        # detach sentence punctuation
        (r"[^a-zA-Z.!?]+", r" "),    # anything else non-alphabetic -> space
        (r"\s+", r" "),              # collapse whitespace runs
    )
    for pattern, replacement in rewrites:
        text = re.sub(pattern, replacement, text)
    return text.strip()

# tokenizer & model & llm — tokenizer wrapper, transformer layers and text generation

In [None]:
class Tokenizer(nn.Module):
    """Wrapper around a trained SentencePiece model.

    Loads the model file, records the vocabulary size and the BOS/EOS/PAD ids
    reported by it, and converts between text and lists of token ids.
    """

    def __init__(self, model_path: str):
        """Load the SentencePiece model stored at ``model_path``.

        Raises:
            AssertionError: if ``model_path`` is not an existing file, or the
                model reports inconsistent vocabulary sizes.
        """
        # nn.Module keeps its bookkeeping (_parameters, _modules, ...) in
        # state created by its own initialiser; without this call any Module
        # API used on the tokenizer raises AttributeError.
        super().__init__()
        # reload tokenizer
        assert os.path.isfile(model_path), model_path
        self.sp_model = SentencePieceProcessor(model_file=model_path)
        self.n_words: int = self.sp_model.vocab_size()
        self.bos_id: int = self.sp_model.bos_id()
        self.eos_id: int = self.sp_model.eos_id()
        self.pad_id: int = self.sp_model.pad_id()
        assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()

    def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
        """Tokenise ``s``, optionally adding the BOS id in front and EOS id at the end."""
        assert type(s) is str
        t = self.sp_model.encode(s)
        if bos:
            t = [self.bos_id] + t
        if eos:
            t = t + [self.eos_id]
        return t

    def decode(self, t: List[int]) -> str:
        """Turn a list of token ids back into text."""
        return self.sp_model.decode(t)

In [None]:
class RMSNorm(torch.nn.Module):
    """Root-mean-square normalisation over the last dimension with a learned gain."""

    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        # per-feature gain, initialised to one
        self.weight = nn.Parameter(torch.ones(dim))

    def _norm(self, x):
        """Divide ``x`` by the RMS of its last axis (``eps`` added under the root)."""
        mean_square = x.pow(2).mean(-1, keepdim=True)
        return x * torch.rsqrt(mean_square + self.eps)

    def forward(self, x):
        """Normalise in float32, cast back to the dtype of ``x``, then apply the gain."""
        normalised = self._norm(x.float())
        return normalised.type_as(x) * self.weight

In [None]:
def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
    """Build the table of unit complex rotations used by the rotary embedding.

    Returns a complex tensor with ``end`` rows and ``dim // 2`` columns whose
    entry ``[p, k]`` has magnitude 1 and angle ``p / theta ** (2k / dim)``.
    """
    exponents = torch.arange(0, dim, 2)[: (dim // 2)].float() / dim
    inv_freq = 1.0 / (theta ** exponents)
    positions = torch.arange(end, device=inv_freq.device)  # type: ignore
    angles = torch.outer(positions, inv_freq).float()  # type: ignore
    unit = torch.ones_like(angles)
    return torch.polar(unit, angles)  # complex64


def apply_rotary_emb(
    xq: torch.Tensor,
    xk: torch.Tensor,
    freqs_cis: torch.Tensor,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Rotate query and key tensors by the given complex factors.

    Consecutive pairs along the last axis of ``xq``/``xk`` are treated as
    complex numbers and multiplied by ``freqs_cis``, which is first viewed
    with the shape of the complex query minus its leading (batch) axis.
    Results are returned with the original shapes and dtypes.
    """
    def _pairs_as_complex(t: torch.Tensor) -> torch.Tensor:
        # (..., 2m) -> (..., m) complex
        return torch.view_as_complex(t.float().reshape(*t.shape[:-1], -1, 2))

    q_complex = _pairs_as_complex(xq)
    k_complex = _pairs_as_complex(xk)
    rotation = freqs_cis.view(q_complex.shape[1:])
    q_rotated = torch.view_as_real(q_complex * rotation).flatten(3)
    k_rotated = torch.view_as_real(k_complex * rotation).flatten(3)
    return q_rotated.type_as(xq), k_rotated.type_as(xk)

In [None]:
class ModelArgs:
    """Default hyper-parameters for the Transformer built in this notebook.

    The values are plain class attributes (there is no ``__init__``), so the
    class object itself is what gets passed as ``args``/``params``.
    """
    dim: int = 512  # model (embedding) width
    n_layers: int = 3  # number of TransformerBlock layers
    n_heads: int = 8  # attention heads; per-head width is dim // n_heads
    vocab_size: int = 3000 # size of the embedding table and output projection; expected to match the tokenizer's vocabulary
    multiple_of: int = 256  # FeedForward rounds its hidden width up to a multiple of this
    norm_eps: float = 1e-5  # epsilon handed to every RMSNorm

    max_batch_size: int = 32  # first dimension of the attention key/value cache
    max_seq_len: int = 2048  # second dimension of the attention key/value cache

In [None]:
512//8  # scratch check: dim // n_heads for the default ModelArgs (per-head width)

64

In [None]:
w=nn.Linear(512,512)  # scratch layer; NOTE(review): `w` is not referenced by any other visible cell — confirm it can be removed

In [None]:
class Attention(nn.Module):
    """Multi-head self-attention with rotary embeddings and a key/value cache.

    The cache holds the (detached) keys and values of every position seen so
    far, so that a later call with ``start_pos > 0`` can attend to earlier
    positions without recomputing them. Positions belonging to the current
    call always use the freshly computed tensors, which keeps the layer
    differentiable during training.
    """

    def __init__(self, args: ModelArgs):
        super().__init__()
        self.n_heads = args.n_heads
        self.head_dim = args.dim // args.n_heads
        inner_dim = self.head_dim * args.n_heads
        self.wq = nn.Linear(args.dim, inner_dim)
        self.wk = nn.Linear(args.dim, inner_dim)
        self.wv = nn.Linear(args.dim, inner_dim)
        self.wo = nn.Linear(inner_dim, args.dim)

        # Non-persistent buffers: they follow `.to(device)` with the rest of
        # the module (no hard-coded CUDA allocation) but stay out of
        # state_dict, so existing weight files still load with matching keys.
        cache_shape = (args.max_batch_size, args.max_seq_len, args.n_heads, self.head_dim)
        self.register_buffer("cache_k", torch.zeros(cache_shape), persistent=False)
        self.register_buffer("cache_v", torch.zeros(cache_shape), persistent=False)

    def forward(self, x: torch.Tensor, start_pos: int, freqs_cis: torch.Tensor, mask: Optional[torch.Tensor]):
        """Attend over the cached prefix plus the current tokens.

        Args:
            x: input of shape (batch, seqlen, dim).
            start_pos: index of the first current token within the sequence.
            freqs_cis: rotary factors for the current positions.
            mask: additive mask broadcastable to the score tensor
                (batch, heads, seqlen, start_pos + seqlen), or ``None``.

        Returns:
            Tensor of shape (batch, seqlen, dim).
        """
        bsz, seqlen, _ = x.shape
        xq = self.wq(x).view(bsz, seqlen, self.n_heads, self.head_dim)
        xk = self.wk(x).view(bsz, seqlen, self.n_heads, self.head_dim)
        xv = self.wv(x).view(bsz, seqlen, self.n_heads, self.head_dim)
        xq, xk = apply_rotary_emb(xq, xk, freqs_cis=freqs_cis)

        # Keep the cache compatible with the activations (also covers model
        # objects whose cache was created on another device).
        if self.cache_k.device != xk.device or self.cache_k.dtype != xk.dtype:
            self.cache_k = self.cache_k.to(xk)
            self.cache_v = self.cache_v.to(xv)

        # Write the new keys/values INTO the cache. Assigning to `.data` of a
        # slice only rebinds a temporary view and leaves the cache untouched,
        # which silently turns attention into a no-op. The copy is done
        # without autograd so no graph is kept alive between calls.
        with torch.no_grad():
            self.cache_k[:bsz, start_pos : start_pos + seqlen] = xk
            self.cache_v[:bsz, start_pos : start_pos + seqlen] = xv

        if start_pos > 0:
            # cached prefix (constant) + current tokens (differentiable)
            keys = torch.cat((self.cache_k[:bsz, :start_pos], xk), dim=1)
            values = torch.cat((self.cache_v[:bsz, :start_pos], xv), dim=1)
        else:
            keys, values = xk, xv

        xq = xq.transpose(1, 2)
        keys = keys.transpose(1, 2)
        values = values.transpose(1, 2)
        scores = torch.matmul(xq, keys.transpose(2, 3)) / math.sqrt(self.head_dim)

        if mask is not None:
            scores = scores + mask  # (bs, n_heads, slen, start_pos + slen)
        scores = F.softmax(scores.float(), dim=-1).type_as(xq)
        output = torch.matmul(scores, values)  # (bs, n_heads, slen, head_dim)
        output = output.transpose(1, 2).contiguous().view(bsz, seqlen, -1)

        return self.wo(output)


In [None]:
class FeedForward(nn.Module):
    """Gated feed-forward block: ``w2(silu(w1(x)) * w3(x))``."""

    def __init__(
        self,
        dim: int,
        hidden_dim: int,
        multiple_of: int,):
        """Create the three projections.

        The inner width is ``2/3`` of ``hidden_dim`` (truncated), rounded up
        to the next multiple of ``multiple_of``.
        """
        super().__init__()
        two_thirds = int(2 * hidden_dim / 3)
        n_chunks = (two_thirds + multiple_of - 1) // multiple_of
        inner = multiple_of * n_chunks

        self.w1 = nn.Linear(dim, inner)
        self.w2 = nn.Linear(inner, dim)
        self.w3 = nn.Linear(dim, inner)

    def forward(self, x):
        gate = F.silu(self.w1(x))
        return self.w2(gate * self.w3(x))

In [None]:
class TransformerBlock(nn.Module):
    """One pre-norm transformer layer: attention and feed-forward, each with a residual."""

    def __init__(self, layer_id: int, args: ModelArgs):
        super().__init__()
        self.n_heads = args.n_heads
        self.dim = args.dim
        self.head_dim = args.dim // args.n_heads
        # sub-modules are created in a fixed order: attention, feed-forward, then the two norms
        self.attention = Attention(args)
        self.feed_forward = FeedForward(
            dim=args.dim,
            hidden_dim=4 * args.dim,
            multiple_of=args.multiple_of,
        )
        self.layer_id = layer_id
        self.attention_norm = RMSNorm(args.dim, eps=args.norm_eps)
        self.ffn_norm = RMSNorm(args.dim, eps=args.norm_eps)

    def forward(self, x: torch.Tensor, start_pos: int, freqs_cis: torch.Tensor, mask: Optional[torch.Tensor]):
        """Return ``h + ffn(norm(h))`` where ``h = x + attention(norm(x))``."""
        attended = self.attention.forward(self.attention_norm(x), start_pos, freqs_cis, mask)
        h = x + attended
        transformed = self.feed_forward.forward(self.ffn_norm(h))
        return h + transformed

In [None]:
class Transformer(nn.Module):
    """Decoder-only transformer: embedding, ``n_layers`` blocks, final norm, vocab projection."""

    def __init__(self, params: ModelArgs):
        super().__init__()
        self.params = params
        self.vocab_size = params.vocab_size
        self.n_layers = params.n_layers
        self.tok_embeddings = nn.Embedding(params.vocab_size, params.dim)
        self.layers = torch.nn.ModuleList()
        for layer_id in range(params.n_layers):
            self.layers.append(TransformerBlock(layer_id, params))
        self.norm = RMSNorm(params.dim, eps=params.norm_eps)
        self.output = nn.Linear(params.dim, params.vocab_size)
        # NOTE(review): the rotary table is built from the full model width
        # (params.dim) and later reshaped per head inside apply_rotary_emb;
        # presumably existing checkpoints depend on this layout — confirm
        # before changing it.
        self.freqs_cis = precompute_freqs_cis(self.params.dim, self.params.max_seq_len * 2)

    def forward(self, tokens: torch.Tensor, start_pos: int):
        """Compute next-token logits.

        Args:
            tokens: integer tensor of shape (batch, seqlen).
            start_pos: position of ``tokens[:, 0]`` within the full sequence.

        Returns:
            float32 logits of shape (batch, seqlen, vocab_size).
        """
        _bsz, seqlen = tokens.shape
        h = self.tok_embeddings(tokens)
        self.freqs_cis = self.freqs_cis.to(h.device)
        freqs_cis = self.freqs_cis[start_pos : start_pos + seqlen]

        mask = None
        if seqlen > 1:
            # Scores span the cached prefix as well as the current tokens, so
            # the mask needs start_pos + seqlen columns: query i may see keys
            # 0 .. start_pos + i. For start_pos == 0 this is the usual
            # (seqlen, seqlen) causal mask.
            mask = torch.full(
                (1, 1, seqlen, start_pos + seqlen), float("-inf"), device=tokens.device
            )
            mask = torch.triu(mask, diagonal=start_pos + 1).type_as(h)

        for layer in self.layers:
            h = layer(h, start_pos, freqs_cis, mask)
        h = self.norm(h)
        output = self.output(h)
        return output.float()

In [None]:
class LM:
    """Couples a Transformer with a Tokenizer for batched text generation."""

    def __init__(self, model: Transformer, tokenizer: Tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    @torch.no_grad()
    def generate(
        self,
        prompts: List[str],
        max_gen_len: int,
        temperature: float = 0.8,
        top_p: float = 0.95,
    ) -> List[str]:
        """Continue each prompt by up to ``max_gen_len`` tokens.

        Args:
            prompts: texts to continue; at most ``params.max_batch_size``.
            max_gen_len: maximum number of tokens generated per prompt.
            temperature: softmax temperature; ``0`` (or less) selects greedy
                decoding.
            top_p: nucleus-sampling threshold used when ``temperature > 0``.

        Returns:
            One decoded string per prompt (prompt tokens included), cut at
            the first EOS token if one was produced.
        """
        if not prompts:
            return []
        bsz = len(prompts)
        params = self.model.params
        assert bsz <= params.max_batch_size, (bsz, params.max_batch_size)
        prompt_tokens = [self.tokenizer.encode(x, bos=True, eos=False) for x in prompts]
        prompt_lengths = [len(t) for t in prompt_tokens]
        min_prompt_size = min(prompt_lengths)
        max_prompt_size = max(prompt_lengths)
        total_len = min(params.max_seq_len, max_gen_len + max_prompt_size)

        # Work on whatever device the model lives on instead of assuming CUDA.
        device = next(self.model.parameters()).device
        tokens = torch.full(
            (bsz, total_len), self.tokenizer.pad_id, dtype=torch.long, device=device
        )
        for k, t in enumerate(prompt_tokens):
            tokens[k, : len(t)] = torch.tensor(t, dtype=torch.long, device=device)
        input_text_mask = tokens != self.tokenizer.pad_id

        start_pos = min_prompt_size
        prev_pos = 0
        for cur_pos in range(start_pos, total_len):
            # only the tokens added since the previous step are fed in; the
            # model is given prev_pos as their starting position
            logits = self.model.forward(tokens[:, prev_pos:cur_pos], prev_pos)[:, -1, :]
            if temperature > 0:
                probs = torch.softmax(logits / temperature, dim=-1)
                next_token = sample_top_p(probs, top_p)
            else:
                next_token = torch.argmax(logits, dim=-1)
            next_token = next_token.reshape(-1)
            # only replace token if prompt has already been generated
            next_token = torch.where(
                input_text_mask[:, cur_pos], tokens[:, cur_pos], next_token
            )
            tokens[:, cur_pos] = next_token
            prev_pos = cur_pos

        decoded = []
        for i, t in enumerate(tokens.tolist()):
            # cut to max gen len
            t = t[: len(prompt_tokens[i]) + max_gen_len]
            # cut to eos tok if any
            try:
                t = t[: t.index(self.tokenizer.eos_id)]
            except ValueError:
                pass  # no EOS was generated; keep the whole sequence
            decoded.append(self.tokenizer.decode(t))
        return decoded


def sample_top_p(probs, p):
    """Nucleus sampling over the last axis of ``probs``.

    Tokens are ranked by probability; a token is kept while the probability
    mass of the tokens ranked before it does not exceed ``p``. One token is
    drawn from the renormalised survivors and returned as an index into the
    original ordering, shape ``(..., 1)``.
    """
    ranked_probs, ranked_idx = torch.sort(probs, dim=-1, descending=True)
    mass_before = torch.cumsum(ranked_probs, dim=-1) - ranked_probs
    ranked_probs = ranked_probs.masked_fill(mass_before > p, 0.0)
    ranked_probs = ranked_probs / ranked_probs.sum(dim=-1, keepdim=True)
    pick = torch.multinomial(ranked_probs, num_samples=1)
    return torch.gather(ranked_idx, -1, pick)

# token — train the tokenizer and build the padded token tensors

In [None]:
# Target device for the data cells below: first CUDA device when available, CPU otherwise.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Train a 3000-piece SentencePiece model on botchan.txt (written to m.model / m.vocab).
spm.SentencePieceTrainer.Train(input='botchan.txt', model_prefix='m', vocab_size=3000, pad_id=3)
sp = spm.SentencePieceProcessor()
sp.load('m.model')

tokenizer = Tokenizer('m.model')
train_iter = WikiText2(split='train')

# Keep WikiText2 lines whose stripped length lies strictly between 90 and 800
# characters; blank lines fall outside that range automatically.
sentence = []
for item in train_iter:
    stripped = item.strip()
    if 90 < len(stripped) < 800:
        sentence.append(stripped)


In [None]:
# Encode every kept sentence (with BOS/EOS) and right-pad into one tensor.
token = [tokenizer.encode(text, bos=True, eos=True) for text in sentence]
max_batch_size: int = 32
max_seq_len: int = 2048
token_lengths = [len(ids) for ids in token]
min_prompt_size = min(token_lengths)
max_prompt_size = max(token_lengths)
# 30 spare positions after the longest sentence, capped at max_seq_len
total_len = min(max_seq_len, 30 + max_prompt_size)

bsz = len(token)

tokens = torch.full((bsz, total_len), tokenizer.pad_id)
for k, t in enumerate(token):
    tokens[k, : len(t)] = torch.tensor(t)


In [None]:
# Start with an all-padding label tensor of the same shape as `tokens`.
labels=torch.full((bsz, total_len),tokenizer.pad_id)
# Independent uniform draw per position; positions below 0.15 are selected.
rand = torch.rand(tokens.shape)
mask_arr = rand < 0.15

In [None]:
# Fill the selected positions of each label row: with probability 0.9 copy the
# original token ids, otherwise write random ids in [0, 2999].
# NOTE(review): `random()` / `randint` come from `from random import *`; a later
# cell runs `import random`, after which `random()` is no longer callable —
# confirm the intended run order.
for k, t in enumerate(labels):
    selected = torch.where(mask_arr[k] == 1)
    if random() >= 0.1:
        labels[k][selected] = tokens[k][selected]
    else:
        n_selected = len(labels[k][selected])
        labels[k][selected] = torch.tensor([randint(0, 2999) for i in range(n_selected)])

In [None]:
# Read the tab-separated dialogue file; the context manager closes the handle
# as soon as the content has been read.
with open('formatted_movie_lines.txt', encoding='utf-8') as movie_file:
    lines = movie_file.read().strip().split('\n')
# Split every line into pairs and normalize
pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

In [None]:
# Split the pairs into prompts (con1) and replies (con2) and keep only pairs
# in which both sides are at most 100 characters long.
con1 = [pair[0] for pair in pairs]
con2 = [pair[1] for pair in pairs]
sentence1 = []
sentence2 = []
for first, second in zip(con1, con2):
    if len(first) <= 100 and len(second) <= 100:
        sentence1.append(first)
        sentence2.append(second)
ts1 = [tokenizer.encode(x, bos=True, eos=True) for x in sentence1]
ts2 = [tokenizer.encode(x, bos=True, eos=True) for x in sentence2]
bsz = len(sentence1)


def _pad_token_rows(rows, width):
    """Right-pad each id list in ``rows`` with the tokenizer pad id to ``width`` columns.

    The filter above bounds the number of characters, not the number of
    tokens, so a row that still exceeds ``width`` is truncated instead of
    raising a shape error.
    """
    padded = torch.full((len(rows), width), tokenizer.pad_id)
    for k, t in enumerate(rows):
        t = t[:width]
        padded[k, : len(t)] = torch.tensor(t)
    return padded


tokens1 = _pad_token_rows(ts1, 100)
tokens2 = _pad_token_rows(ts2, 100)

In [None]:
sentence[0:10]

['Senjō no Valkyria 3 : <unk> Chronicles ( Japanese : 戦場のヴァルキュリア3 , lit . Valkyria of the Battlefield 3 ) , commonly referred to as Valkyria Chronicles III outside Japan , is a tactical role @-@ playing video game developed by Sega and Media.Vision for the PlayStation Portable . Released in January 2011 in Japan , it is the third game in the Valkyria series . <unk> the same fusion of tactical and real @-@ time gameplay as its predecessors , the story runs parallel to the first game and follows the " Nameless " , a penal military unit serving the nation of Gallia during the Second Europan War who perform secret black operations and are pitted against the Imperial unit " <unk> Raven " .',
 "The game began development in 2010 , carrying over a large portion of the work done on Valkyria Chronicles II . While it retained the standard features of the series , it also underwent multiple adjustments , such as making the game more <unk> for series newcomers . Character designer <unk> Honjou and

In [None]:
pairs[0:10]

[['they do to !', 'they do not !'],
 ['she okay ?', 'i hope so .'],
 ['wow', 'let s go .'],
 ['i m kidding . you know how sometimes you just become this persona ? and you don t know how to quit ?',
  'no'],
 ['no', 'okay you re gonna need to learn how to lie .'],
 ['i figured you d get to the good stuff eventually .', 'what good stuff ?'],
 ['what good stuff ?', 'the real you .'],
 ['the real you .', 'like my fear of wearing pastels ?'],
 ['do you listen to this crap ?', 'what crap ?'],
 ['what crap ?',
  'me . this endless . . .blonde babble . i m like boring myself .']]

# ft — fine-tuning with LoRA adapters

In [None]:
model = Transformer(ModelArgs)


In [None]:
model.to('cuda:0')

Transformer(
  (tok_embeddings): Embedding(3000, 512)
  (layers): ModuleList(
    (0-2): 3 x TransformerBlock(
      (attention): Attention(
        (wq): Linear(in_features=512, out_features=512, bias=True)
        (wk): Linear(in_features=512, out_features=512, bias=True)
        (wv): Linear(in_features=512, out_features=512, bias=True)
        (wo): Linear(in_features=512, out_features=512, bias=True)
      )
      (feed_forward): FeedForward(
        (w1): Linear(in_features=512, out_features=1536, bias=True)
        (w2): Linear(in_features=1536, out_features=512, bias=True)
        (w3): Linear(in_features=512, out_features=1536, bias=True)
      )
      (attention_norm): RMSNorm()
      (ffn_norm): RMSNorm()
    )
  )
  (norm): RMSNorm()
  (output): Linear(in_features=512, out_features=3000, bias=True)
)

In [None]:
model=torch.load('model_pt.pth')

In [None]:
model=torch.load('model_pre.pth')

In [None]:
# Move the loaded model to the first CUDA device and create the loss and optimizer.
model.to('cuda:0')
criterion = nn.CrossEntropyLoss()
# NOTE(review): this optimizer is built from the parameters that exist right now;
# a later cell wraps the model with `get_peft_model`, and parameters created by
# that call are not part of this optimizer — confirm they get optimised.
optimizer = optim.AdamW(model.parameters(), lr=0.001)

In [None]:
# Freeze every existing weight (adapters are trained later) and keep the
# one-dimensional parameters (e.g. norm gains, biases) in fp32 for stability.
for param in model.parameters():
    param.requires_grad_(False)
    if param.ndim == 1:
        param.data = param.data.float()

In [None]:
from peft import LoraConfig, get_peft_model

In [None]:
# LoRA settings for fine-tuning: rank-8 adapters (alpha 16, dropout 0.05, no
# bias training) attached to the modules named wq / wk / wv / wo, i.e. the
# four linear projections defined in Attention.
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["wq","wk","wv","wo"],
    lora_dropout=0.05,
    bias="none",
    #task_type="CAUSAL_LM"
)

# Wrap the (frozen) model so that only the adapter weights are trainable.
model = get_peft_model(model, config)

In [None]:
model.load_state_dict(torch.load('model_weight_ft (3).pth'))

<All keys matched successfully>

In [None]:
dataset = torch.utils.data.TensorDataset(tokens1[0:1280], tokens2[0:1280])
Loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=32,
                                         shuffle=True,
                                        )

In [None]:
input=tokens1[0:10].to(device)

In [None]:
model

In [None]:
model(input,0)

In [None]:
model

In [None]:
model.load_state_dict(torch.load('model_pt_weight (1).pth'))

<All keys matched successfully>

In [None]:
def print_trainable_parameters(model):
    """
    Print how many parameter elements of ``model`` require gradients, the
    total number of parameter elements, and the trainable share in percent.
    """
    all_param = 0
    trainable_params = 0
    for _name, tensor in model.named_parameters():
        count = tensor.numel()
        all_param += count
        if tensor.requires_grad:
            trainable_params += count
    summary = f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    print(summary)

In [None]:
print_trainable_parameters(model)

trainable params: 98304 || all params: 13417400 || trainable%: 0.7326605750741574


In [None]:
# Build the optimizer from the parameters that are trainable NOW. An optimizer
# created before `get_peft_model` only references the (frozen) base weights, so
# stepping it would leave the adapter weights unchanged.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.AdamW(trainable_params, lr=0.001)
train_device = next(model.parameters()).device

model.train()
for step, (inputs, targets) in enumerate(Loader):
    inputs, targets = inputs.to(train_device), targets.to(train_device)

    outputs = model(inputs, 0)
    # CrossEntropyLoss expects the class axis second: (batch, vocab, seq)
    loss = criterion(outputs.transpose(1, 2), targets)
    optimizer.zero_grad()
    # every step builds a fresh graph, so nothing has to be retained
    loss.backward()
    optimizer.step()

    if step % 10 == 0:
        print(f"step {step}: loss {loss.item():.4f}")
# NOTE(review): padded target positions also contribute to the loss because
# `criterion` has no ignore_index — confirm whether that is intended.

In [None]:
model

In [None]:
torch.save(model,'model_pt.pth')

In [None]:
torch.save(model.state_dict(), 'model_weight.pth')

In [None]:
ge=LM(model,tokenizer)

# rl — reward model and PPO-style reinforcement learning

In [None]:
!pip install -U sentence-transformers

In [None]:

from sentence_transformers import SentenceTransformer, util
from peft import LoraConfig, get_peft_model

In [None]:
vocab_size=3000

In [None]:
class RewardModel(nn.Module):
    """Maps vectors of size ``vocab_size`` to one scalar each.

    Two bias-free linear layers (``vocab_size -> vocab_size // 2 -> 1``) are
    applied back to back with no activation in between.
    """

    def __init__(self):
        super().__init__()
        hidden = vocab_size // 2
        layers = [
            nn.Linear(vocab_size, hidden, bias=False),
            nn.Linear(hidden, 1, bias=False),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)



In [None]:
input=tokens1[0:32].to(device)

In [None]:
model=Transformer(ModelArgs)
ge=LM(model,tokenizer)
rewards=RewardModel()
rewards.to('cuda:0')

In [None]:
def clipped_value_loss(values, rewards, old_values, clip):
    """Clipped squared-error value loss.

    The new values may move at most ``clip`` away from ``old_values``; the
    loss is the mean of the element-wise larger of the clipped and unclipped
    squared errors against ``rewards``.
    """
    bounded_step = (values - old_values).clamp(-clip, clip)
    clipped_error = ((old_values + bounded_step).flatten() - rewards) ** 2
    raw_error = (values.flatten() - rewards) ** 2
    return torch.max(clipped_error, raw_error).mean()

In [None]:
class Actor(nn.Module):
    """Policy wrapper: forwards token ids to the wrapped language model."""

    def __init__(self, model):
        super().__init__()
        self.actor = model

    def forward(self, x, mask=None, start_pos=0):
        """Return the wrapped model's logits for ``x``.

        Args:
            x: token ids passed to the wrapped model.
            mask: accepted for interface compatibility; not used.
            start_pos: position of the first token, forwarded to the model.
                It is an explicit argument (default ``0``) because no
                module-level ``start_pos`` exists for the body to read.
        """
        action_logits = self.actor(x, start_pos)
        return action_logits

In [None]:
class Critic(nn.Module):
    """Value estimator: a linear head on the wrapped model's last-position output."""

    def __init__(self, model, vocab_size: int = 3000):
        """
        Args:
            model: module called as ``model(x, start_pos)`` returning a
                tensor of shape (batch, seq, vocab_size).
            vocab_size: size of the model's last output axis, i.e. the input
                width of the value head (defaults to the notebook's 3000).
        """
        super().__init__()
        self.critic = model
        self.value_head = nn.Sequential(
            nn.Linear(vocab_size, 1),)

    def forward(self, x, mask=None, start_pos=None):
        """Return one value per sequence, shape (batch, 1).

        ``mask`` is accepted for interface compatibility and not used.
        ``start_pos=None`` is treated as ``0`` so the wrapped model always
        receives an integer position.
        """
        if start_pos is None:
            start_pos = 0
        critic_embeds = self.critic(x, start_pos)
        # only the output at the final position feeds the value head
        out = critic_embeds[:, -1, :].clone()
        values = self.value_head(out)
        return values


In [None]:
def Advantages(reward,values):
    """Generalised-advantage-style backward recursion along the first axis.

    Walking ``t`` from the last index down to 0:
    ``delta = reward[t] + gamma * next_value - values[t]`` and
    ``advantages[t] = delta + gamma * gae_lambda * advantages[t + 1]``,
    where ``next_value`` is ``values[t + 1]`` (``values[t]`` at the last
    index) and the running term starts at 0. ``gamma``, ``gae_lambda`` and
    ``device`` are read from module-level variables.
    """
    advantages = torch.zeros_like(reward).to(device)
    last_index = len(reward) - 1
    lastgaelam = 0
    for t in range(last_index, -1, -1):
        nextvalues = values[t] if t == last_index else values[t + 1]
        delta = reward[t] + gamma * nextvalues - values[t]
        lastgaelam = delta + gamma * gae_lambda * lastgaelam
        advantages[t] = lastgaelam
    return advantages

In [None]:
def g_token(res, extra_len: int = 20, max_seq_len: int = 2048):
    """Encode texts with the global ``tokenizer`` and right-pad them into one tensor.

    Args:
        res: list of strings; each is encoded with BOS and EOS.
        extra_len: padding columns added after the longest sequence.
        max_seq_len: upper bound on the number of columns.

    Returns:
        Integer tensor with ``len(res)`` rows, filled with the tokenizer's
        pad id after each sequence.

    Raises:
        ValueError: if ``res`` is empty.
    """
    seq_token = [tokenizer.encode(x, bos=True, eos=True) for x in res]
    if not seq_token:
        raise ValueError("g_token needs at least one text")
    longest = max(len(t) for t in seq_token)
    total_len = min(max_seq_len, extra_len + longest)

    seq_tokens = torch.full((len(seq_token), total_len), tokenizer.pad_id)
    for k, t in enumerate(seq_token):
        # rows can only exceed total_len when the max_seq_len cap applied
        t = t[:total_len]
        seq_tokens[k, : len(t)] = torch.tensor(t)
    return seq_tokens

In [None]:
model=Transformer(ModelArgs)
model.to('cuda:0')

In [None]:
# Objects used by the reward-model training cell below.
# NOTE(review): `model` is a freshly constructed Transformer here (no checkpoint
# is loaded in this cell), so `ge` generates with untrained weights — confirm
# whether a fine-tuned model was meant to be used.
model=Transformer(ModelArgs)
model.to('cuda:0')
ge=LM(model,tokenizer)
rewards=RewardModel()
rewards.to('cuda:0')

RewardModel(
  (net): Sequential(
    (0): Linear(in_features=3000, out_features=1500, bias=False)
    (1): Linear(in_features=1500, out_features=1, bias=False)
  )
)

In [None]:
st = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


In [None]:
reward_loss= nn.CrossEntropyLoss()
reward_opt=optim.Adam(rewards.parameters(), lr=0.001)

In [None]:
model.eval()
rewards.train()
episodes = 5
epoch = 10
reward_batch = 32

for _ in range(episodes):
    # Draw prompt/reply pairs by index. The index list gets its own name so it
    # does not shadow `sample` from the random module, and the upper bound
    # follows the data instead of a hard-coded count.
    picks = [randint(0, len(sentence1) - 1) for _ in range(reward_batch)]
    prompts = [sentence1[i] for i in picks]
    targets = [sentence2[i] for i in picks]
    res = ge.generate(prompts, 20, 0.5)
    # keep only what was generated after each prompt
    output = [generated[len(prompt):] for prompt, generated in zip(prompts, res)]
    ebd1 = st.encode(output, convert_to_tensor=True)
    ebd2 = st.encode(targets, convert_to_tensor=True)
    # cosine similarity between each generation and its reference, scaled by 10
    score = (torch.diag(util.pytorch_cos_sim(ebd1, ebd2)).data * 10).to(device)
    seq = g_token(res)
    # The language model is not trained here: compute its logits once without
    # autograd so each reward update below builds (and frees) its own graph.
    with torch.no_grad():
        a_logi = model(seq.to(device), 0)
    # CrossEntropyLoss needs class indices in [0, n_classes); similarities can
    # be negative, which would otherwise produce an out-of-range target.
    # NOTE(review): the per-position rewards (batch, seq) are used as class
    # scores and the truncated similarity as the class index — confirm this
    # objective is what is intended.
    target_cls = score.long().clamp(min=0, max=a_logi.shape[1] - 1)
    for j in range(epoch):
        reward = rewards(a_logi)
        re_loss = reward_loss(reward.squeeze(-1), target_cls)

        reward_opt.zero_grad()
        re_loss.backward()
        reward_opt.step()

In [None]:
torch.save(rewards.state_dict(), 'reward_model_weight.pth')

In [None]:
# Hyper-parameters read by the RL cells below.
# Probability ratios are clamped to [1 - eps_clip, 1 + eps_clip].
eps_clip=0.2
# New values may differ from the old values by at most value_clip in the clipped value loss.
value_clip=0.4
# Weight of the entropy term in the policy loss.
beta_s=0.01
# Discount factor used in Advantages().
gamma=0.98
# Additional decay applied with gamma to the running advantage in Advantages().
gae_lambda=0.94

In [None]:
import copy

In [None]:
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["output"],
    lora_dropout=0.05,
    bias="none",
    #task_type="CAUSAL_LM"
)

In [None]:
model=torch.load('/content/model_pt.pth')

In [None]:
# Freeze every existing weight (adapters are trained later) and keep the
# one-dimensional parameters (e.g. norm gains, biases) in fp32 for stability.
for param in model.parameters():
    param.requires_grad_(False)
    if param.ndim == 1:
        param.data = param.data.float()

In [None]:

model = get_peft_model(model, config)

In [None]:
print_trainable_parameters(model)

trainable params: 28096 || all params: 13445496 || trainable%: 0.20896216844659357


In [None]:

# Policy: the LoRA-wrapped model itself.
actor = model
actor.to(device)
actor_optimizer = optim.Adam(actor.parameters(), lr=0.001)

# Critic: an independent copy of the policy plus a value head.
critic = Critic(copy.deepcopy(actor))
critic.to(device)
critic_optimizer = optim.Adam(critic.parameters(), lr=0.001)

# Reward model with the weights saved by the reward-training cell.
rewards = RewardModel()
rewards.to(device)
rewards.load_state_dict(torch.load('reward_model_weight.pth'))
# The generation wrapper defined in this notebook is `LM`; no `LLaMA` class exists.
ge = LM(actor, tokenizer)

In [None]:
actor

In [None]:
episodes=1
ecpoch=2

In [None]:
sample

In [None]:
import random

In [None]:
actor.train()
critic.train()
rewards.eval()
for _ in range(episodes):
    # `sample` may have been rebound to a list by an earlier cell, so call the
    # module function explicitly.
    prompts = random.sample(sentence1, 32)
    res = ge.generate(prompts, 20, 0.5)
    state_action = g_token(res).to(device)      # (batch, seq) token ids
    token_index = state_action[..., None]       # (batch, seq, 1) for gather

    for j in range(ecpoch):
        # Snapshot of policy and critic at the start of the epoch. These are
        # constants for the updates below, so they are computed without
        # autograd: no update ever back-propagates through a graph whose
        # parameters were already modified by an optimizer step.
        with torch.no_grad():
            old_logits = actor(state_action, 0)
            old_log_dist = F.log_softmax(old_logits, dim=-1)                    # (batch, seq, vocab)
            old_log_probs = old_log_dist.gather(-1, token_index).squeeze(-1)   # (batch, seq)
            old_values = critic(state_action, start_pos=0)                     # (batch, 1)
            advantages = Advantages(rewards(old_logits), old_values)           # (batch, seq, 1)

        for i in range(len(prompts)):
            seq = state_action[i].view(1, -1)

            # ---- policy update -------------------------------------------
            action_logi = actor(seq, 0)
            log_dist = F.log_softmax(action_logi, dim=-1)   # stable log-probabilities
            action_prob = log_dist.exp()
            a_log_prob = log_dist.gather(-1, seq[..., None]).squeeze(-1)        # (1, seq)
            # entropy is -sum(p * log p); it enters the loss with weight -beta_s
            entropies = -(action_prob * log_dist).sum(dim=-1)
            # compare sample i with ITS OWN snapshot, not with another sample
            kl_divs = (action_prob * (log_dist - old_log_dist[i])).sum(dim=-1)  # (1, seq)
            # squeeze the reward to (1, seq) so the subtraction is element-wise
            reward = rewards(action_logi).squeeze(-1) - kl_divs
            ada = reward - old_values[i]
            ratios = (a_log_prob - old_log_probs[i]).exp()

            total_advantage = advantages[i].squeeze(-1) + ada                   # (1, seq)
            surr1 = ratios * total_advantage
            surr2 = ratios.clamp(1 - eps_clip, 1 + eps_clip) * total_advantage
            policy_loss = -torch.min(surr1, surr2) - beta_s * entropies
            loss = policy_loss.mean()

            actor_optimizer.zero_grad()
            loss.backward()
            actor_optimizer.step()

            # ---- value update --------------------------------------------
            values = critic(seq, start_pos=0)                                   # (1, 1)
            # the reward is a fixed regression target for the critic
            value_target = reward.detach()
            old_value = old_values[i]
            value_clipped = old_value + (values - old_value).clamp(-value_clip, value_clip)
            value_loss_1 = (value_clipped - value_target) ** 2
            value_loss_2 = (values - value_target) ** 2
            value_loss = torch.max(value_loss_1, value_loss_2).mean()

            critic_optimizer.zero_grad()
            value_loss.backward()
            critic_optimizer.step()
# NOTE(review): Advantages() recurses along the first axis of its inputs, which
# is the batch axis here — confirm that per-sample (rather than per-time-step)
# recursion is intended.


In [None]:
torch.save(actor,'model_rl_pt.pth')

# llm — load the trained model and generate replies

In [None]:
# Load a pickled model object, move it to the first CUDA device and wrap it for generation.
# NOTE(review): the RL section saves the actor as 'model_rl_pt.pth', while this cell
# loads 'model_rl_ft.pth' — confirm which file is meant.
# NOTE(review): torch.load unpickles arbitrary objects; only load files you trust.
model=torch.load('model_rl_ft.pth')
model.to('cuda:0')
ge=LM(model,tokenizer)

In [None]:
# Pick one prompt at random. The module function is called explicitly because
# the bare name `sample` is rebound to a list by the reward-training cell.
input = random.sample(sentence1, 1)

In [None]:
res=ge.generate(input,20,0.5)

In [None]:
res[0][len(input[0]):]

In [None]:
def llm(input, pre_len, temp):
    """Generate a reply for each prompt and return only the newly generated text.

    Args:
        input: list of prompt strings.
        pre_len: maximum number of tokens to generate per prompt.
        temp: sampling temperature passed to ``ge.generate``.

    Returns:
        List with one string per prompt: the generated text with the leading
        ``len(prompt)`` characters removed.
    """
    g_s = ge.generate(input, pre_len, temp)
    # strip each prompt from its own generation (slicing the list itself by
    # len(input) would drop every result)
    res = [generated[len(prompt):] for prompt, generated in zip(input, g_s)]
    return res