# Librairies

In [2]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

## Let's build Bert

## Custom bert

In [21]:
#Q of shape (Batch size, sequence_len)


class Attention(nn.Module):
    def __init__(self, dk, dv, d_model,attention_probs_dropout_prob,device='cpu'):
        super().__init__()
        self.WQ = nn.Linear(d_model,dk, device = device)
        self.WK = nn.Linear(d_model,dk, device = device)
        self.WV = nn.Linear(d_model,dv, device = device)
        self.dk = dk
        self.dropout = nn.Dropout(attention_probs_dropout_prob)


    def forward(self, q, k, v ):
        q = self.WQ(q)
        k = self.WK(k)
        v = self.WV(v)
        attention_scores  = F.softmax(torch.matmul(q,k.transpose(1,2))/np.sqrt(self.dk),dim=-1)
        
        attention_matrix  = torch.matmul(attention_scores, v)

        return self.dropout(attention_matrix)
    

class MultiHeadAttention(nn.Module):
    def __init__(self, n_head, dk, dv, d_model,attention_probs_dropout_prob, device='cpu'):
        super().__init__()
        self.attentions = nn.ModuleList([Attention(dk,dv, d_model,attention_probs_dropout_prob, device) for _ in range(n_head)])

        self.output = nn.Linear(n_head*dv, d_model, device = device )


    def forward(self, q, k, v):

        attention_matrices = [attention(q,k,v) for attention in self.attentions]

        attentions = torch.cat(attention_matrices, dim=-1)

        return self.output(attentions)
    

class FeedForward(nn.Module):
    def __init__(self, d_model, hidden_dim,hidden_dropout_prob, device = 'cpu'):
        super().__init__()
        self.ff2 = nn.Linear(hidden_dim, d_model, device = device)
        self.ff1 = nn.Linear(d_model, hidden_dim, device = device)
        self.GELU = nn.GELU()
        self.dropout = nn.Dropout(hidden_dropout_prob)  


    def forward(self, x):
        x = self.ff1(x)
        x = self.GELU(x)
        x = self.ff2(x)

        return self.dropout(x)

class BERTLayer(nn.Module):
    def __init__(self, n_head,dk, dv, d_model, hidden_dim,hidden_dropout_prob,attention_probs_dropout_prob, device = 'cpu'):
        super().__init__()
        self.mha = MultiHeadAttention( n_head, dk, dv, d_model,attention_probs_dropout_prob, device)
        

        self.feedforward = FeedForward(d_model, hidden_dim,hidden_dropout_prob, device)

    
        self.norm1 =  nn.LayerNorm(d_model,device = device)
        self.norm2 = nn.LayerNorm(d_model,device=device)



    def forward(self,x):
        
        x =  self.norm1(self.mha(x,x,x)+x)
        x = self.norm2(self.feedforward(x)+x)

        return x
    


class Bert(nn.Module):
    def __init__(self, n_layer, n_head,dk, dv, d_model, hidden_dim, vocab_size,max_seq_len,hidden_dropout_prob,attention_probs_dropout_prob, device = 'cpu'):
        super().__init__()
        self.layers = nn.ModuleList([BERTLayer(n_head,dk, dv, d_model, hidden_dim,hidden_dropout_prob,attention_probs_dropout_prob, device) for _ in range(n_layer)])
        self.n_layer = n_layer
        self.tok_embedding = nn.Embedding(vocab_size, d_model, device=device)
        self.pos_embedding = nn.Embedding(max_seq_len, d_model, device=device)
        self.segment_embedding = nn.Embedding(2, d_model, device=device) 
        self.device = device


    def forward(self, x, segment_ids):
        batch_size, seq_len = x.size()
        positions = torch.arange(0, seq_len, device=self.device).unsqueeze(0).expand(batch_size, seq_len)

        x = (
            self.tok_embedding(x)
            + self.pos_embedding(positions)
            + self.segment_embedding(segment_ids)
        )
        for layer in self.layers:
            x =  layer(x)

        return x 



        
## In the paper of the paper of the BERTbased the hyperparameter where:
device = ('cuda' if torch.cuda.is_available() else 'cpu')
model = Bert(
    n_layer=12,
    n_head=12,
    dk=64,
    dv=64,
    d_model=768,
    hidden_dim=3072,
    vocab_size=30522,
    max_seq_len=512,
    hidden_dropout_prob = 0.1,
    attention_probs_dropout_prob = 0.1,
    device=device
)

## Load weight from huggingFace

In [23]:
from transformers import BertModel

#load the weight from huggingface
hf_model = BertModel.from_pretrained("bert-base-uncased")
hf_state_dict = hf_model.state_dict()

model.tok_embedding.weight.data.copy_(hf_state_dict['embeddings.word_embeddings.weight'])
model.pos_embedding.weight.data.copy_(hf_state_dict['embeddings.position_embeddings.weight'])
model.segment_embedding.weight.data.copy_(hf_state_dict['embeddings.token_type_embeddings.weight'])

for layer_idx in range(12):
    custom_layer = model.layers[layer_idx]
    hf_prefix = f'encoder.layer.{layer_idx}'

    hf_q = hf_state_dict[f'{hf_prefix}.attention.self.query.weight']
    hf_k = hf_state_dict[f'{hf_prefix}.attention.self.key.weight']
    hf_v = hf_state_dict[f'{hf_prefix}.attention.self.value.weight']
    hf_qb = hf_state_dict[f'{hf_prefix}.attention.self.query.bias']
    hf_kb = hf_state_dict[f'{hf_prefix}.attention.self.key.bias']
    hf_vb = hf_state_dict[f'{hf_prefix}.attention.self.value.bias']

    for head_idx in range(12):
        start = head_idx * 64
        end = (head_idx + 1) * 64

        attn_head = custom_layer.mha.attentions[head_idx]
        attn_head.WQ.weight.data.copy_(hf_q[start:end, :])
        attn_head.WK.weight.data.copy_(hf_k[start:end, :])
        attn_head.WV.weight.data.copy_(hf_v[start:end, :])

        attn_head.WQ.bias.data.copy_(hf_qb[start:end])
        attn_head.WK.bias.data.copy_(hf_kb[start:end])
        attn_head.WV.bias.data.copy_(hf_vb[start:end])

    custom_layer.mha.output.weight.data.copy_(
        hf_state_dict[f'{hf_prefix}.attention.output.dense.weight']
    )
    custom_layer.mha.output.bias.data.copy_(
        hf_state_dict[f'{hf_prefix}.attention.output.dense.bias']
    )

    custom_layer.norm1.weight.data.copy_(
        hf_state_dict[f'{hf_prefix}.attention.output.LayerNorm.weight']
    )
    custom_layer.norm1.bias.data.copy_(
        hf_state_dict[f'{hf_prefix}.attention.output.LayerNorm.bias']
    )

    custom_layer.feedforward.ff1.weight.data.copy_(
        hf_state_dict[f'{hf_prefix}.intermediate.dense.weight']
    )
    custom_layer.feedforward.ff1.bias.data.copy_(
        hf_state_dict[f'{hf_prefix}.intermediate.dense.bias']
    )
    custom_layer.feedforward.ff2.weight.data.copy_(
        hf_state_dict[f'{hf_prefix}.output.dense.weight']
    )
    custom_layer.feedforward.ff2.bias.data.copy_(
        hf_state_dict[f'{hf_prefix}.output.dense.bias']
    )

    custom_layer.norm2.weight.data.copy_(
        hf_state_dict[f'{hf_prefix}.output.LayerNorm.weight']
    )
    custom_layer.norm2.bias.data.copy_(
        hf_state_dict[f'{hf_prefix}.output.LayerNorm.bias']
    )

print("All weights loaded from HuggingFace into the model!")


All weights loaded from HuggingFace into the model!


# RAG pipeline with LLM

## Use our custom bert for embeddings creation

In [46]:
from langchain.embeddings.base import Embeddings
class customBERTEmbedding(Embeddings):
    def __init__(self, model, tokenizer, device='cpu'):
        super().__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    def embed_documents(self, texts):
        return [self._encode(text) for text in texts]
    
    def embed_query(self, text):
        return self._encode(text)
    
    def _encode(self,text):
        tokens = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
        input_ids = tokens["input_ids"].to(self.device)
        token_type_ids = tokens["token_type_ids"].to(self.device)

        with torch.no_grad():
            output = self.model(input_ids, token_type_ids)
        return output[:, 0, :].squeeze(0).cpu().numpy()
    





In [115]:
tokenizer =  BertTokenizer.from_pretrained("bert-base-uncased")
text ="i like"
tokens = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
with torch.no_grad():
    a= model(tokens['input_ids'].to(device),tokens['token_type_ids'].to(device))

a[:, 0, :].squeeze(0).cpu().numpy()

array([-2.65726030e-01, -2.13014893e-02, -8.69558901e-02,  1.48365214e-01,
        1.76800609e-01, -3.63985479e-01,  4.23639625e-01,  1.46304592e-01,
       -5.11988819e-01, -2.00829223e-01, -4.06282097e-01,  4.06917967e-02,
       -2.24899203e-01,  1.40499860e-01,  3.65187377e-02,  1.04855657e-01,
       -3.56116056e-01,  3.78544360e-01,  1.37843788e-01, -2.61878073e-01,
       -8.52976367e-03, -3.59228045e-01, -5.17454684e-01, -2.01545537e-01,
       -2.05509856e-01, -3.38057280e-01,  1.83744028e-01,  4.14895236e-01,
        6.08539172e-02,  2.84590006e-01, -8.40660259e-02,  4.02905345e-02,
        1.19624868e-01, -3.58762801e-01,  1.91606462e-01, -2.62056980e-02,
       -1.53285637e-01, -6.93488866e-03, -2.34032542e-01, -8.47237278e-03,
        1.14708841e-01,  3.33218277e-02,  4.20786083e-01, -4.48948033e-02,
        7.21191093e-02,  3.09038967e-01, -2.34791279e+00,  2.59799254e-03,
       -2.70282507e-01,  3.30774561e-02,  5.01673698e-01,  2.88069189e-01,
        1.01765446e-01,  

## Load the data : Question answer from standford and build the vector data base

In [34]:
from datasets import load_dataset

dataset = load_dataset("squad")
print(dataset['train'][0])


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating train split: 100%|██████████| 87599/87599 [00:00<00:00, 912889.02 examples/s]
Generating validation split: 100%|██████████| 10570/10570 [00:00<?, ? examples/s]

{'id': '5733be284776f41900661182', 'title': 'University_of_Notre_Dame', 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.', 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?', 'answers': {'text': ['Saint Bernadette Soubirous'], 'answer_start': [515]}}





In [117]:
from langchain.vectorstores import Chroma
from transformers import BertTokenizer

#Load tokenizer from Huggingface
tokenizer =  BertTokenizer.from_pretrained("bert-base-uncased")
#Instantiate the custom Embedding using our Bert model

custom_embedding = customBERTEmbedding(model=model, tokenizer=tokenizer, device=device)

#retreive the data context from our data and create our vector database with chroma db
# this can take a lot of time
contexts = list(set([item['context'] for item in dataset['train']]))
db = Chroma.from_texts(contexts, embedding=custom_embedding, persist_directory="./chroma_db",)
retriever = db.as_retriever()

## Let's now build the RAG

In [73]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
import streamlit as st

@st.cache_resource
def load_generator():
    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large")
    model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large")

    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=128,
        do_sample=True,
        top_k=50,
        top_p=0.5
    )

    return HuggingFacePipeline(pipeline=pipe)

In [53]:
dataset['train'][0]

{'id': '5733be284776f41900661182',
 'title': 'University_of_Notre_Dame',
 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.',
 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?',
 'answers': {'text': ['Saint Bernadette Soubirous'], 'answer_start': [515]}}

In [74]:

llm = load_generator()

# Build the RAG Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff"
)
query ="What is the capital of France?"
qa_chain.run(query)

Device set to use cuda:0


'Use the following pieces of context to answer the question at the end. If you don\'t know the answer to the question, just say that you do not know, don\'t try to make up an answer, and if you do know, please tell us the answer, not the answer you think you know, and don\'t tell us what you think is wrong with the answer.If you\'re not sure, ask your parents.In the U.S. News & World Report\'s "America’s Best Colleges" 2016 issue, Kansas State University was ranked tied for 90th among national universities.=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-'