In [10]:
from huggingface_hub import login

# Authenticate against the Hugging Face Hub before the model is downloaded.
# login() prompts for an access token (shown as a widget in the notebook).
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
import gradio as gr
from PyPDF2 import PdfReader
import numpy as np

In [4]:
# Fetch the tokenizer and the causal-LM weights for the chosen checkpoint
model_name = "meta-llama/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Weights are loaded in half precision and then moved onto the GPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
)
model = model.to("cuda")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [5]:
# Reuse the EOS token as the pad token so that tokenizer calls made with
# padding=True have a token to pad with.
tokenizer.pad_token = tokenizer.eos_token

In [16]:
# Step 2: Load and Process HR Policy Documents
def load_and_split_pdf(pdf_path):
    """Extract the text of a PDF and split it into paragraph-sized chunks.

    Args:
        pdf_path: Location of the PDF; passed straight to ``PdfReader``.

    Returns:
        list[str]: Whitespace-trimmed, non-empty chunks in page order. Pages
        for which ``extract_text()`` returns nothing contribute no chunks.
    """
    reader = PdfReader(pdf_path)
    text_chunks = []
    for page in reader.pages:
        text = page.extract_text()
        if not text:
            continue
        # Paragraphs are separated by blank lines. Consecutive blank lines or
        # trailing whitespace would otherwise yield empty chunks that get
        # embedded and can be selected as the "best" context.
        for paragraph in text.split("\n\n"):
            paragraph = paragraph.strip()
            if paragraph:
                text_chunks.append(paragraph)
    return text_chunks

In [7]:
# Step 3: Generate Embeddings using LLaMA
def generate_embeddings(text):
    """Embed text as the attention-masked mean of the model's last hidden states.

    Args:
        text: A string (or list of strings) to embed.

    Returns:
        numpy.ndarray: float32 array with one row per input sequence and one
        column per hidden dimension.
    """
    # Follow the model's device instead of assuming a GPU is present.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(model.device)
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)
    # Upcast so the pooled vector is float32; float16 vectors overflow in the
    # NumPy dot/norm computations performed on these embeddings later.
    hidden_states = outputs.hidden_states[-1].float()
    # Average only over real tokens: padded positions are zeroed out and
    # excluded from the divisor.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden_states.dtype)
    summed = (hidden_states * mask).sum(dim=1)
    token_counts = mask.sum(dim=1).clamp(min=1.0)
    return (summed / token_counts).cpu().numpy()

In [8]:
# Step 4: Cosine Similarity Function
def cosine_similarity(a, b):
    """Calculate the cosine similarity between two vectors.

    Args:
        a: Array-like of any shape; flattened to one dimension.
        b: Array-like with the same number of elements as ``a``.

    Returns:
        float: Cosine of the angle between the flattened vectors, or 0.0 when
        either vector has zero norm.
    """
    # Compute in float64: with float16 inputs the dot product and norms
    # overflow to inf and the ratio becomes NaN.
    a = np.asarray(a, dtype=np.float64).ravel()
    b = np.asarray(b, dtype=np.float64).ravel()
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0.0:
        # A zero vector has no direction; report "no similarity" instead of
        # NaN, which would poison a later argmax over the scores.
        return 0.0
    return float(np.dot(a, b) / denominator)


In [17]:
# Custom stopping criteria to stop generation at the end of a complete thought
class IntelligentStoppingCriteria(StoppingCriteria):
    """Stop generation at a sentence boundary once the model looks confident.

    Generation is halted when the decoded text of sequence 0 is longer than
    ``initial_length`` characters, ends with a period, and the mean top-token
    probability over the generated steps exceeds ``threshold``.

    Args:
        tokenizer: Tokenizer used to decode ``input_ids``.
        initial_length: Character length of the prompt; text no longer than
            this is treated as "nothing generated yet".
        threshold: Minimum mean top-token probability (0..1) required to stop.

    NOTE(review): ``scores`` is ``None`` unless ``generate`` is asked to return
    scores (``output_scores=True`` with ``return_dict_in_generate=True``) --
    confirm for the installed transformers version. With ``scores=None`` this
    criterion never fires.
    """

    def __init__(self, tokenizer, initial_length, threshold=0.9):
        self.tokenizer = tokenizer
        self.initial_length = initial_length
        self.threshold = threshold  # Confidence threshold for stopping

    def _mean_confidence(self, scores):
        """Return the mean top-token probability for sequence 0, or None if no steps."""
        # Accept either a single (batch, vocab) tensor or a sequence of them.
        steps = [scores] if torch.is_tensor(scores) else list(scores)
        if not steps:
            return None
        # Raw scores are logits; softmax turns them into probabilities so the
        # result is on the same 0..1 scale as the threshold.
        top_probs = [torch.softmax(step[0].float(), dim=-1).max() for step in steps]
        return torch.stack(top_probs).mean().item()

    def __call__(self, input_ids, scores, **kwargs):
        # Decode the full sequence (prompt + generated tokens) of batch item 0
        generated_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)

        # Only consider stopping once new text exists and a sentence just ended
        if len(generated_text) <= self.initial_length or not generated_text.endswith('.'):
            return False
        if scores is None:
            return False

        confidence = self._mean_confidence(scores)
        return confidence is not None and confidence > self.threshold


In [18]:
# Step 5: Build Question-Answering System
def get_best_answer(user_query, text_chunks, embeddings):
    """Pick the chunk most similar to the query and generate an answer from it.

    Args:
        user_query: The user's question.
        text_chunks: Candidate context strings.
        embeddings: One embedding per entry of ``text_chunks``, in the same order.

    Returns:
        str: The newly generated text only (the prompt is not echoed back),
        stripped of surrounding whitespace.

    Raises:
        ValueError: If ``text_chunks`` or ``embeddings`` is empty.
    """
    if len(text_chunks) == 0 or len(embeddings) == 0:
        raise ValueError("No document text is available to answer the query.")

    query_embedding = generate_embeddings(user_query)
    similarities = [cosine_similarity(query_embedding, emb) for emb in embeddings]
    best_chunk = text_chunks[int(np.argmax(similarities))]

    # Refine prompt to guide the model
    prompt = f"Answer the following query concisely but completely based on the provided context:\n\n{user_query}\n\nContext: {best_chunk}"

    # Tokenize on whatever device the model lives on instead of assuming CUDA
    inputs = tokenizer(prompt, return_tensors="pt", truncation=False, padding=True).to(model.device)
    stopping_criteria = StoppingCriteriaList([IntelligentStoppingCriteria(tokenizer, len(prompt))])

    with torch.no_grad():
        response = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=500,  # Upper bound; the stopping criteria may end generation earlier
            stopping_criteria=stopping_criteria,
            pad_token_id=tokenizer.eos_token_id
        )

    # generate() returns prompt tokens followed by new tokens; decode only the
    # continuation so the user sees the answer rather than the prompt echoed.
    prompt_token_count = inputs.input_ids.shape[1]
    answer_ids = response[0][prompt_token_count:]
    return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()


In [19]:
# Step 6: Design Gradio Interface
# Chunks and embeddings keyed by PDF path; filled on first use and reused for
# later queries. Re-run this cell to pick up changes to the PDF.
_KNOWLEDGE_BASE_CACHE = {}


def _load_knowledge_base(pdf_path):
    """Return (text_chunks, embeddings) for pdf_path, computing them only once."""
    if pdf_path not in _KNOWLEDGE_BASE_CACHE:
        text_chunks = load_and_split_pdf(pdf_path)
        embeddings = [generate_embeddings(chunk) for chunk in text_chunks]
        # Stored only after both steps succeed, so a failed load is retried.
        _KNOWLEDGE_BASE_CACHE[pdf_path] = (text_chunks, embeddings)
    return _KNOWLEDGE_BASE_CACHE[pdf_path]


def chatbot_interface(user_query):
    """Answer a user question from the HR policy PDF.

    The PDF is parsed and embedded once and then served from a cache, rather
    than re-running the model over every chunk for each question.

    Args:
        user_query: The question typed by the user.

    Returns:
        str: The generated answer, or a generic error message if anything fails.
    """
    try:
        text_chunks, embeddings = _load_knowledge_base("nestle_hr_policy.pdf")
        return get_best_answer(user_query, text_chunks, embeddings)
    except Exception as e:
        # Keep the UI responsive; details go to the notebook/server output.
        print(f"An error occurred: {e}")
        return "Error: Something went wrong. Please try again later."

In [20]:
# Step 7: Gradio Interface for User Interaction
# One two-line text box in, plain text out; the callback runs on submit
# rather than on every keystroke (live=False).
interface = gr.Interface(
    chatbot_interface,
    gr.Textbox(placeholder="Enter your HR-related question here...", lines=2),
    "text",
    live=False,
    title="Nestlé HR Assistant",
    description="Ask any question related to Nestlé's HR policies, and the assistant will provide you with accurate information.",
)

In [21]:
# Start the Gradio app. With default arguments it is served on a local URL
# only; the launch output notes that share=True creates a public link.
interface.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




In [35]:
from docx import Document

# Create a new Document
doc = Document()

# Add a title
doc.add_heading('Nestlé HR Assistant Final Code Implementation', level=1)

# The code listing written into the document.
# It is a raw string delimited by ''' so that:
#   * the \n escapes inside the listed code stay as the two characters "\n"
#     instead of turning into real line breaks, and
#   * the listed docstrings keep their """ quotes (inside a """-delimited
#     string, a doubled """""" ends and reopens the literal, silently
#     dropping the quotes from the text).
# The listing names AutoTokenizer / AutoModelForCausalLM, matching the classes
# the notebook actually ran.
code_block = r'''
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList
import gradio as gr
from PyPDF2 import PdfReader
import numpy as np
from huggingface_hub import login

# Step 1: Login to Hugging Face (ensure you have the appropriate access)
login()  # This will prompt you to enter your Hugging Face token

# Load LLaMA model and tokenizer
model_name = "meta-llama/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # Set pad_token to eos_token
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).to("cuda")

# Step 2: Load and Process HR Policy Documents
def load_and_split_pdf(pdf_path):
    """Loads and splits the PDF document into manageable chunks of text."""
    reader = PdfReader(pdf_path)
    text_chunks = []
    for page in reader.pages:
        text = page.extract_text()
        if text:
            text_chunks.extend(text.split("\n\n"))  # Split into chunks by paragraphs
    return text_chunks

# Step 3: Generate Embeddings using LLaMA
def generate_embeddings(text):
    """Generate embeddings using LLaMA model."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to("cuda")
    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)
    hidden_states = outputs.hidden_states[-1]  # Get the last hidden state
    return hidden_states.mean(dim=1).cpu().numpy()  # Return the mean of hidden states

# Step 4: Cosine Similarity Function
def cosine_similarity(a, b):
    """Calculate the cosine similarity between two vectors."""
    a = a.flatten()  # Flatten the vector to ensure it's one-dimensional
    b = b.flatten()  # Flatten the vector to ensure it's one-dimensional
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Custom stopping criteria to stop generation at the end of a complete thought
class IntelligentStoppingCriteria(StoppingCriteria):
    def __init__(self, tokenizer, initial_length, threshold=0.9):
        self.tokenizer = tokenizer
        self.initial_length = initial_length
        self.threshold = threshold  # Confidence threshold for stopping

    def __call__(self, input_ids, scores, **kwargs):
        # Decode the generated text
        generated_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
        
        # Stop if we reached the end of a sentence and the score is above the threshold
        if len(generated_text) > self.initial_length and generated_text.endswith('.'):
            if scores is not None:
                avg_score = torch.mean(torch.stack(scores)).item()  # Calculate average confidence
                if avg_score > self.threshold:
                    return True
        return False

# Step 5: Build Question-Answering System
def get_best_answer(user_query, text_chunks, embeddings):
    """Finds the most relevant text chunk based on user query and generates a response."""
    query_embedding = generate_embeddings(user_query)
    similarities = [cosine_similarity(query_embedding, emb) for emb in embeddings]
    best_chunk_index = np.argmax(similarities)
    best_chunk = text_chunks[best_chunk_index]
    
    # Refine prompt to guide the model
    prompt = f"Answer the following query concisely but completely based on the provided context:\n\n{user_query}\n\nContext: {best_chunk}"

    # Generate response using the best text chunk
    inputs = tokenizer(prompt, return_tensors="pt", truncation=False, padding=True).to("cuda")
    stopping_criteria = StoppingCriteriaList([IntelligentStoppingCriteria(tokenizer, len(prompt))])

    with torch.no_grad():
        response = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=500,  # Allow sufficient tokens but let stopping criteria control the length
            stopping_criteria=stopping_criteria,
            pad_token_id=tokenizer.eos_token_id
        )
    
    generated_text = tokenizer.decode(response[0], skip_special_tokens=True)
    return generated_text.strip()

# Step 6: Design Gradio Interface
def chatbot_interface(user_query):
    """Handles the interaction between the user and the chatbot."""
    try:
        text_chunks = load_and_split_pdf("nestle_hr_policy.pdf")
        embeddings = [generate_embeddings(chunk) for chunk in text_chunks]
        answer = get_best_answer(user_query, text_chunks, embeddings)
        return answer
    except Exception as e:
        print(f"An error occurred: {e}")
        return "Error: Something went wrong. Please try again later."

# Step 7: Gradio Interface for User Interaction
interface = gr.Interface(
    fn=chatbot_interface, 
    inputs=gr.Textbox(lines=2, placeholder="Enter your HR-related question here..."), 
    outputs="text", 
    title="Nestlé HR Assistant",
    description="Ask any question related to Nestlé's HR policies, and the assistant will provide you with accurate information.",
    live=False
)

# Launch the interface
interface.launch()

'''

# Add code block to the document
doc.add_paragraph(code_block)

# Save the document
doc.save("Nestle_HR_Assistant_Code.docx")
