In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [None]:

from dotenv import load_dotenv
import os

# Pull the key/value pairs of a local .env file into the process environment
load_dotenv()

# Token later handed to `from_pretrained`; None when ACCESS_TOKEN is not set
access_token = os.environ.get("ACCESS_TOKEN")

In [None]:
# Repository id of the checkpoint; also reused as the sub-directory name for the local copies
model_id = "Qwen/Qwen3-0.6B"

In [None]:
# Both downloads authenticate with the same token, so share the keyword arguments
hub_kwargs = {"token": access_token}
tokenizer = AutoTokenizer.from_pretrained(model_id, **hub_kwargs)
model = AutoModelForCausalLM.from_pretrained(model_id, **hub_kwargs)

In [None]:
# Write tokenizer and weights into per-model folders so later cells can load them offline
tokenizer_dir = f"tokenizer/{model_id}"
model_dir = f"model/{model_id}"
tokenizer.save_pretrained(tokenizer_dir)
model.save_pretrained(model_dir)

In [None]:
# Reload from the folders written above; local_files_only=True restricts loading to local files
tokenizer_dir = f"tokenizer/{model_id}"
model_dir = f"model/{model_id}"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir, local_files_only=True)
model = AutoModelForCausalLM.from_pretrained(model_dir, local_files_only=True)

In [None]:
# Report the CUDA setup. The device name is only queried when a GPU is present:
# asking for device 0 on a CPU-only machine raises instead of printing anything.
print(torch.cuda.is_available())
print(torch.cuda.device_count())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device available; the notebook will run on CPU.")

In [None]:
# Prefer the GPU when one is visible, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)
# Kept as the final expression so the cell still displays the model
model.to(device)

In [None]:
# Sample prompt, tokenised into PyTorch tensors and moved to the selected device
prompt = "what is the capital of france"
encoded_prompt = tokenizer(prompt, return_tensors="pt")
inputs = encoded_prompt.to(device)


In [None]:
# Generate text
# The whole encoding is unpacked (not just input_ids) so that the attention mask
# produced by the tokenizer is forwarded to `generate` as well.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_length=100,  # cap on prompt tokens + newly generated tokens
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        num_return_sequences=1
    )

In [None]:
# Turn the first (and only) returned sequence back into text and show it
first_sequence = outputs[0]
generated_text = tokenizer.decode(first_sequence, skip_special_tokens=True)
print(generated_text)

In [None]:

from transformers import AutoModelForCausalLM, AutoTokenizer

class QwenChatbot:
    """Multi-turn chat wrapper around a locally stored causal language model.

    The tokenizer and model are loaded from ``tokenizer/<model_id>`` and
    ``model/<model_id>`` (local files only). Each call to
    ``generate_response`` sends the accumulated history plus the new user
    message through the tokenizer's chat template and records the exchange.
    """

    def __init__(self, model_id="Qwen/Qwen3-0.6B"):
        """Load tokenizer and model for ``model_id`` from disk and start with an empty history."""
        self.tokenizer = AutoTokenizer.from_pretrained(f"tokenizer/{model_id}", local_files_only=True)
        self.model = AutoModelForCausalLM.from_pretrained(f"model/{model_id}", local_files_only=True)
        # Alternating {"role": ..., "content": ...} dicts, oldest first
        self.history = []

    def generate_response(self, user_input, max_new_tokens=32768):
        """Generate the assistant reply to ``user_input`` and append the turn to the history.

        Args:
            user_input: Text of the new user message.
            max_new_tokens: Upper bound on the number of tokens generated for
                this reply. Defaults to the previously hard-coded 32768.

        Returns:
            The decoded reply, with special tokens skipped.
        """
        messages = self.history + [{"role": "user", "content": user_input}]

        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Put the tensors on whatever device the model currently lives on, so the
        # call keeps working after the caller moves the model (e.g. to a GPU).
        inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
        prompt_length = len(inputs.input_ids[0])

        # The first returned sequence starts with the prompt; keep only what follows it.
        output_ids = self.model.generate(**inputs, max_new_tokens=max_new_tokens)[0]
        response_ids = output_ids[prompt_length:].tolist()
        response = self.tokenizer.decode(response_ids, skip_special_tokens=True)

        # Update history (only after generation succeeded)
        self.history.append({"role": "user", "content": user_input})
        self.history.append({"role": "assistant", "content": response})

        return response

# Example Usage
if __name__ == "__main__":
    chatbot = QwenChatbot()

    # Three consecutive turns on the same chatbot instance:
    #   1. no /think or /no_think tag (thinking mode is enabled by default)
    #   2. tagged with /no_think
    #   3. tagged with /think
    demo_turns = (
        "How many r's in strawberries?",
        "Then, how many r's in blueberries? /no_think",
        "Really? /think",
    )
    final_index = len(demo_turns) - 1

    for turn_index, user_text in enumerate(demo_turns):
        print(f"User: {user_text}")
        bot_reply = chatbot.generate_response(user_text)
        print(f"Bot: {bot_reply}")
        # Separator goes between turns only, not after the last one
        if turn_index != final_index:
            print("----------------------")


In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import List, Dict, Any, Optional
from rag import Rag
import torch
class QwenRAGChatbot:
    """Retrieval-augmented chatbot built on a locally stored causal language model.

    For every query the chatbot asks its ``Rag`` instance for documents, joins
    their text into a context block, and prompts the model to answer from that
    context. The conversation history stores the plain user queries and the
    model replies.
    """

    def __init__(self, model_id="Qwen/Qwen3-0.6B", faiss_index_path="faiss_index",
                 max_new_tokens: int = 1024):
        """Load tokenizer, model and retriever.

        Args:
            model_id: Name of the sub-directory under ``tokenizer/`` and
                ``model/`` to load from (local files only).
            faiss_index_path: Forwarded to ``Rag`` as ``faiss_index_path``.
            max_new_tokens: Upper bound on tokens generated per answer.
                Defaults to the previously hard-coded 1024.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(f"tokenizer/{model_id}", local_files_only=True)
        self.model = AutoModelForCausalLM.from_pretrained(f"model/{model_id}", local_files_only=True).to(self.device)
        self.rag = Rag(faiss_index_path=faiss_index_path)
        self.max_new_tokens = max_new_tokens
        # Alternating {"role": ..., "content": ...} dicts, oldest first
        self.history = []

    def format_docs(self, docs) -> str:
        """Join the ``page_content`` of each ``(doc, score)`` pair with blank lines."""
        return "\n\n".join(doc.page_content for doc, _ in docs)

    def retrieve(self, query: str) -> List:
        """Return whatever ``self.rag.retriever`` yields for ``query``.

        NOTE(review): the rest of the class unpacks the result as
        ``(doc, score)`` pairs -- confirm against the ``Rag`` implementation.
        """
        return self.rag.retriever(query)

    def generate_answer(self, query: str, context: str) -> str:
        """Answer ``query`` using ``context`` and record the turn in the history.

        Args:
            query: The user's original question.
            context: Retrieved text inserted into the prompt.

        Returns:
            The decoded model reply, with special tokens skipped.
        """
        prompt = f"""Answer the question based on the provided context. If you cannot find the answer in the context, 
say that you don't know but try to provide general information related to the question.

Context:
{context}

Question: {query}

Answer:"""

        messages = self.history + [{"role": "user", "content": prompt}]

        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        inputs = self.tokenizer(text, return_tensors="pt").to(self.device)
        prompt_length = len(inputs.input_ids[0])

        # The first returned sequence starts with the prompt; keep only what follows it.
        output_ids = self.model.generate(**inputs, max_new_tokens=self.max_new_tokens)[0]
        response_ids = output_ids[prompt_length:].tolist()
        response = self.tokenizer.decode(response_ids, skip_special_tokens=True)

        # Update history with the actual user query (not the augmented prompt)
        self.history.append({"role": "user", "content": query})
        self.history.append({"role": "assistant", "content": response})

        return response

    def chat(self, query: str) -> Dict[str, Any]:
        """Run retrieval and generation for ``query``.

        Returns:
            On success, ``{"answer": str, "sources": [...]}`` where each source
            holds the document ``content``, its ``source`` metadata value
            (``"Unknown"`` when absent) and its ``score``.
            On any exception, ``{"error": str(exception)}`` -- callers must
            check for the ``"error"`` key before reading ``"answer"``.
        """
        try:
            retrieved_docs = self.retrieve(query)
            context = self.format_docs(retrieved_docs)
            answer = self.generate_answer(query, context)

            return {
                "answer": answer,
                "sources": [{"content": doc.page_content,
                             "source": doc.metadata.get("source", "Unknown"),
                             "score": score}
                            for doc, score in retrieved_docs]
            }
        except Exception as e:
            # Best-effort by design: failures are reported in-band rather than raised
            return {"error": str(e)}

    def clear_history(self):
        """Forget all previous turns."""
        self.history = []




  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Build the RAG chatbot with its default model and index settings
chatbot = QwenRAGChatbot()

# Ask a single question; the result dict is inspected in the following cells
query = "What is your product?"
response = chatbot.chat(query=query)

In [None]:
# `chat` reports failures as {"error": ...} with no "answer" key, so check for
# that shape first instead of failing with a KeyError.
if "error" in response:
    print(f"Error: {response['error']}\n")
else:
    print(f"Answer: {response['answer']}\n")

Question: What is your product?

Answer: <think>
Okay, let me see. The user is asking for the answer to the question "What is your product?" based on the provided context. 

Looking at the documents, Document 1 is the company overview. In that document, there's a mention: "TechTrend Innovations is a leading technology company specializing in artificial intelligence and machine learning solutions." So the product here is AI and machine learning solutions.

Then there's Document 4, which is the product specifications for IntelliBot Chatbot. The product name is IntelliBot, version 2.1, released March 10, 2025, and the key features include AI-driven chatbots for customer service. The system requirements and pricing are listed there too. 

So, the product mentioned in the FAQs is the IntelliBot chatbot. The answer should be that the product is IntelliBot, which is an AI-driven chatbot. 

I need to make sure there's no conflicting information. The FAQs section only includes the question and 

In [18]:
import re

# Reasoning block emitted by the model. The closing tag is optional: when the
# generation stops before </think> is produced, everything after <think> is
# still treated as reasoning rather than being reported as the final answer.
think_pattern = re.compile(r"<think>(.*?)(?:</think>|\Z)", re.DOTALL)
answer_text = response['answer']

# Extract content within <think> and </think>
think_content = think_pattern.search(answer_text)
if think_content:
    print("Content within <think> tags:")
    print(think_content.group(1).strip())
else:
    print("No content found within <think> tags.")

# Remove content within <think> and </think> from the answer
cleaned_answer = think_pattern.sub("", answer_text).strip()
if cleaned_answer:
    print("Answer without <think> tags:")
    print(cleaned_answer)
else:
    print("No content found outside <think> tags.")

Content within <think> tags:
Okay, let me see. The user is asking for the answer to the question "What is your product?" based on the provided context. 

Looking at the documents, Document 1 is the company overview. In that document, there's a mention: "TechTrend Innovations is a leading technology company specializing in artificial intelligence and machine learning solutions." So the product here is AI and machine learning solutions.

Then there's Document 4, which is the product specifications for IntelliBot Chatbot. The product name is IntelliBot, version 2.1, released March 10, 2025, and the key features include AI-driven chatbots for customer service. The system requirements and pricing are listed there too. 

So, the product mentioned in the FAQs is the IntelliBot chatbot. The answer should be that the product is IntelliBot, which is an AI-driven chatbot. 

I need to make sure there's no conflicting information. The FAQs section only includes the question and the answer from Docu

In [None]:
# List every retrieved source with its score, origin and a 150-character preview
print("Sources:")
retrieved_sources = response.get("sources", [])
for rank, entry in enumerate(retrieved_sources, start=1):
    print(f"Source {rank} (score: {entry['score']:.4f}):")
    print(f"From: {entry['source']}")
    preview = entry['content'][:150]
    print(f"Content preview: {preview}...\n")