In [5]:
!pip install transformers torch




In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the model and tokenizer
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move model to GPU if available
if torch.cuda.is_available():
    model = model.to("cuda")

def generate_answer(question, max_length=50, temperature=0.7, top_k=50):
    # Create a prompt structure to guide the model
    prompt = f"Q: {question}\nA:"

    # Tokenize input
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    if torch.cuda.is_available():
        input_ids = input_ids.to("cuda")

    # Generate response with adjusted parameters
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            temperature=temperature,
            top_k=top_k,
            num_return_sequences=1,
            no_repeat_ngram_size=2
        )

    # Decode and return answer
    answer = tokenizer.decode(output[0], skip_special_tokens=True)

    # Extract answer text after "A:"
    answer = answer.split("A:")[-1].strip()
    return answer

# Test with a question
question = "What is the capital of France?"
answer = generate_answer(question)
print("Q:", question)
print("A:", answer)




Q: What is the capital of France?
A: The capital city of the country is Paris.


In [11]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model and tokenizer
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move model to GPU if available
if torch.cuda.is_available():
    model = model.to("cuda")

# Initialize a conversation history
conversation_history = []

def generate_answer(question, max_length=100, temperature=0.7, top_k=50):
    # Add the new question to the conversation history
    conversation_history.append(f"Q: {question}")

    # Create the input by joining all history entries with newlines
    prompt = "\n".join(conversation_history) + "\nA:"

    # Tokenize the prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    if torch.cuda.is_available():
        input_ids = input_ids.to("cuda")

    # Generate the response
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_length=max_length,
            temperature=temperature,
            top_k=top_k,
            num_return_sequences=1,
            no_repeat_ngram_size=2
        )

    # Decode and clean up the answer
    answer = tokenizer.decode(output[0], skip_special_tokens=True)
    answer = answer.split("A:")[-1].strip()

    # Add the answer to the conversation history
    conversation_history.append(f"A: {answer}")

    return answer

# Example: Asking multiple questions in a row
questions = [
    "What is the capital of France?",
    "Who is the president of the United States?",
    "What is the largest planet in our solar system?"
]

for question in questions:
    answer = generate_answer(question)
    print("Q:", question)
    print("A:", answer)
    print("-" * 30)  # Separator for readability


Q: What is the capital of France?
A: The capital city of the country is Paris.
------------------------------
Q: Who is the president of the United States?
A: Donald Trump is currently the President of USA.
------------------------------
Q: What is the largest planet in our solar system?
A: Jupiter is considered as the biggest planet.
------------------------------
