In [14]:
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import re

In [15]:
# Load the sentence-embedding model once for the whole notebook.
# Later cells call `model.encode(...)` on this global to embed both the stored
# chunks and each incoming query, so this cell must run before them.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

# Sample document, embedded directly in the notebook so no external file is needed.
# The cells below pass it to split_text() to produce the chunks that get embedded.
# Paragraphs are separated by blank lines; each paragraph is a single line.
sample_text = """
The Eiffel Tower is one of the most recognizable landmarks in the world. It is located in Paris, France, and was constructed between 1887 and 1889. Designed by Gustave Eiffel, the tower stands at 330 meters tall and was originally built as the entrance arch for the 1889 World's Fair.

Over the years, the Eiffel Tower has become a symbol of France and attracts millions of tourists annually. It was initially criticized by many artists and intellectuals, but today it is considered an architectural marvel.

The tower consists of three levels that visitors can access, with restaurants and observation decks offering panoramic views of Paris. The structure is repainted every seven years to protect it from rust, using about 60 tons of paint each time.

At night, the Eiffel Tower is illuminated with thousands of sparkling lights, making it one of the most breathtaking sights in the city. The lights were first installed in 1985 and have since become an integral part of the tower's charm.
"""


In [None]:
# Function to split text into meaningful chunks (paragraphs)
def split_text(text):
    """Split ``text`` into paragraph chunks.

    A paragraph boundary is a blank line: a newline, optional whitespace, then
    another newline. A single newline inside a paragraph (for example from
    hard-wrapped text) does not start a new chunk, so a multi-line paragraph
    stays together and keeps its internal newlines.

    Args:
        text: The raw document as one string.

    Returns:
        A list of paragraphs with surrounding whitespace stripped. Empty and
        whitespace-only pieces are dropped, so ``""`` yields ``[]``.
    """
    # Split on blank lines rather than on every newline; splitting on "\n+"
    # would break a hard-wrapped paragraph into one chunk per line.
    pieces = re.split(r"\n\s*\n", text)
    return [para.strip() for para in pieces if para.strip()]

# Build the retrieval corpus: chunk the sample document with split_text() and
# embed every chunk once up front.
# NOTE: the sentence-level chunking cell further down reassigns both names, so
# whichever of the two cells ran last decides what later cells see.
stored_texts = split_text(sample_text)  # Store each paragraph separately
stored_embeddings = model.encode(stored_texts, convert_to_numpy=True)  # Store embeddings for each chunk

In [18]:
#Alternative form of chunking

# Function to split text into meaningful chunks (sentences)
def split_text(text):
    """Split ``text`` into sentence chunks.

    A sentence boundary is whitespace that directly follows ``.``, ``?`` or
    ``!``, unless the text just before it looks like an abbreviation:

    * a dotted abbreviation such as ``e.g.`` or ``i.e.``
    * a capital letter, a lowercase letter and a period, such as ``Dr.``

    Known limitation: the second rule also suppresses a split after any
    two-letter capitalised word that really does end a sentence.

    Args:
        text: The raw document as one string.

    Returns:
        A list of sentences with surrounding whitespace stripped. Empty
        pieces are dropped, so ``""`` yields ``[]``.
    """
    sentence_boundary = (
        r'(?<!\w\.\w\.)'       # not right after a dotted abbreviation ("e.g.")
        r'(?<![A-Z][a-z]\.)'   # not right after a short title ("Dr.", "Mr.")
        r'(?<=[.?!])'          # must follow a sentence terminator, including "!"
        r'\s+'                 # consume the whole run of separating whitespace
    )
    sentences = re.split(sentence_boundary, text)
    return [sentence.strip() for sentence in sentences if sentence.strip()]

# Build the retrieval corpus: chunk the sample document with split_text() and
# embed every chunk once up front.
# retrieve_passage() reads these two module-level names, so they must stay
# index-aligned (stored_embeddings[i] is the embedding of stored_texts[i]).
stored_texts = split_text(sample_text)  # Store each sentence separately
stored_embeddings = model.encode(stored_texts, convert_to_numpy=True)  # Store embeddings for each chunk


In [22]:
# Display the stored chunks to sanity-check how the document was split.
stored_texts

['The Eiffel Tower is one of the most recognizable landmarks in the world.',
 'It is located in Paris, France, and was constructed between 1887 and 1889.',
 "Designed by Gustave Eiffel, the tower stands at 330 meters tall and was originally built as the entrance arch for the 1889 World's Fair.",
 'Over the years, the Eiffel Tower has become a symbol of France and attracts millions of tourists annually.',
 'It was initially criticized by many artists and intellectuals, but today it is considered an architectural marvel.',
 'The tower consists of three levels that visitors can access, with restaurants and observation decks offering panoramic views of Paris.',
 'The structure is repainted every seven years to protect it from rust, using about 60 tons of paint each time.',
 'At night, the Eiffel Tower is illuminated with thousands of sparkling lights, making it one of the most breathtaking sights in the city.',
 "The lights were first installed in 1985 and have since become an integral part of the tower's charm."]

In [19]:
# Function to generate embeddings
def get_transformer_embeddings(texts):
    """Embed ``texts`` with the notebook-level ``model``.

    Args:
        texts: The input handed straight to ``model.encode``; the caller in
            this notebook passes a list of strings.

    Returns:
        Whatever ``model.encode(..., convert_to_numpy=True)`` returns
        (presumably one embedding row per input text; confirm against the
        sentence-transformers documentation).
    """
    embeddings = model.encode(texts, convert_to_numpy=True)
    return embeddings

# Function to retrieve relevant passage
def retrieve_passage(query):
    """Return the stored chunk most similar to ``query``.

    The query is embedded via ``get_transformer_embeddings`` and compared by
    cosine similarity with the module-level ``stored_embeddings``; the entry of
    ``stored_texts`` at the highest-scoring index is returned.

    Args:
        query: The question or search string.

    Returns:
        A single element of ``stored_texts``.
    """
    # Wrap the query in a list so it is embedded as a batch of one.
    query_vector = get_transformer_embeddings([query])
    score_matrix = cosine_similarity(query_vector, stored_embeddings)
    # Only one query was passed, so the first row holds its score against
    # every stored chunk.
    scores = score_matrix[0]
    top_index = np.argmax(scores)
    return stored_texts[top_index]

# Function to answer questions based on stored content
def answer_question(query):
    """Answer ``query`` with the passage that ``retrieve_passage`` selects.

    No answer text is generated; the retrieved chunk is returned unchanged.

    Args:
        query: The question to answer.

    Returns:
        The value returned by ``retrieve_passage(query)``.
    """
    return retrieve_passage(query)

In [24]:
# Sample questions used to smoke-test retrieval against the stored chunks.
# Each "answer" is simply the single stored chunk that answer_question() returns.
# NOTE(review): in the recorded output below, the "Which architect..." and
# "Why was ... initially built?" questions both get the generic opening sentence
# rather than the sentence that holds the answer, so top-1 sentence retrieval
# misses here.
questions = [
    "In which city is it and which year was it constructed?",
    "Which architect designed the Eiffel Tower?",
    "Why was the Eiffel Tower initially built?",
    "How often is the Eiffel Tower repainted?",
    "What happens to the Eiffel Tower at night?"
]

print("\nSample Questions and Answers:\n")
# Print each question followed by the chunk retrieved for it.
for question in questions:
    response = answer_question(question)
    print(f"Q: {question}\nA: {response}\n")




Sample Questions and Answers:

Q: In which city is it and which year was it constructed?
A: It is located in Paris, France, and was constructed between 1887 and 1889.

Q: Which architect designed the Eiffel Tower?
A: The Eiffel Tower is one of the most recognizable landmarks in the world.

Q: Why was the Eiffel Tower initially built?
A: The Eiffel Tower is one of the most recognizable landmarks in the world.

Q: How often is the Eiffel Tower repainted?
A: The structure is repainted every seven years to protect it from rust, using about 60 tons of paint each time.

Q: What happens to the Eiffel Tower at night?
A: At night, the Eiffel Tower is illuminated with thousands of sparkling lights, making it one of the most breathtaking sights in the city.

