### Kullanılan Donanım

- Kaggle Notebook - GPU: 2 x Tesla T4

In [None]:
# warnings kütüphanesi uyarıları kapatmak için kullanılır
import warnings
warnings.filterwarnings("ignore")

In [None]:
# GPU ve CUDA sürümlerini kontrol etmek için kullanılır
!nvidia-smi

In [2]:
!pip install -q transformers==4.44.0
!echo "transformers Installed"
!pip install -q faiss-cpu==1.8.0
!echo "faiss-cpu Installed"
!pip install -q torch==2.4.0
!echo "torch Installed"
!pip install -q datasets==2.21.0
!echo "datasets Installed"

transformers Installed
faiss-cpu Installed
torch Installed
datasets Installed


### Kütüphane Versiyonları

* torch==2.4.0
* transformers==4.44.0
* datasets==2.21.0
* faiss==1.8.0

In [None]:
from datasets import load_dataset
import json

from transformers import AutoTokenizer, AutoModel
import torch

import faiss

In [None]:
model = 'sentence-transformers/all-MiniLM-L6-v2'
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")

In [None]:
def load_wikipedia_data():
    return load_dataset('wikipedia', '20220301.en', split='train[:2]', trust_remote_code=True)

In [None]:
dataset = load_wikipedia_data()

In [None]:
print(f"Len dataset: {len(dataset)}")
print(f"Sample:\n{json.dumps(dataset[50630], indent=0)}")

In [None]:
max_samples = 1000
texts = [data['text'] for data in dataset.select(range(min(max_samples, len(dataset))))]

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model)
model = AutoModel.from_pretrained(model).to(device)

In [None]:
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt').to(device)

```python
with torch.no_grad()
```
### Amacı:
- torch.no_grad(), PyTorch'ta modelin gradyan hesaplamalarını devre dışı bırakmak için kullanılır. Bu, modelin tahmin (inference) modunda çalıştırıldığı, yani sadece çıktıları hesapladığınız (örneğin embedding, tahmin gibi) durumlarda kullanılır.
- Gradyan hesaplamalarını devre dışı bırakmanın nedeni, modelin bu esnada eğitilmediği ve sadece tahmin yapıldığıdır. Gradyan hesaplamaları hafıza açısından pahalı olduğu için, onları kapatarak bellek ve hesaplama süresi tasarrufu sağlarsınız.

### Nasıl Çalışır:
- Normalde, PyTorch modelleri, her ileri yönlü hesaplamada (forward pass) gradyanları otomatik olarak izler, çünkü bu bilgi geri yayılım (backpropagation) sırasında gereklidir.
- Ancak model sadece tahmin için kullanılıyorsa (örneğin, embedding hesaplama veya önceden eğitilmiş bir modeli doğrudan kullanma), gradyan hesaplamasına gerek yoktur. Bu yüzden torch.no_grad() kullanarak gradyan hesaplamalarını devre dışı bırakırız.

In [None]:
with torch.no_grad():
    outputs = model(**inputs)

```python
embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
```
### Amacı:
- `outputs.last_hidden_state`, modelin her giriş token'i (kelime) için gizli durumları (hidden states) döndürür.
- `.mean(dim=1)`, cümledeki her kelimeye karşılık gelen vektörlerin ortalamasını alarak, cümle için tek bir vektör (embedding) oluşturur.
- `.cpu()` işlemi, tensor'u GPU'dan CPU'ya taşır ve `.numpy()` ile numpy formatına dönüştürülür.

### Ornek: 
- Eğer bir cümlede 3 kelime varsa (sequence_length=3) ve her kelime için 4 boyutlu bir vektör (hidden_size=4) varsa, last_hidden_state şöyle bir tensordur:

```python
last_hidden_state = [
   [0.1, 0.2, 0.3, 0.4],  # 1. kelimenin vektörü
   [0.5, 0.6, 0.7, 0.8],  # 2. kelimenin vektörü
   [0.9, 1.0, 1.1, 1.2]   # 3. kelimenin vektörü
]
```

- .mean(dim=1) ile bu vektörlerin her bir bileşeni için ortalamasını alırız:

```python 
ortalama = [(0.1+0.5+0.9)/3, (0.2+0.6+1.0)/3, (0.3+0.7+1.1)/3, (0.4+0.8+1.2)/3]
         = [0.5, 0.6, 0.7, 0.8]
    
## Output: 
[0.5, 0.6, 0.7, 0.8]  # Bu cümlenin ortalama vektörü
```

- Bu kısım, tensor'un GPU’da hesaplanmış olmasından dolayı tensor'u CPU’ya geri taşır. Çünkü bazı işlemler (örneğin, FAISS kullanmak) GPU yerine CPU üzerinde çalışır.



In [None]:
embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()

In [None]:
print(f"dimension of embeddings: {embeddings.shape}")
d = embeddings.shape[1]

In [25]:
# Import necessary libraries
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM
import faiss
import numpy as np

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the model names
embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
generator_model_name = 'google/flan-t5-base'  # A smaller model suitable for QA

# Load the Wikipedia dataset
def load_wikipedia_data():
    print("Loading Wikipedia dataset...")
    return load_dataset('wikipedia', '20220301.en', split='train[:1%]')

# dataset = load_wikipedia_data()

# # Preprocess the texts
# print("Preprocessing texts...")
# max_samples = 1000
# texts = [data['text'] for data in dataset.select(range(min(max_samples, len(dataset))))]

# Load the tokenizer and model for embeddings
print("Loading embedding model and tokenizer...")
embedding_tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
embedding_model = AutoModel.from_pretrained(embedding_model_name).to(device)

# Compute embeddings in batches
print("Computing embeddings...")
batch_size = 32
all_embeddings = []
for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    inputs = embedding_tokenizer(batch_texts, padding=True, truncation=True, return_tensors='pt').to(device)
    with torch.no_grad():
        outputs = embedding_model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
    all_embeddings.append(embeddings)

# Concatenate all embeddings
embeddings = np.vstack(all_embeddings)
print(f"Embeddings shape: {embeddings.shape}")

# Build the FAISS index
print("Building FAISS index...")
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# Function to retrieve relevant documents
def retrieve(query, k=5):
    # Embed the query
    query_inputs = embedding_tokenizer(query, return_tensors='pt').to(device)
    with torch.no_grad():
        query_outputs = embedding_model(**query_inputs)
    query_embedding = query_outputs.last_hidden_state.mean(dim=1).cpu().numpy()
    # Search in the index
    distances, indices = index.search(query_embedding, k)
    return [texts[i] for i in indices[0]]

# Load the generator model and tokenizer
print("Loading generator model and tokenizer...")
generator_tokenizer = AutoTokenizer.from_pretrained(generator_model_name)
generator_model = AutoModelForSeq2SeqLM.from_pretrained(generator_model_name).to(device)

# Function to generate answer
def answer_question(question):
    print(f"\nQuestion: {question}")
    # Retrieve relevant documents
    retrieved_texts = retrieve(question)
    print("Retrieved documents:")
    for idx, doc in enumerate(retrieved_texts, 1):
        print(f"\nDocument {idx}:\n{doc[:500]}...")  # Show first 500 characters
    # Concatenate retrieved texts
    context = ' '.join(retrieved_texts)
    # Prepare input for the generator
    input_text = f"Question: {question}\nContext: {context}\nAnswer:"
    # Tokenize input
    generator_inputs = generator_tokenizer([input_text], return_tensors='pt', truncation=True, max_length=1024).to(device)
    # Generate answer
    with torch.no_grad():
        generated_ids = generator_model.generate(
            input_ids=generator_inputs['input_ids'],
            attention_mask=generator_inputs['attention_mask'],
            max_length=256,
            num_beams=5,
            early_stopping=True
        )
    answer = generator_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(f"\nAnswer: {answer}")
    return answer

# Example usage
if __name__ == "__main__":
    question = "Who designed the Eiffel Tower?"
    answer = answer_question(question)

Loading embedding model and tokenizer...
Computing embeddings...
Embeddings shape: (3, 384)
Building FAISS index...
Loading generator model and tokenizer...

Question: Who designed the Eiffel Tower?
Retrieved documents:

Document 1:
The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It is named after the engineer Gustave Eiffel, whose company designed and built the tower....

Document 2:
The Colosseum is an oval amphitheatre in the centre of the city of Rome, Italy. It is the largest ancient amphitheatre ever built, and is still the largest standing amphitheatre in the world today, despite its age....

Document 3:
The Great Wall of China is a series of fortifications that were built across the historical northern borders of China to protect and consolidate territories of Chinese states and empires....

Document 4:
The Colosseum is an oval amphitheatre in the centre of the city of Rome, Italy. It is the largest ancient amphitheatre ever built, and is

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModel
import numpy as np
import faiss
import pandas as pd 

# Step 1: Load Wikipedia Data (1% sample)
# def load_wikipedia_data():
#     return load_dataset('wikipedia', '20220301.en', split='train[:2]', trust_remote_code=True)

# dataset = load_wikipedia_data()

# Step 2: Preprocess and Limit Data Samples
# max_samples = 1000
# texts = [data['text'] for data in dataset.select(range(min(max_samples, len(dataset))))]

df = pd.DataFrame([
    {
        "document_id": "doc1",
        "text": "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It is named after the engineer Gustave Eiffel, whose company designed and built the tower."
    },
    {
        "document_id": "doc2",
        "text": "The Great Wall of China is a series of fortifications that were built across the historical northern borders of China to protect and consolidate territories of Chinese states and empires."
    },
    {
        "document_id": "doc3",
        "text": "The Colosseum is an oval amphitheatre in the centre of the city of Rome, Italy. It is the largest ancient amphitheatre ever built, and is still the largest standing amphitheatre in the world today, despite its age."
    }
])

texts = df['text'].values.tolist()

# Step 3: Load the Sentence Transformer Model and Tokenizer
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).to(device)


# Step 4: Embed the Wikipedia Texts
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt', max_length=512).to(device)
with torch.no_grad():
    outputs = model(**inputs)

embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()

# Step 5: Build a FAISS Index for Efficient Retrieval
index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 distance index
index.add(embeddings)  # Add embeddings to the index

# Step 6: Question Answering (without separate retrieve function)
query = "Who designed the Eiffel Tower?"

# Embed the query
query_inputs = tokenizer([query], padding=True, truncation=True, return_tensors='pt', max_length=512).to(device)
with torch.no_grad():
    query_outputs = model(**query_inputs)
query_embedding = query_outputs.last_hidden_state.mean(dim=1).cpu().numpy()

# Search FAISS index
top_k = 5  # Number of passages to retrieve
distances, indices = index.search(query_embedding, top_k)

# Retrieve the top-k relevant passages
retrieved_passages = [texts[i] for i in indices[0]]

# Print the retrieved passages
#  
for i, passage in enumerate(retrieved_passages):
    print(f"Passage {i + 1}:\n{passage}\n")

In [19]:
import torch
from transformers import AutoTokenizer, AutoModel, T5Tokenizer, T5ForConditionalGeneration
from datasets import load_dataset
import faiss
import numpy as np

# Set up device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model for embeddings
embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
embedding_tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
embedding_model = AutoModel.from_pretrained(embedding_model_name).to(device)

# Load Wikipedia data
def load_wikipedia_data():
    return load_dataset('wikipedia', '20220301.en', split='train[:1%]')

# dataset = load_wikipedia_data()

# # Prepare texts
# max_samples = 1000  # You can adjust this number based on your computational resources
# texts = [data['text'] for data in dataset.select(range(min(max_samples, len(dataset))))]

# Compute embeddings for the documents
def compute_document_embeddings(texts):
    all_embeddings = []
    batch_size = 16  # Adjust based on your GPU memory
    for i in range(0, len(texts), batch_size):
        batch_texts = texts[i:i+batch_size]
        inputs = embedding_tokenizer(batch_texts, padding=True, truncation=True, return_tensors='pt').to(device)
        with torch.no_grad():
            outputs = embedding_model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
        all_embeddings.append(embeddings)
    return np.vstack(all_embeddings)

document_embeddings = compute_document_embeddings(texts)

# Build Faiss index
d = document_embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(document_embeddings)

# Function to get embedding for the query
def get_query_embedding(query):
    inputs = embedding_tokenizer(query, return_tensors='pt', truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = embedding_model(**inputs)
    embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
    return embedding

# Retrieve relevant documents
def retrieve_documents(query, k=5):
    query_embedding = get_query_embedding(query)
    distances, indices = index.search(query_embedding, k)
    return [texts[i] for i in indices[0]]

# Load QA model
qa_model_name = 't5-base'  # You can use a more powerful model if desired
qa_tokenizer = T5Tokenizer.from_pretrained(qa_model_name)
qa_model = T5ForConditionalGeneration.from_pretrained(qa_model_name).to(device)

# Generate answer using retrieved documents
def generate_answer(question, context):
    max_input_length = 512  # Maximum input length for T5
    input_text = f"question: {question} context: {context}"
    inputs = qa_tokenizer.encode(input_text, return_tensors='pt', truncation=True, max_length=max_input_length).to(device)
    outputs = qa_model.generate(inputs, max_length=150)
    answer = qa_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

# Main function to answer questions
def answer_question(question, k=5):
    retrieved_texts = retrieve_documents(question, k)
    context = ' '.join(retrieved_texts)
    answer = generate_answer(question, context)
    return answer

# Example usage
if __name__ == "__main__":
    question  = "Who designed the Eiffel Tower?"
    answer = answer_question(question)
    print(f"Question: {question}")
    print(f"Answer: {answer}")

Question: Who designed the Eiffel Tower?
Answer: Gustave Eiffel


In [24]:
import torch
from datasets import load_dataset
from transformers import pipeline
# from sentence_transformers import SentenceTransformer
from transformers import AutoModel , AutoTokenizer
import faiss
import numpy as np

# Step 1: Load and Process Wikipedia Data
# print("Loading Wikipedia data...")
# max_samples = 1000  # Adjust this number as needed
# dataset = load_dataset('wikipedia', '20220301.en', split='train[:1%]')
# texts = [data['text'] for data in dataset.select(range(min(max_samples, len(dataset))))]
# texts = [text for text in texts if text.strip() != '']

# Step 2: Split Texts into Paragraphs
print("Splitting texts into paragraphs...")
paragraphs = []
for text in texts:
    paras = text.split('\n\n')
    paras = [p.strip() for p in paras if p.strip() != '']
    paragraphs.extend(paras)

# Step 3: Embed the Texts
print("Embedding texts...")
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_name = 'all-MiniLM-L6-v2'
model = AutoModel(model_name, device=device)
embeddings = model.encode(paragraphs, batch_size=16, convert_to_numpy=True)

# Step 4: Build an Index with FAISS
print("Building FAISS index...")
d = embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(embeddings)

# Step 5: Interactive Q&A Loop
print("Ready for questions! Type 'exit' to quit.")

# question = input("\nEnter your question: 
question  = "Who designed the Eiffel Tower?"


# Retrieve Relevant Paragraphs
print("Retrieving relevant paragraphs...")
query_embedding = model.encode([question], convert_to_numpy=True)
k = 5  # Number of nearest neighbors to retrieve
distances, indices = index.search(query_embedding, k)
retrieved_paragraphs = [paragraphs[i] for i in indices[0]]

# Answer the Question
print("Answering the question...")
device = 0 if torch.cuda.is_available() else -1  # Set device for QA pipeline
qa_pipeline = pipeline(
    'question-answering',
    model='distilbert-base-uncased-distilled-squad',
    tokenizer='distilbert-base-uncased-distilled-squad',
    device=device
)
answers = []
for context in retrieved_paragraphs:
    if len(context.strip()) == 0:
        continue
    result = qa_pipeline({'question': question, 'context': context})
    answers.append((result['score'], result['answer']))

if not answers:
    print("No answer found.")
else:
    # Sort answers by confidence score
    answers.sort(key=lambda x: x[0], reverse=True)
    best_answer = answers[0][1]
    print(f"Answer: {best_answer}")

Splitting texts into paragraphs...
Embedding texts...


OSError: AutoModel is designed to be instantiated using the `AutoModel.from_pretrained(pretrained_model_name_or_path)` or `AutoModel.from_config(config)` methods.

In [None]:
!pip install -q faiss-cpu

In [22]:
import torch
from torch.utils.data import DataLoader
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM
import faiss
import numpy as np

# 1. Load and preprocess data
def load_wikipedia_data():
    return load_dataset('wikipedia', '20220301.en', split='train[:1%]')

# dataset = load_wikipedia_data()

# max_samples = 1000
# texts = [data['text'] for data in dataset.select(range(min(max_samples, len(dataset))))]

# 2. Generate embeddings
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).to(device)

def compute_embeddings(texts, batch_size=32):
    embeddings = []
    dataloader = DataLoader(texts, batch_size=batch_size)
    with torch.no_grad():
        for batch in dataloader:
            inputs = tokenizer(batch, padding=True, truncation=True, return_tensors='pt').to(device)
            outputs = model(**inputs)
            embeddings.append(outputs.last_hidden_state.mean(dim=1).cpu())
    embeddings = torch.cat(embeddings)
    return embeddings.numpy()

embeddings = compute_embeddings(texts)
print(f"Embeddings shape: {embeddings.shape}")

# 3. Build FAISS index
d = embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(embeddings)
print(f"FAISS index contains {index.ntotal} vectors.")

# 4. Retrieval function
def retrieve(query, k=5):
    with torch.no_grad():
        inputs = tokenizer(query, return_tensors='pt').to(device)
        outputs = model(**inputs)
        query_embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()

    distances, indices = index.search(query_embedding, k)
    results = [texts[idx] for idx in indices[0]]
    return results

# 5. QA model
qa_model_name = 't5-base'
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
qa_model = AutoModelForSeq2SeqLM.from_pretrained(qa_model_name).to(device)

def answer_question(question, retrieved_texts):
    context = ' '.join(retrieved_texts)
    input_text = f"question: {question} context: {context}"

    inputs = qa_tokenizer.encode(input_text, return_tensors='pt', truncation=True).to(device)
    outputs = qa_model.generate(inputs, max_length=50, num_beams=5, early_stopping=True)
    answer = qa_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

def rag_qa(question, k=5):
    retrieved_texts = retrieve(question, k)
    answer = answer_question(question, retrieved_texts)
    return answer

# 6. Test the system
question = "Who designed the Eiffel Tower?"
answer = rag_qa(question)
print(f"Question: {question}")
print(f"Answer: {answer}")

Embeddings shape: (3, 384)
FAISS index contains 3 vectors.


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Question: Who designed the Eiffel Tower?
Answer: Gustave Eiffel


In [None]:
documents = [
    {
        "document_id": "doc1",
        "text": "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It is named after the engineer Gustave Eiffel, whose company designed and built the tower."
    },
    {
        "document_id": "doc2",
        "text": "The Great Wall of China is a series of fortifications that were built across the historical northern borders of China to protect and consolidate territories of Chinese states and empires."
    },
    {
        "document_id": "doc3",
        "text": "The Colosseum is an oval amphitheatre in the centre of the city of Rome, Italy. It is the largest ancient amphitheatre ever built, and is still the largest standing amphitheatre in the world today, despite its age."
    }
]

In [None]:
import json
import faiss
from transformers import AutoTokenizer, AutoModel
import numpy as np

class Retriever:
    def __init__(self, index_path=None):
        self.tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
        self.model = AutoModel.from_pretrained('distilbert-base-uncased')
        self.index = faiss.IndexFlatL2(768)
        self.documents = []

        if index_path:
            self.load_index(index_path)

    def load_documents(self, doc_path):
        with open(doc_path, 'r') as f:
            self.documents = json.load(f)
        self.build_index()

    def build_index(self):
        embeddings = []
        for doc in self.documents:
            inputs = self.tokenizer(doc['text'], return_tensors='pt', truncation=True, padding=True)
            outputs = self.model(**inputs)
            embeddings.append(outputs.last_hidden_state.mean(dim=1).detach().numpy())
        embeddings = np.vstack(embeddings)
        self.index.add(embeddings)

    def retrieve(self, query, top_k=1):
        inputs = self.tokenizer(query, return_tensors='pt', truncation=True, padding=True)
        outputs = self.model(**inputs)
        query_embedding = outputs.last_hidden_state.mean(dim=1).detach().numpy()
        distances, indices = self.index.search(query_embedding, top_k)
        return [self.documents[i] for i in indices[0]]

    def save_index(self, index_path):
        faiss.write_index(self.index, index_path)

    def load_index(self, index_path):
        self.index = faiss.read_index(index_path)



from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

class Generator:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-cnn')
        self.model = AutoModelForSeq2SeqLM.from_pretrained('facebook/bart-large-cnn')

    def generate(self, context, question):
        input_text = f"question: {question} context: {context}"
        inputs = self.tokenizer(input_text, return_tensors='pt', truncation=True, padding=True)
        outputs = self.model.generate(**inputs)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)


from retriever import Retriever
from generator import Generator

def main():
    retriever = Retriever()
    retriever.load_documents('../data/documents.json')

    generator = Generator()

    query = "What is the Eiffel Tower?"
    retrieved_docs = retriever.retrieve(query, top_k=1)
    context = retrieved_docs[0]['text']

    answer = generator.generate(context, query)
    print(f"Question: {query}")
    print(f"Answer: {answer}")

if __name__ == "__main__":
    main()

In [None]:
import json
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM

# Data


# Retriever
class Retriever:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
        self.model = AutoModel.from_pretrained('distilbert-base-uncased')
        self.index = faiss.IndexFlatL2(768)
        self.documents = documents
        self.build_index()

    def build_index(self):
        embeddings = []
        for doc in self.documents:
            inputs = self.tokenizer(doc['text'], return_tensors='pt', truncation=True, padding=True)
            outputs = self.model(**inputs)
            embeddings.append(outputs.last_hidden_state.mean(dim=1).detach().numpy())
        embeddings = np.vstack(embeddings)
        self.index.add(embeddings)

    def retrieve(self, query, top_k=1):
        inputs = self.tokenizer(query, return_tensors='pt', truncation=True, padding=True)
        outputs = self.model(**inputs)
        query_embedding = outputs.last_hidden_state.mean(dim=1).detach().numpy()
        distances, indices = self.index.search(query_embedding, top_k)
        return [self.documents[i] for i in indices[0]]

# Generator
class Generator:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-cnn')
        self.model = AutoModelForSeq2SeqLM.from_pretrained('facebook/bart-large-cnn')

    def generate(self, context, question):
        input_text = f"question: {question} context: {context}"
        inputs = self.tokenizer(input_text, return_tensors='pt', truncation=True, padding=True)
        outputs = self.model.generate(**inputs)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

# Main
retriever = Retriever()
generator = Generator()

query = "What is the Eiffel Tower?"
retrieved_docs = retriever.retrieve(query, top_k=1)
context = retrieved_docs[0]['text']

answer = generator.generate(context, query)
print(f"Question: {query}")
print(f"Answer: {answer}")


In [None]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline

class QAModel:
    def __init__(self):
        self.tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')
        self.model = AutoModelForQuestionAnswering.from_pretrained('distilbert-base-uncased')
        self.qa_pipeline = pipeline('question-answering', model=self.model, tokenizer=self.tokenizer)

    def answer_question(self, question, context):
        result = self.qa_pipeline(question=question, context=context)
        return result['answer']

import json
from qa_model import QAModel

def load_documents(doc_path):
    with open(doc_path, 'r') as f:
        return json.load(f)

def main():
    documents = 
    qa_model = QAModel()

    query = "What is the Eiffel Tower?"
    context = next(doc['text'] for doc in documents if "Eiffel Tower" in doc['text'])

    answer = qa_model.answer_question(query, context)
    print(f"Question: {query}")
    print(f"Answer: {answer}")

if __name__ == "__main__":
    main()
