In [None]:
%pip install transformers
%pip install thefuzz
%pip install fuzzywuzzy


- https://en.wikipedia.org/wiki/Judo

- https://en.wikipedia.org/wiki/List_of_judo_techniques

- https://en.wikipedia.org/wiki/List_of_judoka

- https://martialarts.fandom.com/wiki/Judo

- https://chas-ma.com/JudoManual/Chapter_2%28HistoryofJudo%29.pdf

- https://www.ijf.org/history

- https://blackbelttrek.com/judo-vs-jiu-jitsu-the-ultimate-comparison/

In [50]:
import json
import os
from transformers import pipeline
from thefuzz import fuzz

# Load the pretrained extractive question-answering model.
model_name = "deepset/roberta-base-squad2"
nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)

data_dir = "data-sources"
context_file = "complete_context.txt"

# Read every .txt file in data_dir.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the assembled context (and anything derived from it, such as chunk
# indices) identical across runs and machines.
context_parts = []
for filename in sorted(os.listdir(data_dir)):
    if filename.endswith(".txt"):
        file_path = os.path.join(data_dir, filename)
        with open(file_path, "r", encoding="utf-8") as f:
            context_parts.append(f.read())

# Join all texts into one combined context string.
context = "\n".join(context_parts)

# Save the combined context to a file so it can be reloaded from disk later.
with open(context_file, "w", encoding="utf-8") as f:
    f.write(context)

In [51]:
from thefuzz import fuzz

def evaluate_qa(data: list, context: str, nlp_callable, threshold: int) -> float:
    """Score a QA callable against reference answers via fuzzy string matching.

    Each item's question is answered against ``context``. A prediction counts
    as correct when ``fuzz.ratio`` of the lower-cased prediction and the
    lower-cased expected answer is at least ``threshold``. One report per
    question and a final summary line are printed.

    Args:
        data: Items indexable by ``"question"`` and ``"answer"``.
        context: Text handed to the callable under the ``"context"`` key.
        nlp_callable: Invoked with ``{"question": ..., "context": ...}``; its
            result is indexed with ``"answer"``.
        threshold: Minimum similarity score for a prediction to count.

    Returns:
        Percentage of items judged correct, or 0 when ``data`` is empty.
    """
    n_items = len(data)
    hits = 0

    for entry in data:
        question = entry["question"]
        expected = entry["answer"]

        # Ask the model and pull out the answer text.
        prediction = nlp_callable({"question": question, "context": context})["answer"]

        # Case-insensitive fuzzy comparison against the reference answer.
        score = fuzz.ratio(prediction.lower(), expected.lower())
        passed = score >= threshold

        marker = "✅" if passed else "❌"
        print(f"{marker} Question: {question}")
        print(f"   Expected: {expected}         Received: {prediction}\n")

        if passed:
            hits += 1

    if n_items:
        accuracy = (hits / n_items) * 100
    else:
        accuracy = 0  # nothing to evaluate
    print(f"\n✅ Accuracy: {accuracy:.2f}% ({hits}/{n_items} correct)")
    return accuracy


In [52]:
from transformers import RobertaTokenizer
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Ensure context is loaded: if the variable is not defined in the notebook
# namespace, read it back from the file the context was saved to.
if 'context' not in globals():
    with open(context_file, "r", encoding="utf-8") as f:
        context = f.read()
print('Context length (chars): ', len(context))

tokens = tokenizer.tokenize(context)


# Average number of characters per token, ignoring the 'Ġ' marker the
# byte-level BPE tokenizer uses for a leading space. Guard against an empty
# context, which would otherwise raise ZeroDivisionError.
avg_length = sum(len(token.strip('Ġ')) for token in tokens) / len(tokens) if tokens else 0.0
print(f"Avg Token length: {avg_length:.2f}")
print(f"Total Tokens: {len(tokens)}")

Context length (chars):  141653
Avg Token length: 2.93
Total Tokens: 41611


In [53]:
import json
import random

# Load the question/answer pairs.
with open("question-sets/q_v2.json", "r", encoding="utf-8") as f:
    questions_data = json.load(f)

# Draw a reproducible random sample of (up to) 20 questions; clamping the
# sample size avoids a ValueError when the file holds fewer than 20 entries.
random.seed(42)
sampled_questions = random.sample(questions_data, min(20, len(questions_data)))
THRESHOLD = 50  # minimum similarity score passed to evaluate_qa

# Run the evaluation on the full context.
# The `nlp` pipeline created when the model was loaded is reused here instead
# of loading the same model a second time. The previous per-question loop only
# built an input dict that was never used and has been removed; evaluate_qa
# does its own iteration and counting.
accuracy = evaluate_qa(
    sampled_questions,  # <- data (list of QA dicts)
    context,            # <- context string
    nlp,                # <- your pipeline/callable
    THRESHOLD           # <- int threshold
)



KeyboardInterrupt: 

In [58]:
# Semantic Chunk Ranking and Context Reduction Cell

from sentence_transformers import SentenceTransformer
import torch
from torch.nn.functional import cosine_similarity
import json

# 1) Load all evaluation questions
with open("question-sets/q_v2.json", "r", encoding="utf-8") as f:
    all_questions = json.load(f)
print(f"🤔 Total questions: {len(all_questions)}")

# 2) Split the context on blank lines and drop chunks that are too short
context_chunks = context.split("\n\n")
print(f"🔢 Total chunks: {len(context_chunks)}")

min_words = 20
filtered_chunks = [ch for ch in context_chunks if len(ch.split()) >= min_words]
print(f"✅ Chunks ≥ {min_words} words: {len(filtered_chunks)}")
if not filtered_chunks:
    # Fail early with a clear message instead of an opaque error further down.
    raise ValueError(f"No chunk has at least {min_words} words; nothing to rank.")

# 3) Compute semantic embeddings for chunks and questions
embedder = SentenceTransformer("all-MiniLM-L6-v2")
print("⏳ Computing chunk embeddings...")
chunk_embeds = embedder.encode(filtered_chunks, convert_to_tensor=True, show_progress_bar=True)

print("⏳ Computing question embeddings...")
question_texts = [q["question"] for q in all_questions]
question_embeds = embedder.encode(question_texts, convert_to_tensor=True, show_progress_bar=True)

# 4) Aggregate question embedding (mean pooling)
# NOTE(review): the query vector is built from the evaluation questions
# themselves, so the reduced context is selected with knowledge of the test
# set — keep this in mind when interpreting accuracy on the reduced context.
query_embed = torch.mean(question_embeds, dim=0)

# 5) Compute cosine similarities and rank chunks
# cosine_similarity broadcasts the single (1, dim) query row against every
# chunk row, so no explicit repeat() copy of the query is needed.
sims = cosine_similarity(query_embed.unsqueeze(0), chunk_embeds, dim=1)

# Never ask topk for more entries than there are chunks (it would raise).
top_k = min(20, len(filtered_chunks))
top_indices = torch.topk(sims, k=top_k).indices.tolist()

print(f"🎯 Selected top {top_k} chunks (by semantic relevance):")
for rank, idx in enumerate(top_indices, 1):
    print(f"  {rank}. Chunk #{idx} — Cosine Score: {sims[idx]:.4f}")

# 6) Build reduced context (chunks in rank order) and save
reduced_chunks = [filtered_chunks[i] for i in top_indices]
reduced_context = "\n\n".join(reduced_chunks)
print(f"✂️ Reduced context char length: {len(reduced_context)}")
with open("reduced_context.txt", "w", encoding="utf-8") as f:
    f.write(reduced_context)
print("✅ Reduced context saved to 'reduced_context.txt'")

# 7) Report reduction percentage
# orig_len cannot be zero here: an empty context yields no chunks and is
# rejected in step 2.
orig_len = len(context)
new_len = len(reduced_context)
print(f"📉 Context reduced by {(1 - new_len / orig_len) * 100:.2f}%")


🤔 Total questions: 85
🔢 Total chunks: 378
✅ Chunks ≥ 20 words: 215
⏳ Computing chunk embeddings...


Batches: 100%|██████████| 7/7 [00:15<00:00,  2.20s/it]


⏳ Computing question embeddings...


Batches: 100%|██████████| 3/3 [00:00<00:00,  5.39it/s]

🎯 Selected top 20 chunks (by semantic relevance):
  1. Chunk #7 — Cosine Score: 0.7345
  2. Chunk #2 — Cosine Score: 0.7073
  3. Chunk #110 — Cosine Score: 0.7025
  4. Chunk #184 — Cosine Score: 0.7005
  5. Chunk #189 — Cosine Score: 0.6983
  6. Chunk #114 — Cosine Score: 0.6951
  7. Chunk #197 — Cosine Score: 0.6887
  8. Chunk #8 — Cosine Score: 0.6850
  9. Chunk #6 — Cosine Score: 0.6799
  10. Chunk #22 — Cosine Score: 0.6790
  11. Chunk #36 — Cosine Score: 0.6764
  12. Chunk #185 — Cosine Score: 0.6749
  13. Chunk #5 — Cosine Score: 0.6742
  14. Chunk #38 — Cosine Score: 0.6740
  15. Chunk #179 — Cosine Score: 0.6718
  16. Chunk #14 — Cosine Score: 0.6684
  17. Chunk #199 — Cosine Score: 0.6673
  18. Chunk #112 — Cosine Score: 0.6658
  19. Chunk #116 — Cosine Score: 0.6583
  20. Chunk #1 — Cosine Score: 0.6572
✂️ Reduced context char length: 19681
✅ Reduced context saved to 'reduced_context.txt'
📉 Context reduced by 86.11%





In [60]:
import json
import random

# Load the question/answer pairs.
with open("question-sets/q_v2.json", "r", encoding="utf-8") as f:
    questions_data = json.load(f)

# Draw a reproducible random sample of (up to) 20 questions; clamping the
# sample size avoids a ValueError when the file holds fewer than 20 entries.
random.seed(42)
sampled_questions = random.sample(questions_data, min(20, len(questions_data)))
# NOTE(review): with THRESHOLD = 50 the recorded output of this cell marks
# wrong answers as correct (e.g. expected "1988" / received "1964", expected
# "katame-waza" / received "nage-waza"), so the reported accuracy is inflated.
# Consider a stricter threshold or an exact-match metric.
THRESHOLD = 50

# Reload the reduced context from the file it was saved to.
with open('reduced_context.txt', "r", encoding="utf-8") as f:
    reduced_context = f.read()

# Run the evaluation on the reduced context.
# The `nlp` pipeline created when the model was loaded is reused here instead
# of loading the same model again. The previous per-question loop only built
# an input dict that was never used and has been removed; evaluate_qa does its
# own iteration and counting.
accuracy = evaluate_qa(
    sampled_questions,  # <- data (list of QA dicts)
    reduced_context,            # <- context string
    nlp,                # <- your pipeline/callable
    THRESHOLD           # <- int threshold
)



❌ Question: What was judo’s inclusion status for the 1940 Tokyo Olympics?
   Expected: demonstration sport         Received: softness controls hardness

✅ Question: What is a minor point called?
   Expected: yuko         Received: yuko

❌ Question: What is the name of the school Kanō Jigorō established?
   Expected: Kōdōkan         Received: Eisho-ji

✅ Question: What does Jita‑Kyōei mean?
   Expected: mutual welfare and benefit         Received: utual welfare and benefit

✅ Question: In what year did women's judo debut as a demonstration sport at the Olympics?
   Expected: 1988         Received: 1964

✅ Question: When did men's judo first appear at the Olympics?
   Expected: 1964         Received: 1964

✅ Question: What is the Japanese term for grappling techniques?
   Expected: katame-waza         Received: nage-waza

✅ Question: What does waza-ari mean?
   Expected: half point         Received: half point

✅ Question: What did Kano eliminate from his art?
   Expected: the most dange