In [1]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    BertTokenizer,
    BertModel,
    pipeline,
    T5ForConditionalGeneration,
    T5Tokenizer
)
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

# 1. Persiapan Dataset
dataset = load_dataset("gsm8k", "main")
train_data = dataset['train'].shuffle(seed=42)
test_data = dataset['test'].shuffle(seed=42).select(range(100))  # Contoh 100 data uji

# 2. Inisialisasi Model
class MathSolver:
    def __init__(self):
        # MathBERT untuk retrieval
        self.mathbert = BertModel.from_pretrained('tbs17/MathBERT')
        self.bert_tokenizer = BertTokenizer.from_pretrained('tbs17/MathBERT')
        
        # T5 untuk CoT generation
        self.cot_model = T5ForConditionalGeneration.from_pretrained('t5-base')
        self.cot_tokenizer = T5Tokenizer.from_pretrained('t5-base')
        
        # Knowledge base untuk multi-hop
        self.knowledge_base = self._init_knowledge_base()
        
    def _init_knowledge_base(self):
        """Inisialisasi contoh knowledge base matematika"""
        return {
            'aljabar': [
                "Rumus kuadrat: ax² + bx + c = 0 → x = [-b ± √(b²-4ac)]/(2a)",
                "Sistem persamaan: Untuk menyelesaikan sistem persamaan, gunakan substitusi atau eliminasi"
            ],
            'geometri': [
                "Luas lingkaran = πr²",
                "Teorema Pythagoras: a² + b² = c²"
            ]
        }
    
    # 3. Multi-hop Retrieval System
    def retrieve_information(self, question, hops=2):
        """Sistem retrieval multi-hop sederhana"""
        embeddings = self._get_bert_embeddings(question)
        
        # Cari topik relevan
        topic_similarities = {}
        for topic in self.knowledge_base:
            topic_emb = self._get_bert_embeddings(topic)
            topic_similarities[topic] = cosine_similarity(embeddings, topic_emb)
            
        selected_topic = max(topic_similarities, key=topic_similarities.get)
        
        # Lakukan hop kedua
        context = []
        for _ in range(hops):
            best_match = max(
                self.knowledge_base[selected_topic],
                key=lambda x: cosine_similarity(
                    self._get_bert_embeddings(x),
                    embeddings
                )
            )
            context.append(best_match)
            embeddings = self._get_bert_embeddings(best_match)
            
        return "\n".join(context)
    
    def _get_bert_embeddings(self, text):
        """Ekstrak embeddings dari MathBERT"""
        inputs = self.bert_tokenizer(
            text, 
            return_tensors='pt',
            max_length=512,
            truncation=True
        )
        with torch.no_grad():
            outputs = self.mathbert(**inputs)
        return outputs.last_hidden_state.mean(dim=1).numpy()
    
    # 4. Chain-of-Thought Generator
    def generate_cot_solution(self, question):
        """Generate solusi dengan CoT"""
        prompt = f"""
        Selesaikan masalah matematika berikut dengan langkah-langkah:
        {question}
        Langkah 1: Pertama, 
        """
        inputs = self.cot_tokenizer(
            prompt,
            return_tensors='pt',
            max_length=512,
            truncation=True
        )
        
        outputs = self.cot_model.generate(
            inputs.input_ids,
            max_length=512,
            num_beams=5,
            early_stopping=True
        )
        
        return self.cot_tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # 5. Evaluasi
    def evaluate(self, method='cot', test_data=test_data):
        """Evaluasi performa model"""
        correct = 0
        for example in tqdm(test_data):
            question = example['question']
            true_answer = example['answer']

            if method == 'cot':
                solution = self.generate_cot_solution(question)
            else:
                context = self.retrieve_information(question)
                solution = self.generate_cot_solution(context + "\n" + question)

            if self._compare_answers(solution, true_answer):
                correct += 1

        return correct / len(test_data)
    
    def _compare_answers(self, pred, true):
        """Perbandingan jawaban sederhana"""
        pred = pred.strip().lower()
        true = true.strip().lower()
        return true in pred

# 6. Eksekusi Utama
if __name__ == "__main__":
    solver = MathSolver()
    
    # Contoh penggunaan
    problem = "Jika x² + 5x + 6 = 0, berapa nilai x?"
    
    # Multi-hop
    print("=== Multi-hop Retrieval ===")
    context = solver.retrieve_information(problem)
    print("Context:", context)
    print("Solution:", solver.generate_cot_solution(context + "\n" + problem))
    
    # CoT
    print("\n=== Chain-of-Thought ===")
    print("Solution:", solver.generate_cot_solution(problem))
    
    # Evaluasi
    print("\n=== Evaluasi ===")
    cot_acc = solver.evaluate('cot')
    retrieval_acc = solver.evaluate('retrieval')
    print(f"Akurasi CoT: {cot_acc:.2f}")
    print(f"Akurasi Multi-hop: {retrieval_acc:.2f}")

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


=== Multi-hop Retrieval ===
Context: Teorema Pythagoras: a² + b² = c²
Teorema Pythagoras: a² + b² = c²
Solution: Teorema Pythagoras: a2 + b2 = c2 Teorema Pythagoras: a2 + b2 = c2 Jika x2 + 5x + 6 = 0, berapa nilai x?

=== Chain-of-Thought ===
Solution: x2 + 5x + 6 = 0. Selesaikan masalah matematika berikut dengan langkah-langkah: Jika x2 + 5x + 6 = 0, berapa nilai x? Langkah 1: Pertama, pertama, pertama, pertama, pertama, pertama, pertama, pertama, pertama, pertama, pertama, pertama, pertama, pertama, pertama, pertama,

=== Evaluasi ===


100%|██████████| 100/100 [09:05<00:00,  5.46s/it]
100%|██████████| 100/100 [10:08<00:00,  6.09s/it]

Akurasi CoT: 0.00
Akurasi Multi-hop: 0.00



