In [6]:
import sys
import os
import pandas as pd
import torch
import random
from tqdm import tqdm
import time
from sentence_transformers import CrossEncoder
import itertools
import ast
from typing import Any, Dict, List

# fixed seed
FIXED_SEED = 42
torch.manual_seed(FIXED_SEED)
torch.cuda.manual_seed(FIXED_SEED)
torch.cuda.manual_seed_all(FIXED_SEED)
random.seed(FIXED_SEED)

class EvidenceSelector:
    def __init__(self):
        self.PASSAGE_RANKER = CrossEncoder(
            "cross-encoder/ms-marco-MiniLM-L-6-v2",
            max_length=512,
            device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
        )
        
        print("[Evidence Selector] Initialized \n")
    

    def compute_score_matrix(self, 
        questions: List[str], evidences: List[str]
    ) -> List[List[float]]:
        
        score_matrix = []
        for q in questions:
            evidence_scores = self.PASSAGE_RANKER.predict([(q, e) for e in evidences]).tolist()
            score_matrix.append(evidence_scores)
            
        return score_matrix


    def question_coverage_objective_fn(self, 
        score_matrix: List[List[float]], evidence_indices: List[int]
    ) -> float:
        total = 0.0
        for scores_for_question in score_matrix:
            total += max(scores_for_question[j] for j in evidence_indices)
            
        return total        
    
    
    def select_evidence(self, data: pd.DataFrame):
        print("[Evidence Selector] Selecting evidence from list of retrieved documents...")
        
        selected_evidences = []
        evd_latency_list = []
        max_selected = 6
        prefer_fewer = False

        for query, list_evidences in tqdm(zip(data['query'], data['retrieved_evidence'])):
            start_time = time.time()
            list_evidences = ast.literal_eval(list_evidences)
            
            # 증거 리스트는 이미 리스트 형태로 제공됨
            # docs = [evid.replace("\n", "") for evid in list_evidences]
            
            print(f"list_evid ---> {list_evidences} ,,,,,,, type ---> {type(list_evidences)}\n\n\n")
            docs = [evid.replace("\n", "") for evid in list_evidences if isinstance(evid, str)]


            # 중복 제거 및 정렬
            questions = sorted(set([query]))  # query는 문자열이므로 리스트로 감쌈
            evidences = sorted(set(docs))

            num_evidences = len(evidences)

            # 점수 행렬 계산
            score_matrix = self.compute_score_matrix(questions, evidences)

            best_combo = tuple()
            best_objective_value = float("-inf")
            max_selected = min(max_selected, num_evidences)
            min_selected = 1 if prefer_fewer else max_selected

            # 최고 점수를 가진 조합을 찾음
            for num_selected in range(min_selected, max_selected + 1):
                for combo in itertools.combinations(range(num_evidences), num_selected):
                    objective_value = self.question_coverage_objective_fn(score_matrix, combo)
                    if objective_value > best_objective_value:
                        best_combo = combo
                        best_objective_value = objective_value

            # 최고 점수를 가진 증거를 저장
            selected_evidences.append([{"text": evidences[idx]} for idx in best_combo][0]['text'])
            
            end_time = time.time()
            latency = end_time - start_time
            
        
        print(evd_latency_list)
            
        data['selected_evidence'] = selected_evidences
        data['selected_evd_latency'] = evd_latency_list
        
            
            
dataset = pd.read_csv('/home/work/hyun/Hallucination/RARR_OURS/outputs/nq_retrieved2.csv')

evd_obj = EvidenceSelector()
evd_obj.select_evidence(dataset)

[Evidence Selector] Initialized 

[Evidence Selector] Selecting evidence from list of retrieved documents...


0it [00:00, ?it/s]

list_evid ---> ['"Moon landing" have landed on the Moon. This was accomplished with two US pilot-astronauts flying a Lunar Module on each of six NASA missions across a 41-month period starting on 20 July 1969 UTC, with Neil Armstrong and Buzz Aldrin on Apollo 11, and ending on 14 December 1972 UTC with Gene Cernan and Jack Schmitt on Apollo 17. Cernan was the last to step off the lunar surface. All Apollo lunar missions had a third crew member who remained on board the Command Module. The last three missions had a rover for increased mobility. In order to go to the Moon,', '"Apollo 17" investigate the possibility of relatively new volcanic activity in the same area. Cernan, Evans, and Schmitt returned to Earth on December 19 after a 12-day mission. Apollo 17 is the most recent manned Moon landing and the most recent time humans travelled beyond low Earth orbit. It was also the first mission to have no one on board who had been a test pilot; X-15 test pilot Joe Engle lost the lunar modu

7it [00:00,  7.92it/s]


list_evid ---> ['"Human Lunar Return study" Human Lunar Return study NASA began its ""Human Lunar Return study"" in September 1995 to identify ways it could conduct future human spaceflight missions to the Moon. The final Human Lunar Return (HLR) briefing took place on August 7, 1996. The study was seen as laying ""the foundation for human space activity over the next three decades."" The study called for a mission lasting 16 days, 10 of which would be spent on the lunar surface. The study baselined a lightweight architecture including an open-cockpit lunar lander weighing including fuel. The lunar habitat was designed to have an inflatable hull', '"Human Lunar Return study" The projected cost of the mission over the five year development timeline ranged between $2.5 and $4 billion. The mission required two shuttle and three Proton launches to land two astronauts and a small habitat structure at Aristarchus crater. Human Lunar Return study NASA began its ""Human Lunar Return study"" in

ValueError: Length of values (0) does not match length of index (7)