In [1]:
!pip install -q ragatouille

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.7/86.7 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m809.1/809.1 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m260.9/260.9 kB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m55.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m55.4 MB/s

In [2]:
!pip install -q torch transformers transformers accelerate bitsandbytes langchain sentence-transformers faiss-gpu openpyxl

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.1/290.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.2/102.2 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
!pip install -q datasets

In [4]:
# Colab only: mount Google Drive so that paths under /content/drive become available.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Colab only: this is the path to the code folder of this project on Google Drive; change it to your own location.
# WARNING: on Colab, run this cell only once per session; otherwise the notebook will crash.
%cd /content/drive/MyDrive/chuangjl/code

/content/drive/MyDrive/chuangjl/code


In [6]:
# Miscellany
from tqdm.notebook import tqdm
import pandas as pd
import torch
from typing import Optional, List, Tuple
import os
import matplotlib.pyplot as plt

import locale
from ragatouille import RAGPretrainedModel

# langchain
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document as LangchainDocument
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain.text_splitter import RecursiveCharacterTextSplitter

# datasets
import datasets
from datasets import concatenate_datasets
from datasets import Dataset

# transformers
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from transformers import Pipeline
from transformers import pipeline

In [7]:
def _normalize_tokens(text):
  """Return the set of lower-cased, punctuation-stripped tokens in `text`.

  The text is split on whitespace and the characters , . ? ! " ' ( ) are
  removed from every token. Tokens that become empty after stripping (for
  example a lone "." or "?") are discarded so they cannot be counted as a
  match between two otherwise unrelated strings.
  """
  drop_table = str.maketrans("", "", ",.?!\"'()")
  stripped = (token.translate(drop_table) for token in text.lower().split())
  return {token for token in stripped if token}


def Metric(output, ref):
  """Token-set overlap score between a generated answer and a reference.

  Args:
    output: generated answer text.
    ref: reference (gold) answer text.

  Returns:
    A tuple ``(recall, precision, f1)`` computed over the sets of unique
    normalized tokens. Each value is 0 when its denominator would be zero.
  """
  setRef = _normalize_tokens(ref)
  setGen = _normalize_tokens(output)
  overlap = len(setRef & setGen)

  precision = overlap / len(setGen) if setGen else 0
  recall = overlap / len(setRef) if setRef else 0
  f1 = 0
  if precision + recall != 0:
    f1 = 2 * (precision * recall) / (precision + recall)
  # Note the return order: recall first, then precision, then F1.
  return recall, precision, f1

In [8]:
def recursive_split_documents_by_characters(
    chunk_size,
    chunk_overlap,
    raw_documents,
    tokenizer):
    """Split documents into chunks and drop chunks with repeated text.

    A RecursiveCharacterTextSplitter is built from the given Hugging Face
    tokenizer with the requested chunk size and overlap, each document is
    split on its own, and only the first chunk seen for any given
    `page_content` is kept (original order preserved).

    Returns:
        The list of unique chunk documents.
    """
    splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
        tokenizer,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        add_start_index=True,
        strip_whitespace=True,
    )

    # Split every document individually and collect all resulting chunks.
    all_chunks = []
    for raw_doc in raw_documents:
        all_chunks.extend(splitter.split_documents([raw_doc]))

    # Keep the first occurrence of each distinct chunk text.
    seen_texts = set()
    deduplicated = []
    for chunk in all_chunks:
        text = chunk.page_content
        if text in seen_texts:
            continue
        seen_texts.add(text)
        deduplicated.append(chunk)

    return deduplicated

In [9]:
# Root folder of the knowledge base (relative path).
data_base_path = "../database"

# Locations of the individual knowledge sources.
# NOTE(review): presumably these are relative to data_base_path — confirm against the code that reads them.

# Academics
handbooks_path = "Academics_LTI/Handbook"
program_path = "Academics_LTI/Programs_and_staff"
# Course
schedule_fall23_path = "Courses_CMU/Schedule/augmentFall23.txt"
schedule_spring24_path = "Courses_CMU/Schedule/augmentSpring24.txt"
schedule_summer24_path = "Courses_CMU/Schedule/augmentSummer24.txt"

# The "scentences" spelling is part of the file names; do not correct it here without renaming the files.
calendar_2324_path = "Courses_CMU/Calendars/Calendar_scentences_2324.txt"
calendar_2425_path = "Courses_CMU/Calendars/Calendar_scentences_2425.txt"
# Events
commencement_path = "Events_CMU/Commencement/Commencement.txt"
spring_path = "Events_CMU/Spring/carnival_and_reunion.txt"
# Faculty
faculty_path = "Faculty_LTI/Faculty/LTI_faculty.txt"
researches_path = "Faculty_LTI/Research/"
# History
History_path = "History"

In [10]:
# Models under comparison: two embedding models (one per retrieval system) and one shared reader LLM.
embed1 = "WhereIsAI/UAE-Large-V1"
embed2 = "thenlper/gte-large"
READER_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

# Experiment size. The question-sampling cell reads num_of_questions as set here;
# the experiment cell further down reassigns all three values before running the trials.
num_of_trial = 5
percentage_of_documents = 0.3
num_of_questions = 5

In [11]:
# Retrieval system 1: tokenizer (used for chunking) and embedding model for embed1.
embedding_tokenizer_1 =  AutoTokenizer.from_pretrained(embed1)
embedding_model_1 = HuggingFaceEmbeddings(
    model_name = embed1,
    multi_process = True,
    model_kwargs = {"device": "cuda"},
    encode_kwargs = {"normalize_embeddings": True},  # normalized vectors, needed for cosine similarity
)

# Retrieval system 2: same configuration, using embed2.
embedding_tokenizer_2 =  AutoTokenizer.from_pretrained(embed2)
embedding_model_2 = HuggingFaceEmbeddings(
    model_name = embed2,
    multi_process = True,
    model_kwargs = {"device": "cuda"},
    encode_kwargs = {"normalize_embeddings": True},  # normalized vectors, needed for cosine similarity
)


# Quantization settings used to load the reader LLM in 4-bit.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # 4-bit quantization: Linear layers are replaced with FP4/NF4 layers from bitsandbytes.
    bnb_4bit_quant_type="nf4", # Quantization data type of the bnb.nn.Linear4Bit layers; options are "fp4" and "nf4".
    bnb_4bit_use_double_quant=True, # Nested quantization: the quantization constants of the first pass are quantized again.
    bnb_4bit_compute_dtype=torch.bfloat16, # Computation dtype, which may differ from the input type (e.g. fp32 inputs with bf16 compute for speed).
)

# Reader (generator) model and its tokenizer, shared by both retrieval systems.
reader_tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
reader_model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config)

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/733 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/67.9k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [12]:
# Text-generation pipeline wrapping the quantized reader model.
reader_pipeline = pipeline(
    task = "text-generation",
    model = reader_model,
    tokenizer = reader_tokenizer,
    # Sampling settings.
    do_sample = True,
    temperature = 0.1,
    repetition_penalty = 1.1,
    # Output settings: return only the newly generated text, capped at 700 tokens.
    max_new_tokens = 700,
    return_full_text = False,
)

In [13]:
import random
import locale

# Load the evaluation questions and reference answers. The two files are
# aligned by line number, so only the line terminator is removed (blank lines
# are kept) to preserve that alignment while keeping "\n" out of the prompts.
with open("../data/test/TotalQ.txt", "r", encoding="utf-8") as f:
    questions = [line.rstrip("\r\n") for line in f]
with open("../data/test/TotalA.txt", "r", encoding="utf-8") as f:
    answers = [line.rstrip("\r\n") for line in f]

# A length mismatch would silently pair questions with the wrong answers.
if len(questions) != len(answers):
    raise ValueError(
        f"Question/answer files are misaligned: {len(questions)} questions vs {len(answers)} answers"
    )

# Draw the same random subset from both lists; never ask for more items than exist.
indexes = random.sample(range(len(questions)), min(num_of_questions, len(questions)))

questions = [questions[i] for i in indexes]
answers = [answers[i] for i in indexes]

In [14]:
# Sanity check: number of sampled evaluation questions.
print(len(questions))

5


In [15]:
# Few-shot examples shown to the reader model; they illustrate the expected answer format.
few_shot_prompt = """Question: Is summer internship between the first and second year necessary for students in MSAII programs?
Answer: Yes
Question: In general, how many units of graduate-level courses must be passed for Ph.D. in Language and Information Technologies?
Answer: 96
Question: What is the date of 2025 Summer One & Mini-5 voucher deadline?
Answer: June 18, Wednesday in Summer 2025
Question: Who is the instructor of 11711 in Spring 2024
Answer:  Neubig
Question: When is Commencement Ceremony?
Answer: 10–11:30 a.m, Sunday, May 12
Question: What time of the day is Kiltie Band Concert?
Answer: 3:30 PM-4:30 PM ET
Question: What is Daphne Ippolito's Email?
Answer: daphnei@cmu.edu
Question:  Who is the author of the paper 'StyleRF: Zero-Shot 3D Style Transfer of Neural Radiance Fields'?
Answer: Eric P. Xing
Question: Who in 1986 wrote a white paper proposing the creation of "a School of Computer Science?
Answer: Nico Habermann and then-CMU provost Angel Jordan
Question: In what month do students and alumni reviewed potential mascot images in focus groups?
Answer: October"""

# Multi-turn chat skeleton. The {context}, {few_shot_prompt} and {question}
# placeholders are left unfilled here and are substituted later with str.format.
sys_prompt = [
    {"role": "user", "content": """Below are some documents as context {context}.""" },
    {"role": "assistant", "content": "Understood" },
    {"role": "user", "content": """Below are some example questions and their answers as reference {few_shot_prompt}.""" },
    {"role": "assistant", "content": "Understood" },
    {"role": "user", "content": "Based on these context documents, could you answer the following question? Question: {question}. Your answer should have the same format as the example answers. Your answer should not have any prefix or suffix. Your answer should not have any other information, including explanation, process, note, and so on. Do not give any other information other than the answer!"},
]

# Render the chat with the reader tokenizer's chat template as a plain string
# (tokenize=False) that ends with the generation prompt.
prompt_template = reader_tokenizer.apply_chat_template(
    sys_prompt, tokenize=False, add_generation_prompt=True
)
print(prompt_template)

def rag(
    question, llm, database,
    rerank_model = None,
    num_retrieved_docs = 40,
    num_return_docs = 5,
    few_shot_prompt = few_shot_prompt,
    print_output = True):
    """Answer a question with retrieval-augmented generation.

    Args:
        question: the question text; surrounding whitespace is ignored.
        llm: text-generation pipeline; called with the final prompt, and its
            `tokenizer.eos_token_id` is used as the padding token id.
        database: vector store exposing `similarity_search(query=..., k=...)`.
        rerank_model: optional reranker exposing `rerank(query, docs, k=...)`
            that returns items with a "content" entry; skipped when falsy.
        num_retrieved_docs: number of documents fetched from the database.
        num_return_docs: number of documents placed in the prompt.
        few_shot_prompt: example questions/answers inserted into the prompt.
        print_output: when True, print a short progress message per stage.

    Returns:
        A tuple ``(final_prompt, answer)``: the fully formatted prompt and the
        text generated by `llm`.
    """
    # Questions read line-by-line from a file carry a trailing newline, which
    # would otherwise render as "Question: <text>\n." in the prompt.
    question = question.strip()

    # Gather documents with retriever
    if print_output: print("retrieving documents")
    retrieved = database.similarity_search(query = question, k = num_retrieved_docs)
    rel_docs = [doc.page_content for doc in retrieved]  # keep only the text

    # rerank results
    if rerank_model:
        if print_output: print("reranking")
        reranked = rerank_model.rerank(question, rel_docs, k = num_return_docs)
        rel_docs = [doc["content"] for doc in reranked]

    # select top k (also caps the list when no reranker is used)
    rel_docs = rel_docs[:num_return_docs]

    # build the final prompt
    context = "".join(f"Document {i}: {doc}\n" for i, doc in enumerate(rel_docs))
    final_prompt = prompt_template.format(few_shot_prompt = few_shot_prompt,
                                          context = context,
                                          question = question)

    # generating an answer
    if print_output: print("generating answer")
    answer = llm(final_prompt, pad_token_id=llm.tokenizer.eos_token_id)[0]["generated_text"]

    return final_prompt, answer

<s>[INST] Below are some documents as context {context}. [/INST]Understood</s>[INST] Below are some example questions and their answers as reference {few_shot_prompt}. [/INST]Understood</s>[INST] Based on these context documents, could you answer the following question? Question: {question}. Your answer should have the same format as the example answers. Your answer should not have any prefix or suffix. Your answer should not have any other information, including explanation, process, note, and so on. Do not give any other information other than the answer! [/INST]


In [22]:
# build total ds
ds = datasets.load_dataset("ge0rgeli/CMU_LTI")
ds = ds['train']
ds = ds.shuffle()

In [17]:
# Per-trial mean precision / recall / F1 for retrieval system 1.
ps_1, rs_1, f1_1 = [], [], []

# Per-trial mean precision / recall / F1 for retrieval system 2.
ps_2, rs_2, f1_2 = [], [], []

In [18]:
# Monkeypatch: make locale.getpreferredencoding() always report "UTF-8".
# NOTE(review): presumably a workaround for an encoding error in the Colab runtime — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"
# ColBERTv2 model loaded through RAGatouille; used as the reranker for both retrieval systems.
rerank_model = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

artifact.metadata:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/405 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [33]:
num_of_trial = 100
percentage_of_documents = 0.7
num_of_questions = 70
# NOTE(review): `questions` / `answers` were already subsampled by the question-loading
# cell with the num_of_questions value in effect at that time. Raising the value here
# only caps the loop below; it cannot add questions. Re-run that cell to change the sample.


def _mean_scores(predictions, references):
  """Average Metric() over aligned (prediction, reference) pairs.

  Returns a tuple (precision, recall, f1) of means. When there are no pairs
  the result is (0.0, 0.0, 0.0) instead of a division by zero.
  """
  scored = [Metric(pred, ref) for pred, ref in zip(predictions, references)]
  if not scored:
    return 0.0, 0.0, 0.0
  count = len(scored)
  # Metric returns (recall, precision, f1) in that order.
  mean_recall = sum(r for r, _, _ in scored) / count
  mean_precision = sum(p for _, p, _ in scored) / count
  mean_f1 = sum(f for _, _, f in scored) / count
  return mean_precision, mean_recall, mean_f1


# Loop-invariant work, done once instead of on every trial.
all_documents = [
    LangchainDocument(page_content=doc["text"], metadata={"source": doc["source"]})
    for doc in tqdm(ds)
]
documents_per_trial = int(percentage_of_documents * len(all_documents))
embed_1_name = embed1.split(sep = "/")[-1]
embed_2_name = embed2.split(sep = "/")[-1]
model_name = READER_MODEL_NAME.split(sep = "/")[-1]
# -1 (any negative value) means all questions
trial_questions = questions if num_of_questions < 0 else questions[:num_of_questions]

for i in range(num_of_trial):
  print(f"trial {i}")
  # 1. build sample documents: a fresh random subset of the corpus for this trial
  raw_documents = random.sample(all_documents, documents_per_trial)
  print(len(raw_documents))

  # Chunk the same sample once per system, each with its own embedding tokenizer.
  processed_documents_1 = recursive_split_documents_by_characters(
      chunk_size = 512,  # We choose a chunk size adapted to our model
      chunk_overlap = 50,
      raw_documents = raw_documents,
      tokenizer = embedding_tokenizer_1,
  )
  processed_documents_2 = recursive_split_documents_by_characters(
      chunk_size = 512,  # We choose a chunk size adapted to our model
      chunk_overlap = 50,
      raw_documents = raw_documents,
      tokenizer = embedding_tokenizer_2,
  )

  # 2. build data base
  database_1 = FAISS.from_documents(processed_documents_1,
                                    embedding_model_1,
                                    distance_strategy=DistanceStrategy.COSINE)
  print("db1")
  database_2 = FAISS.from_documents(processed_documents_2,
                                    embedding_model_2,
                                    distance_strategy=DistanceStrategy.COSINE)
  print("db2")

  # 3. run the pipeline: answer every question with both systems
  answer_1_list = []
  answer_2_list = []
  for question in trial_questions:
    final_prompt, answer_1 = rag(question = question,
                                 llm = reader_pipeline,
                                 database = database_1,
                                 rerank_model = rerank_model,
                                 print_output = False)
    final_prompt, answer_2 = rag(question = question,
                                 llm = reader_pipeline,
                                 database = database_2,
                                 rerank_model = rerank_model,
                                 print_output = False)
    print(question)
    # Flatten each answer onto a single line before scoring.
    answer_1 = answer_1.replace("\n", "").strip()
    answer_2 = answer_2.replace("\n", "").strip()
    print(answer_1)
    print(answer_2)
    answer_1_list.append(answer_1)
    answer_2_list.append(answer_2)

  # 4. evaluate answers: per-trial means are appended to the global result lists.
  # NOTE(review): some generations start with "Answer:", which Metric counts as an
  # unmatched token and so lowers precision; consider stripping that prefix before scoring.
  trial_p_1, trial_r_1, trial_f1_1 = _mean_scores(answer_1_list, answers)
  ps_1.append(trial_p_1)
  rs_1.append(trial_r_1)
  f1_1.append(trial_f1_1)

  trial_p_2, trial_r_2, trial_f1_2 = _mean_scores(answer_2_list, answers)
  ps_2.append(trial_p_2)
  rs_2.append(trial_r_2)
  f1_2.append(trial_f1_2)

trial 0


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 244.4 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.86it/s]


Your documents are roughly 228.20000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 18.41it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 56.13it/s]
100%|██████████| 2/2 [00:00<00:00, 58.16it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 336.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.65it/s]


Your documents are roughly 338.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.01it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 38.91it/s]
100%|██████████| 2/2 [00:00<00:00, 40.78it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11Spring 2025(Note: This answer is based on Document 0)
April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11
Your documents are roughly 242.20000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.44it/s]


Your documents are roughly 253.9 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 18.08it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 1


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 239.5 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 12.24it/s]


Your documents are roughly 227.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 21.92it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 38.27it/s]
100%|██████████| 2/2 [00:00<00:00, 62.41it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 276.80000000000007 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.73it/s]


Your documents are roughly 323.5 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 12.87it/s]


What was the first computer to achieve grandmaster status

The first computer to achieve grandmaster status in chess was Deep Blue, developed at IBM. (Herb A. Simon and Allen Newell did pioneering work in artificial intelligence and chess, but they did not build the first computer to achieve grandmaster status.)Answer: Deep Blue
Answer: The first computer to achieve grandmaster status was the Logic Theorist, a computer program developed by Herb A. Simon, Allen Newell, and Cliff Shaw in 1956. It could develop proofs for theorems in much the same way a human would work.


100%|██████████| 2/2 [00:00<00:00, 44.13it/s]
100%|██████████| 2/2 [00:00<00:00, 42.31it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11, Spring 2025
April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11, Spring 2025
Your documents are roughly 262.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.96it/s]


Your documents are roughly 269.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.24it/s]


What is the email of PhD Program Director in CMU LTI

staceyy@cs.cmu.edu
staceyy@cs.cmu.edu
trial 2


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 248.70000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.82it/s]


Your documents are roughly 239.5 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.52it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 63.81it/s]
100%|██████████| 2/2 [00:00<00:00, 58.26it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 251.30000000000004 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.79it/s]


Your documents are roughly 318.4 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.03it/s]


What was the first computer to achieve grandmaster status

The first computer to achieve grandmaster status in chess was Deep Blue, developed at IBM. (Reference: Not provided in context documents)
The first computer to achieve grandmaster status in chess was Deep Blue, developed at IBM. (1997)


100%|██████████| 2/2 [00:00<00:00, 40.62it/s]
100%|██████████| 2/2 [00:00<00:00, 32.30it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11Spring 2025(Note: This answer is based on Document 0)
April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11
Your documents are roughly 262.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.75it/s]


Your documents are roughly 269.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 15.98it/s]


What is the email of PhD Program Director in CMU LTI

ref@cs.cmu.edu
ref@cs.cmu.edu
trial 3


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 239.5 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.29it/s]


Your documents are roughly 227.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 22.18it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Samiran Gode, Supreeth Bare, B. Raj, H. Yoo
Samiran Gode, Supreeth Bare, B. Raj, H. Yoo


100%|██████████| 2/2 [00:00<00:00, 62.18it/s]
100%|██████████| 2/2 [00:00<00:00, 39.03it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 276.80000000000007 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.80it/s]


Your documents are roughly 323.5 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 12.77it/s]


What was the first computer to achieve grandmaster status

Answer: The first computer to achieve grandmaster status in chess was Deep Blue, developed at IBM, in 1997.
The first computer to achieve grandmaster status in chess was Deep Blue, developed at IBM. (1997)


100%|██████████| 2/2 [00:00<00:00, 40.84it/s]
100%|██████████| 2/2 [00:00<00:00, 40.08it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11, 2025 (for Fall 2025)April 7-April 11, 2025 (for Spring 2025)
April 7-April 11.
Your documents are roughly 269.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.98it/s]


Your documents are roughly 291.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 14.29it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 4


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 239.5 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.74it/s]


Your documents are roughly 227.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 20.61it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 58.00it/s]
100%|██████████| 2/2 [00:00<00:00, 63.14it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 323.40000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.71it/s]


Your documents are roughly 337.70000000000005 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.38it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 42.63it/s]
100%|██████████| 2/2 [00:00<00:00, 39.47it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11.
April 7-April 11.
Your documents are roughly 262.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.35it/s]


Your documents are roughly 269.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.65it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 5


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 244.4 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 14.65it/s]


Your documents are roughly 228.20000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 21.02it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 61.38it/s]
100%|██████████| 2/2 [00:00<00:00, 66.67it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 276.80000000000007 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.71it/s]


Your documents are roughly 321.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 12.75it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 40.19it/s]
100%|██████████| 2/2 [00:00<00:00, 40.53it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

November 18-November 22, Fall 2025Spring 2025 (No specific date mentioned in the context documents)
November 18-November 22, Fall 2025Spring 2025 (No specific date mentioned in the context documents)
Your documents are roughly 253.9 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.64it/s]


Your documents are roughly 263.90000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.70it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 6


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 222.4 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 19.95it/s]


Your documents are roughly 226.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 21.82it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 61.43it/s]
100%|██████████| 2/2 [00:00<00:00, 52.95it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 276.80000000000007 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.68it/s]


Your documents are roughly 320.2 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 12.59it/s]


What was the first computer to achieve grandmaster status

The first computer to achieve grandmaster status in chess was Deep Blue, developed at IBM. (Reference: Not provided in context documents)
Answer: The first computer to achieve grandmaster status was the Logic Theorist, a computer program developed by Herb A. Simon, Allen Newell, and Cliff Shaw in 1956. It could develop proofs for theorems in much the same way a human would work.


100%|██████████| 2/2 [00:00<00:00, 36.84it/s]
100%|██████████| 2/2 [00:00<00:00, 40.30it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11Spring 2025April 7-April 11Spring 2025(Note: This is the information directly from the provided document 0.)
April 7-April 11Spring 2025(Note: This answer is based on Document 0)
Your documents are roughly 260.8 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.88it/s]


Your documents are roughly 269.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.32it/s]


What is the email of PhD Program Director in CMU LTI

staceyy@cs.cmu.edu
staceyy@cs.cmu.edu
trial 7


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 239.5 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 15.73it/s]


Your documents are roughly 225.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 22.28it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 57.06it/s]
100%|██████████| 2/2 [00:00<00:00, 59.87it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 251.30000000000004 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.61it/s]


Your documents are roughly 318.2 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 12.88it/s]


What was the first computer to achieve grandmaster status

There is no mention of the first computer to achieve grandmaster status in the provided context documents.
Answer: The first computer to achieve grandmaster status was the Logic Theorist, a computer program developed by Herb A. Simon, Allen Newell, and Cliff Shaw in 1956. It could develop proofs for theorems in much the same way a human would work.


100%|██████████| 2/2 [00:00<00:00, 41.63it/s]
100%|██████████| 2/2 [00:00<00:00, 39.97it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11, Spring 2025
April 7-11, Spring 2025Fall 2025 Registration Week: April 7-11Spring 2025 Registration Week: April 7-11
Your documents are roughly 253.70000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 18.03it/s]


Your documents are roughly 260.8 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.70it/s]


What is the email of PhD Program Director in CMU LTI

ref@cs.cmu.edu
ref@cs.cmu.edu
trial 8


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 239.5 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.77it/s]


Your documents are roughly 228.20000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 22.15it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 60.42it/s]
100%|██████████| 2/2 [00:00<00:00, 63.23it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 336.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.67it/s]


Your documents are roughly 353.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.16it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 32.95it/s]
100%|██████████| 2/2 [00:00<00:00, 39.85it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

November 18-November 22, Fall 2025Spring 2025: November 18-November 22
November 18-November 22, Fall 2025Spring 2025: Registration Week is from November 18 to November 22.
Your documents are roughly 262.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.92it/s]


Your documents are roughly 291.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.86it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 9


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 225.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 20.63it/s]


Your documents are roughly 226.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 21.55it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 59.70it/s]
100%|██████████| 2/2 [00:00<00:00, 57.97it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 336.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.61it/s]


Your documents are roughly 353.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 12.89it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 41.38it/s]
100%|██████████| 2/2 [00:00<00:00, 36.42it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

November 18-22, Fall 2025April 15-19, Spring 2025
November 18-22, Fall 2025April 15-19, Spring 2025
Your documents are roughly 269.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.81it/s]


Your documents are roughly 282.90000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.91it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 10


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 244.4 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.89it/s]


Your documents are roughly 228.20000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 20.64it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Samiran Gode, Supreeth Bare, B. Raj, H. Yoo
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 59.54it/s]
100%|██████████| 2/2 [00:00<00:00, 53.73it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 257.7000000000001 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.63it/s]


Your documents are roughly 336.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 11.79it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech(Note: According to the provided context document, Hitech was the first computer to achieve grandmaster status in chess.)
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 40.80it/s]
100%|██████████| 2/2 [00:00<00:00, 39.75it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11Spring 2025(Note: This answer is based on Document 0)
April 7-April 11Spring 2025(Note: This answer is based on Document 0)
Your documents are roughly 260.8 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.82it/s]


Your documents are roughly 260.8 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 15.86it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 11


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 222.40000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 20.89it/s]


Your documents are roughly 225.2 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 20.81it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 50.48it/s]
100%|██████████| 2/2 [00:00<00:00, 58.67it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 336.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 12.90it/s]


Your documents are roughly 353.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.17it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 40.72it/s]
100%|██████████| 2/2 [00:00<00:00, 39.35it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11Spring 2025(Note: This answer is based on Document 0)
April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11April 7-April 11, Spring 2025
Your documents are roughly 262.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.51it/s]


Your documents are roughly 291.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.63it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 12


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 227.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 21.21it/s]


Your documents are roughly 227.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 22.17it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Samiran Gode, Supreeth Bare, B. Raj, H. Yoo
Samiran Gode, Supreeth Bare, B. Raj, H. Yoo


100%|██████████| 2/2 [00:00<00:00, 41.95it/s]
100%|██████████| 2/2 [00:00<00:00, 61.01it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 336.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.50it/s]


Your documents are roughly 338.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.51it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 35.75it/s]
100%|██████████| 2/2 [00:00<00:00, 43.68it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11
April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11
Your documents are roughly 253.9 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.96it/s]


Your documents are roughly 253.9 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.88it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 13


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 221.90000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.40it/s]


Your documents are roughly 227.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 21.56it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Alexander Hauptmann, Wei-Hao Lin
Alexander Hauptmann, Wei-Hao Lin


100%|██████████| 2/2 [00:00<00:00, 61.87it/s]
100%|██████████| 2/2 [00:00<00:00, 40.83it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 222.40000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 22.00it/s]


Your documents are roughly 233.70000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 21.14it/s]


What was the first computer to achieve grandmaster status

Answer: There is no documented answer in the provided context documents regarding the first computer to achieve grandmaster status.
Answer: The first computer to achieve grandmaster status in chess was Deep Blue, developed by IBM, in 1997.


100%|██████████| 2/2 [00:00<00:00, 41.90it/s]
100%|██████████| 2/2 [00:00<00:00, 33.96it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11Spring 2025(Note: This is the information given in Document 0.)
April 7-April 11Spring 2025April 7, 2025 (Monday) - April 11, 2025 (Friday)
Your documents are roughly 241.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.51it/s]


Your documents are roughly 260.8 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.63it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 14


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 222.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 18.27it/s]


Your documents are roughly 225.2 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 22.04it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 57.62it/s]
100%|██████████| 2/2 [00:00<00:00, 57.85it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 321.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.48it/s]


Your documents are roughly 338.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.27it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 42.40it/s]
100%|██████████| 2/2 [00:00<00:00, 39.59it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11Spring 2025(Note: This answer is based on Document 0)
April 7-April 11Spring 2025(Note: This answer is based on Document 0)
Your documents are roughly 243.10000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.94it/s]


Your documents are roughly 263.90000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.10it/s]


What is the email of PhD Program Director in CMU LTI

Answer: staceyy@cs.cmu.edu
Answer: staceyy@cs.cmu.edu
trial 15


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 248.70000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.89it/s]


Your documents are roughly 228.20000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 20.19it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 58.56it/s]
100%|██████████| 2/2 [00:00<00:00, 62.68it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 336.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 12.66it/s]


Your documents are roughly 338.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.12it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 41.52it/s]
100%|██████████| 2/2 [00:00<00:00, 41.82it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11Spring 2025(Note: This answer is based on Document 0)
April 7-April 11Spring 2025(Note: This is based on Document 0)
Your documents are roughly 260.2 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.92it/s]


Your documents are roughly 262.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 15.82it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 16


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 220.2 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 20.78it/s]


Your documents are roughly 224.1 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 21.14it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Samiran Gode, Supreeth Bare, B. Raj, H. Yoo
Samiran Gode, Supreeth Bare, B. Raj, H. Yoo


100%|██████████| 2/2 [00:00<00:00, 45.59it/s]
100%|██████████| 2/2 [00:00<00:00, 59.34it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday
Answer: August 6, Tuesday
Your documents are roughly 240.70000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.46it/s]


Your documents are roughly 254.30000000000007 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 15.82it/s]


What was the first computer to achieve grandmaster status

Answer: There is no mention of the first computer to achieve grandmaster status in the provided context documents.
Answer: The first computer to achieve grandmaster status in chess was Deep Blue, created by IBM, in 1997.


100%|██████████| 2/2 [00:00<00:00, 41.59it/s]
100%|██████████| 2/2 [00:00<00:00, 42.04it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11, Spring 2025
April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11, Spring 2025
Your documents are roughly 202.50000000000006 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 24.45it/s]


Your documents are roughly 202.50000000000006 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 22.86it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 17


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 222.4 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 21.22it/s]


Your documents are roughly 227.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 22.08it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 55.83it/s]
100%|██████████| 2/2 [00:00<00:00, 54.00it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 323.40000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.48it/s]


Your documents are roughly 337.70000000000005 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 13.07it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech, 1990.
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 42.70it/s]
100%|██████████| 2/2 [00:00<00:00, 31.48it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

November 18-November 22, Fall 2025Spring 2025: Registration Week is from November 18 to November 22.
November 18-November 22, Fall 2025Spring 2025: Registration Week is from November 18 to November 22.
Your documents are roughly 235.90000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 22.13it/s]


Your documents are roughly 263.90000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 15.94it/s]


What is the email of PhD Program Director in CMU LTI

ref@cs.cmu.edu
ref@cs.cmu.edu
trial 18


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 248.70000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 15.38it/s]


Your documents are roughly 237.2 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.91it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 57.32it/s]
100%|██████████| 2/2 [00:00<00:00, 59.38it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 247.2 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.80it/s]


Your documents are roughly 266.00000000000006 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.27it/s]


What was the first computer to achieve grandmaster status

Answer: Hitech(Note: According to the context document provided, Hitech was the first computer to achieve grandmaster status in chess.)
Answer: Hitech, 1990.


100%|██████████| 2/2 [00:00<00:00, 42.83it/s]
100%|██████████| 2/2 [00:00<00:00, 40.61it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11, Spring 2025
April 7-April 11, Spring 2025Fall 2025 Registration Week: April 7-April 11, Spring 2025
Your documents are roughly 262.6 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 15.83it/s]


Your documents are roughly 291.0 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.30it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
callan@cs.cmu.edu
trial 19


  0%|          | 0/12937 [00:00<?, ?it/s]

9055
db1
db2
Your documents are roughly 248.70000000000002 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.16it/s]


Your documents are roughly 239.5 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.82it/s]


Who are the authors of the paper Understanding Political Polarisation using Language Models: A dataset and method.

Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.
Answer: Samiran Gode, Supreeth Bare, B. Raj, H. Yoo.


100%|██████████| 2/2 [00:00<00:00, 59.65it/s]
100%|██████████| 2/2 [00:00<00:00, 41.05it/s]


When is Summer Semester  Two Final Grades Due by 4 pmSummer 2024

Answer: August 6, Tuesday.
Answer: August 6, Tuesday.
Your documents are roughly 244.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 18.00it/s]


Your documents are roughly 250.3 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.79it/s]


What was the first computer to achieve grandmaster status

IBM Watson (2011)
IBM Watson (2011)


100%|██████████| 2/2 [00:00<00:00, 44.37it/s]
100%|██████████| 2/2 [00:00<00:00, 40.57it/s]


What is the date of Fall 2025 Registration Week；Spring 2025

November 18-November 22, Fall 2025Spring 2025 (No specific date mentioned in the context documents)
November 18-November 22, Fall 2025Spring 2025 (no specific date mentioned in the context)
Your documents are roughly 253.9 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 17.86it/s]


Your documents are roughly 281.90000000000003 tokens long at the 90th percentile! This is quite long and might slow down reranking!
 Provide fewer documents, build smaller chunks or run on GPU if it takes too long for your needs!


100%|██████████| 2/2 [00:00<00:00, 16.71it/s]


What is the email of PhD Program Director in CMU LTI

callan@cs.cmu.edu
staceyy@cs.cmu.edu


In [53]:
# Win tallies, one independent list per metric (ps, rs, f1).
# Slot order in every list: [sys2 wins, sys1 wins, ties].
ps_win, rs_win, f1_win = ([0, 0, 0] for _ in range(3))

In [59]:
def count_pairwise_wins(scores_1, scores_2):
  """Tally element-wise outcomes between two aligned score sequences.

  Args:
    scores_1: scores for sys1, one entry per comparison.
    scores_2: scores for sys2, aligned with ``scores_1``.

  Returns:
    A new list ``[sys2 wins, sys1 wins, ties]``. Pairs are formed with
    ``zip``, so extra trailing entries in the longer sequence are ignored.
  """
  tally = [0, 0, 0]  # sys2, sys1, tie
  for score_1, score_2 in zip(scores_1, scores_2):
    if score_2 > score_1:
      tally[0] += 1
    elif score_2 < score_1:
      tally[1] += 1
    else:
      # Anything that is neither greater nor smaller is counted as a tie.
      tally[2] += 1
  return tally


# Rebuild every tally from scratch rather than incrementing the existing
# lists: this keeps the cell idempotent, so re-running it (or running it
# after the tallies have been normalised to fractions) cannot double-count.
# Starting from zeroed tallies, the result is the same as accumulating.
ps_win = count_pairwise_wins(ps_1, ps_2)
rs_win = count_pairwise_wins(rs_1, rs_2)
f1_win = count_pairwise_wins(f1_1, f1_2)

print(ps_win)
print(rs_win)
print(f1_win)

[100, 0, 0]
[98, 2, 0]
[100, 0, 0]


In [60]:
# 50
# NOTE(review): the "50" marker above is kept from the original cell; its
# meaning is not evident from this cell — confirm or remove.

def normalize_wins(tally):
  """Convert a [sys2, sys1, tie] tally into fractions of its total.

  Returns a new list of floats. An all-zero (or empty) tally yields zeros
  instead of raising ZeroDivisionError, so the cell still runs when there
  were no comparisons.
  """
  total = float(sum(tally))
  if total == 0:
    return [0.0 for _ in tally]
  return [x / total for x in tally]


def report_superiority(metric, win):
  """Print which system wins more often on ``metric``.

  ``win`` holds the normalised [sys2, sys1, tie] fractions. The value
  printed as "p value" is 1 minus the winning system's win fraction.
  Nothing is printed when both systems have the same win fraction.
  """
  if win[0] > win[1]:
    print('(sys2 is superior in %s with p value p=%.3f)\n' % (metric, 1 - win[0]))
  elif win[1] > win[0]:
    print('(sys1 is superior in %s with p value p=%.3f)\n' % (metric, 1 - win[1]))


# The tallies are rebound to their normalised form under the same names,
# so any later use of ps_win / rs_win / f1_win sees fractions, as before.
ps_win = normalize_wins(ps_win)
rs_win = normalize_wins(rs_win)
f1_win = normalize_wins(f1_win)
print(ps_win)
print(rs_win)
print(f1_win)

report_superiority('ps', ps_win)
report_superiority('rs', rs_win)
report_superiority('f1', f1_win)

[1.0, 0.0, 0.0]
[0.98, 0.02, 0.0]
[1.0, 0.0, 0.0]
(sys2 is superior in ps with p value p=0.000)

(sys2 is superior in rs with p value p=0.020)

(sys2 is superior in f1 with p value p=0.000)

