# **OpenAI API와 FAISS를 불러온다**

In [None]:
pip install faiss-cpu openai # faiss는 openai 호출 전 rag활용 목적

In [None]:
import openai
import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer # sentence_transformers로 단어간이 아니라 문장 간의 유사성을 벡터공간에서 찾는다
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

import json
import time

# **openai 키**

In [None]:
# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# written into the notebook; the placeholder is kept as the fallback value.
openai.api_key = os.environ.get("OPENAI_API_KEY", "Your api key here")

# **논문의 데이터 다운로드**

In [None]:
!pip install requests
import requests

In [None]:
# URL of the IR.json dataset to download
url = "https://raw.githubusercontent.com/ymt9/GSMIR/refs/heads/master/IR.json"

# Request the dataset. requests applies no timeout by default, so an
# unresponsive server would block this cell forever; fail after 30 s instead.
response = requests.get(url, timeout=30)

# Only write the file when the request succeeded
if response.status_code == 200:
    # Save the raw response bytes as IR.json in the working directory
    with open("IR.json", "wb") as file:
        file.write(response.content)
    print("다운로드 완료: IR.json 파일이 저장되었습니다.")
else:
    print(f"파일 다운로드 실패. 상태 코드: {response.status_code}")

다운로드 완료: IR.json 파일이 저장되었습니다.


구글 드라이브 마운트

In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package)
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Copy the downloaded file into Google Drive (cp leaves the original in /content)
!cp /content/IR.json /content/drive/MyDrive/IR.json

MyDrive에서 json 파일 읽어오기

In [None]:
# Path of the JSON file on the mounted drive (adjust if the file lives elsewhere)
file_path = '/content/drive/MyDrive/IR.json'

# Open the file and parse it. The encoding is given explicitly so the result
# does not depend on the platform's default text encoding.
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

data 출력

In [None]:
# Print the parsed JSON object in full to inspect its structure
print(data)

# **문서 임베딩 공간 생성**

# **받아온 논문의 데이터를 SentenceTransformer에 넣을 수 있는 document로 형식을 변환**

context , question , answer , con를 받아 무엇이 irrelevant 단어들인지 **파악한다**

In [None]:
# Build the documents list: one string per entry of data["data"], formed by
# joining its context, question, answer and con fields with single spaces.
documents = [
    f"{entry['context']} {entry['question']} {entry['answer']} {entry['con']}"
    for entry in data["data"]
]

documents 결과 확인

In [None]:
# Display the documents list as the cell's output
documents

document 임베딩 생성

In [None]:
# Load a pretrained embedding model (e.g. all-MiniLM-L6-v2)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
# Encode every document; these embeddings are what the FAISS index is built from
doc_embeddings = embedding_model.encode(documents)

In [None]:
# 2. Build the FAISS index
# FAISS operates on C-contiguous float32 matrices, so convert explicitly
# instead of relying on whatever dtype/layout the encoder returned
# (ascontiguousarray also avoids a copy when the input already qualifies).
doc_matrix = np.ascontiguousarray(doc_embeddings, dtype=np.float32)
index = faiss.IndexFlatL2(doc_matrix.shape[1])  # flat index using L2 distance
index.add(doc_matrix)  # add the document embeddings

In [None]:
# 3. Retrieve the documents most relevant to a question
def retrieve_relevant_documents(query, top_k=2):
    """Return the documents whose embeddings are nearest to ``query``.

    Args:
        query: Question text; it is embedded with ``embedding_model``.
        top_k: Maximum number of documents to return.

    Returns:
        A list of at most ``top_k`` entries of ``documents``, ordered as
        returned by ``index.search``. The list is shorter than ``top_k``
        when the index holds fewer vectors, and empty when ``top_k`` is
        not positive.
    """
    if top_k <= 0:
        return []

    # FAISS expects a C-contiguous float32 query matrix.
    query_embedding = np.ascontiguousarray(
        embedding_model.encode([query]), dtype=np.float32
    )
    _, indices = index.search(query_embedding, top_k)

    # FAISS fills missing neighbours with -1 when fewer than top_k vectors are
    # indexed; without this filter documents[-1] would silently return the
    # last document as if it were a match.
    return [documents[i] for i in indices[0] if i >= 0]

## **ChatGPT API 호출**

In [None]:
# 4. ChatGPT API calls
# Both public functions below send their prompt through the same helper so the
# with-context and without-context runs always use identical request settings.
def _chat_completion(user_content, model="gpt-4o-mini", max_tokens=1000, temperature=0.7):
    """Send one user message to the chat model and return the reply text.

    Uses ``openai.ChatCompletion.create``; this notebook pins openai==0.28
    for that call.

    Args:
        user_content: Full text of the user message.
        model: Name of the chat model to query.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.

    Returns:
        The content of the first choice's message.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_content},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )
    return response.choices[0].message["content"]


def generate_answer(query):
    """Answer ``query`` with retrieved documents supplied as context (RAG).

    Args:
        query: Question text.

    Returns:
        The model's reply text.
    """
    # Look up the documents related to the question
    related_docs = retrieve_relevant_documents(query)
    # Join the retrieved documents into a single context string
    context = "\n".join(related_docs)
    return _chat_completion(f"Context: {context}\nQuestion: {query}")


# ChatGPT API call that does not use RAG
def generate_answer_without_context(query):
    """Answer ``query`` directly, without any retrieved context.

    Args:
        query: Question text.

    Returns:
        The model's reply text.
    """
    return _chat_completion(f"Question: {query}")

# **출력 결과**

질문

# **generate_answer에서 오류가 발생하므로 openai 버전을 0.28로 다운그레이드한다 (openai가 이미 import된 상태이므로 설치 후 런타임을 재시작하고 앞의 셀들을 다시 실행해야 한다)**

In [None]:
!pip install openai==0.28

# **Original query**

In [None]:
# Baseline question: the original problem text, ending with the actual question
query = "John pays for half the cost of raising a child. It cost $10,000 a year for the first 8 years and then twice that much per year until the child is 18. University tuition then costs $250,000. How much did it cost?"

아무 context 투입 없는 **prompt**

In [None]:
# Generate an answer without any retrieved context and time the call.
# perf_counter() is a monotonic high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments as with time.time().
start_time = time.perf_counter()
answer = generate_answer_without_context(query)
end_time = time.perf_counter()

# Print the elapsed time and the answer
elapsed_time = end_time - start_time
print(f"걸린 시간 : {elapsed_time}초")
print("Answer:", answer)

걸린 시간 : 5.012379169464111초
Answer: To calculate the total cost of raising the child, we need to break it down into segments according to the information provided.

1. **Cost for the first 8 years:**
   - Cost per year: $10,000
   - Total cost for 8 years: 
   \[
   8 \times 10,000 = 80,000
   \]

2. **Cost from age 9 to age 18:**
   - Cost per year after age 8: $20,000 (since it's twice the cost of the first 8 years)
   - Duration: 10 years (from age 9 to age 18)
   - Total cost for these 10 years:
   \[
   10 \times 20,000 = 200,000
   \]

3. **University tuition:**
   - Total cost: $250,000

Now, we sum all these costs together to find the total cost of raising the child:

\[
\text{Total cost} = \text{Cost for the first 8 years} + \text{Cost from age 9 to 18} + \text{University tuition}
\]
\[
= 80,000 + 200,000 + 250,000 = 530,000
\]

Since John pays for half the cost of raising the child, we can find out how much he pays:

\[
\text{John's cost} = \frac{530,000}{2} = 265,000
\]

Thus

# **irrelevant 정보가 포함된 query**

In [None]:
# Variant of the question with an irrelevant sentence (paper money vs. coins) appended.
# NOTE(review): unlike the baseline query, this text has no closing
# "How much did it cost?" and no space after "$250,000." — confirm both are intended.
query = "John pays for half the cost of raising a child. It cost $10,000 a year for the first 8 years and then twice that much per year until the child is 18. University tuition then costs $250,000.He observed that the ratio of paper money to coins was 1:3."

아무 context 투입 없는 **prompt**

In [None]:
# Generate an answer without any retrieved context and time the call.
# perf_counter() is a monotonic high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments as with time.time().
start_time = time.perf_counter()
answer = generate_answer_without_context(query)
end_time = time.perf_counter()

# Print the elapsed time and the answer
elapsed_time = end_time - start_time
print(f"걸린 시간 : {elapsed_time}초")
print("Answer:", answer)

걸린 시간 : 8.860401630401611초
Answer: To calculate the total cost of raising the child until the age of 18 and the university tuition, we can break it down into two parts: the first 8 years and the following 10 years.

1. **Cost for the first 8 years:**
   - Annual cost: $10,000
   - Total cost for 8 years = $10,000/year × 8 years = $80,000

2. **Cost for the next 10 years (from age 8 to 18):**
   - Annual cost: $10,000 × 2 = $20,000
   - Total cost for 10 years = $20,000/year × 10 years = $200,000

3. **University tuition:**
   - Total cost = $250,000

4. **Total cost of raising the child until age 18:**
   - Total cost = Cost for the first 8 years + Cost for the next 10 years + University tuition
   - Total cost = $80,000 + $200,000 + $250,000 = $530,000

5. **John pays for half the cost:**
   - John's total payment = $530,000 / 2 = $265,000

Now, regarding the ratio of paper money to coins being 1:3, this means for every 1 unit of paper money, there are 3 units of coins. If John has a 

# **Original query**

In [None]:
# Reset to the baseline question (same text as the first run) for the run with retrieved context
query = "John pays for half the cost of raising a child. It cost $10,000 a year for the first 8 years and then twice that much per year until the child is 18. University tuition then costs $250,000. How much did it cost?"

context가 추가된 **prompt**

In [None]:
# Generate an answer with retrieved context (RAG) and time the call.
# perf_counter() is a monotonic high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments as with time.time().
start_time = time.perf_counter()
answer = generate_answer(query)
end_time = time.perf_counter()

# Print the elapsed time and the answer
elapsed_time = end_time - start_time
print(f"걸린 시간 : {elapsed_time}초")
print("Answer:", answer)

걸린 시간 : 4.791372776031494초
Answer: To calculate the total cost of raising the child, we will break down the costs by age group.

1. **First 8 years**: 
   - Cost per year = $10,000
   - Total for 8 years = 8 * $10,000 = $80,000

2. **Years 9 to 18**:
   - Cost per year = $20,000 (which is twice the cost of the first 8 years)
   - Number of years = 10
   - Total for these years = 10 * $20,000 = $200,000

3. **University tuition**:
   - Total cost = $250,000

Now, we will add all these costs together:

- Total cost for the first 8 years: $80,000
- Total cost for years 9 to 18: $200,000
- University tuition: $250,000

Total cost = $80,000 + $200,000 + $250,000 = $530,000

Since John pays for half of the cost, we will divide the total by 2:

Half of the total cost = $530,000 / 2 = $265,000

Therefore, the total cost that John is responsible for is **$265,000**. 

Regarding irrelevant information, the statements about receipts for paying tuition fees and noticing $2000 in his pocket do not 

# **irrelevant 정보가 포함된 query**

In [None]:
# Variant of the question with an irrelevant sentence (paper money vs. coins) appended.
# NOTE(review): unlike the baseline query, this text has no closing
# "How much did it cost?" and no space after "$250,000." — confirm both are intended.
query = "John pays for half the cost of raising a child. It cost $10,000 a year for the first 8 years and then twice that much per year until the child is 18. University tuition then costs $250,000.He observed that the ratio of paper money to coins was 1:3."

context가 추가된 **prompt**

In [None]:
# Generate an answer with retrieved context (RAG) and time the call.
# perf_counter() is a monotonic high-resolution clock, so the measured
# duration cannot be skewed by system clock adjustments as with time.time().
start_time = time.perf_counter()
answer = generate_answer(query)
end_time = time.perf_counter()

# Print the elapsed time and the answer
elapsed_time = end_time - start_time
print(f"걸린 시간 : {elapsed_time}초")
print("Answer:", answer)

걸린 시간 : 6.234113454818726초
Answer: To calculate the total cost of raising a child until the age of 18, we can break it down as follows:

1. **Cost for the first 8 years**: 
   - Annual cost = $10,000
   - Total cost for 8 years = $10,000 * 8 = $80,000

2. **Cost for the next 10 years (from age 9 to 18)**: 
   - Annual cost = $10,000 * 2 = $20,000
   - Total cost for 10 years = $20,000 * 10 = $200,000

3. **University tuition**:
   - Total cost = $250,000

Now, let's add all these costs together:
- Total cost for raising the child = Cost for first 8 years + Cost for next 10 years + University tuition
- Total cost = $80,000 + $200,000 + $250,000 = $530,000

Since John pays for half the cost, we divide this total by 2:
- John's cost = $530,000 / 2 = $265,000

### Summary of Relevant and Irrelevant Information
- **Relevant Information**: 
  - The annual costs for the first 8 years and the subsequent years.
  - The cost of university tuition.
  - That John pays for half of these costs.

- *