In [1]:
!pip install langchain langchain-openai faiss-cpu pandas langchain-ollama

Collecting langchain-ollama
  Using cached langchain_ollama-0.2.3-py3-none-any.whl.metadata (1.9 kB)
Collecting ollama<1,>=0.4.4 (from langchain-ollama)
  Using cached ollama-0.4.7-py3-none-any.whl.metadata (4.7 kB)
Using cached langchain_ollama-0.2.3-py3-none-any.whl (19 kB)
Using cached ollama-0.4.7-py3-none-any.whl (13 kB)
Installing collected packages: ollama, langchain-ollama
Successfully installed langchain-ollama-0.2.3 ollama-0.4.7


In [3]:
import os
os.environ["OPENAI_API_KEY"] = "sk" #openai 키 입력

from langchain_ollama import OllamaLLM
# DeepSeek R1 모델 로드
deepseek = OllamaLLM(model="deepseek-r1")

In [4]:
import pandas as pd # 엑셀 데이터를 불러와서 전처리
from langchain_openai import OpenAIEmbeddings # 텍스트 데이터를 벡터(Embedding)로 변환
from langchain.vectorstores import FAISS # 벡터 데이터를 저장하고 유사도 검색을 수행
from langchain.llms import OpenAI # GPT-4o 모델을 활용하여 검색된 데이터를 바탕으로 답변 생성
from langchain.chains import RetrievalQA # 검색된 데이터를 기반으로 LLM(GPT-4)이 질의응답(QA) 수행

# 엑셀 파일(car_prices.xlsx)을 불러와서 데이터프레임(df)에 저장
file_path = "d:/data/car_prices.xlsx"
df = pd.read_excel(file_path)

# 데이터 전처리 (데이터를 벡터화하기 전에 텍스트 형태로 변환)
df["text"] = df.apply(lambda row: f"{row['제조사']} {row['모델']} 가격: {row['가격(만원)']}만원", axis=1)

# 문장을 벡터(숫자 배열)로 변환
embeddings = OpenAIEmbeddings()

# 벡터로 변환 후 FAISS 인덱스에 저장 
vectorstore = FAISS.from_texts(df["text"].tolist(), embeddings)

# 추후 빠르게 불러올 수 있도록 FAISS 인덱스를 저장
faiss_db_path = "d:/data/faiss_car_prices"
vectorstore.save_local(faiss_db_path)

# 저장된 FAISS 데이터베이스를 로드하여 재사용
vectorstore = FAISS.load_local(faiss_db_path, embeddings, allow_dangerous_deserialization=True, )

# 유사한 데이터를 검색할 수 있는 retriever 생성, 최대 5개의 유사한 결과를 반환
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# 랭체인의 RetrievalQA를 사용하여 질의응답 시스템 생성
qa_chain = RetrievalQA.from_chain_type(llm=deepseek, retriever=retriever, chain_type="stuff")

# 사용자의 질문에 대한 답변 생성
query = "현대에서 가장 비싼 자동차는 무엇인가요?"
response = qa_chain.invoke(query)

# 결과 출력
print("📌 질문:", query)
print("💡 답변:", response)

📌 질문: 현대에서 가장 비싼 자동차는 무엇인가요?
💡 답변: {'query': '현대에서 가장 비싼 자동차는 무엇인가요?', 'result': "<think>\nOkay, I need to figure out which car is the most expensive among the ones mentioned. Let's see, the prices given are for Audi Q5, Audi A3, and two different prices for Volvo XC60.\n\nFirst, Audi Q5 is priced at 5,603,000 won. Then, Audi A3 is more expensive at 8,794,000 won. Now, looking at the Volvo XC60, there are three prices listed: 11,571,000 won, 5,507,000 won, and 10,311,000 won.\n\nComparing all these, the highest price is 11,571,000 won for the Volvo XC60. So, it seems like the Volvo XC60 is the most expensive among them.\n</think>\n\nThe most expensive car from the given options is the **вол沃 XC60** with a price of 11,571,000won. \n\nThe answer: **волwo XC60**"}


In [5]:
query = "아반떼의 제조사는? 한글로 답변해줘"
response = qa_chain.invoke(query)

# 결과 출력
print("📌 질문:", query)
print("💡 답변:", response)

📌 질문: 아반떼의 제조사는? 한글로 답변해줘
💡 답변: {'query': '아반떼의 제조사는? 한글로 답변해줘', 'result': '<think>\nOkay, so I\'m trying to figure out the manufacturer of "아반떼" using the context provided. Let\'s see what information is given here.\n\nFirst, there are two prices listed for "현대 아반떼": 5631000만원 and 5112000만원. I know that "현대" translates to "Hyundai" in Korean. So, this makes me think that Hyundai is the manufacturer of 아반떼.\n\nThen there are prices for "라ム보르기니 아בות도르" which translate to "Lamborghini Avettore." These prices don\'t seem relevant to determining the manufacturer of 아반떼.\n\nNext, we have prices for "기아 스포티지," which is "Kia Spectra." Again, this doesn\'t help with finding the manufacturer of 아반떼 since Kia\'s vehicle names are different from Hyundai\'s.\n\nSo, putting it all together, the only connection I see between the manufacturer and the vehicle name is Hyundai\'s 아반떼. The other vehicles have their own manufacturers, which don\'t overlap with Hyundai in these contexts.\n</think>\n\n아반떼의 

In [11]:
query = "1억 이상하는 모든 자동차 모델을 알려줘?"
response = qa_chain.invoke(query)

# 결과 출력
print("📌 질문:", query)
print("💡 답변:", response)

📌 질문: 1억 이상하는 모든 자동차 모델을 알려줘?
💡 답변: {'query': '1억 이상하는 모든 자동차 모델을 알려줘?', 'result': "<think>\nOkay, I need to figure out which car models cost over 10 billion won. Let's look at the given prices.\n\nFirst,볼보 XC60 has three different prices listed: 11571000만원, 10311000만원, and 5507000만원. I need to convert these into billions to see which are over 10억. \n\n1억 won is equal to 10,000,000,000 won. So let's divide each price by 1억:\n\n- 11571000만원 ÷ 1억 = 1.1571억\n- 10311000만원 ÷ 1억 ≈ 1.0311억\n- 5507000만원 ÷ 1억 ≈ 0.5507억\n\nOnly the first two are over 1억, so both variants of Bolt XC60 qualify.\n\nNext, the Macarla 720S has prices: 41870000만원 and 42563000만원. Converting these:\n\n- 41870000만원 ÷ 1억 = 4.187억\n- 42563000만원 ÷ 1억 ≈ 4.2563억\n\nNeither of these reaches 10억, so Macarla 720S is below the threshold.\n\nTherefore, the car models over 10억 are the Bolt XC60 variants.\n</think>\n\nThe car models that cost over 10 billion won (10억) are:\n\n- Bolt EV(XC60) variants at 1.1571억 and 1.0311억"}
