# JSON

`.json` 확장자를 가지는 파일을 로더로 로드하는 방법을 살펴보겠습니다.

- 참고: https://python.langchain.com/docs/modules/data_connection/document_loaders/json


In [2]:
import json
from pathlib import Path
from pprint import pprint


file_path = "/content/people.json"
# Decode explicitly as UTF-8: the file contains Korean text, and read_text()
# without an encoding falls back to the platform's locale encoding, which is
# not UTF-8 on every system.
data = json.loads(Path(file_path).read_text(encoding="utf-8"))

# Pretty-print the parsed structure (a list of per-person dicts).
pprint(data)

[{'address': {'city': '서울', 'street': '312번지', 'zipCode': '83795'},
  'age': 31,
  'carOwnership': True,
  'hobbies': ['요리', '음악 감상', '사진 촬영'],
  'isMarried': True,
  'name': '박시우',
  'phoneNumbers': ['483-4639-1933', '947-4179-7976']},
 {'address': {'city': '서울', 'street': '877번지', 'zipCode': '36780'},
  'age': 31,
  'carOwnership': True,
  'hobbies': ['여행', '음악 감상', '등산'],
  'isMarried': False,
  'name': '정수아',
  'phoneNumbers': ['337-5721-3227', '387-3768-9586']},
 {'address': {'city': '서울', 'street': '175번지', 'zipCode': '89067'},
  'age': 43,
  'carOwnership': True,
  'hobbies': ['등산', '독서', '게임'],
  'isMarried': False,
  'name': '최도윤',
  'phoneNumbers': ['354-5563-4638', '471-9212-1826']},
 {'address': {'city': '서울', 'street': '690번지', 'zipCode': '70635'},
  'age': 22,
  'carOwnership': False,
  'hobbies': ['여행', '등산', '게임'],
  'isMarried': False,
  'name': '정민준',
  'phoneNumbers': ['468-2796-2152', '922-5760-7030']},
 {'address': {'city': '서울', 'street': '151번지', 'zipCode': '7911

In [3]:
# Check the Python type of the first record parsed from the JSON file.
type(data[0])

dict

## JSONLoader

JSON 데이터에서 각 항목의 `phoneNumbers` 필드 값을 추출하고 싶다고 가정하면, 아래와 같이 `JSONLoader`와 `jq` 스키마를 사용해 쉽게 수행할 수 있습니다.

In [5]:
!pip install -qU langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.13-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain<0.4.0,>=0.3.13 (from langchain-community)
  Downloading langchain-0.3.13-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.4.0,>=0.3.27 (from langchain-community)
  Downloading langchain_core-0.3.28-py3-none-any.whl.metadata (6.3 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.23.2-py3-none-any.whl.metadata (7.1 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-

In [7]:
pip install jq

Collecting jq
  Downloading jq-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Downloading jq-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (737 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/737.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m286.7/737.4 kB[0m [31m8.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m737.4/737.4 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jq
Successfully installed jq-1.8.0


In [10]:
from langchain_community.document_loaders import JSONLoader

# Configure the loader: the jq expression selects the `phoneNumbers` value of
# every top-level array element. The selected values are lists rather than
# plain strings, so text_content is switched off.
loader = JSONLoader(
    text_content=False,
    jq_schema=".[].phoneNumbers",
    file_path="/content/people.json",
)

# Run the loader; one Document is produced per jq match.
docs = loader.load()

# Show the resulting Document objects.
pprint(docs)

[Document(metadata={'source': '/content/people.json', 'seq_num': 1}, page_content='["483-4639-1933", "947-4179-7976"]'),
 Document(metadata={'source': '/content/people.json', 'seq_num': 2}, page_content='["337-5721-3227", "387-3768-9586"]'),
 Document(metadata={'source': '/content/people.json', 'seq_num': 3}, page_content='["354-5563-4638", "471-9212-1826"]'),
 Document(metadata={'source': '/content/people.json', 'seq_num': 4}, page_content='["468-2796-2152", "922-5760-7030"]'),
 Document(metadata={'source': '/content/people.json', 'seq_num': 5}, page_content='["751-2823-8259", "722-7267-9516"]'),
 Document(metadata={'source': '/content/people.json', 'seq_num': 6}, page_content='["462-4433-5968", "483-1709-4850"]'),
 Document(metadata={'source': '/content/people.json', 'seq_num': 7}, page_content='["382-2779-3692", "835-4343-5346"]'),
 Document(metadata={'source': '/content/people.json', 'seq_num': 8}, page_content='["136-2831-1021", "818-9721-7208"]'),
 Document(metadata={'source': '/