In [1]:
from pathlib import Path
from tqdm import tqdm
tqdm.pandas()

BASE_DIR = Path().resolve().parent.parent.parent
gps_path = BASE_DIR / "data" / "gps_all"
save_path = BASE_DIR / "AI 모델" / "1.모델소스코드" / "3.여행루트추천" / "data"
print(gps_path)
print(save_path)


C:\Users\SSAFY\Desktop\data\gps_all
C:\Users\SSAFY\Desktop\AI 모델\1.모델소스코드\3.여행루트추천\data


In [2]:
from glob import glob
gps_list = glob(str(gps_path) + "/*.csv")
print(len(gps_list))
print(gps_list[0])

0


IndexError: list index out of range

In [None]:
import os
import sys
from functools import lru_cache
from dotenv import load_dotenv
import requests

load_dotenv("../.env")
KAKAO_REST_API_KEY = os.getenv("KAKAO_REST_API_KEY")

In [None]:
# CT1 : 문화시설
# AT4 : 관광명소
# AD5 : 숙박
# FD6 : 음식점
# CE7 : 카페

In [None]:
@lru_cache(maxsize=50000)
def reverse_geocode(lat, lon):
    url = "https://dapi.kakao.com/v2/local/geo/coord2address.json"
    headers = {"Authorization": f"KakaoAK {KAKAO_REST_API_KEY}"}
    params = {"y": lat, "x": lon}

    res = requests.get(url, headers=headers, params=params).json()

    if res.get("documents"):
        address = None
        if res["documents"][0]["road_address"]:
            address = res["documents"][0]["road_address"]
        elif res["documents"][0]["address"]:
            address = res["documents"][0]["address"]
            
        if address:
            add_dict = dict()
            add_dict["ADDRESS_FULL"] = address["address_name"]
            add_dict["ADDRESS_1DEPTH"] = address["region_1depth_name"]
            add_dict["ADDRESS_2DEPTH"] = address["region_2depth_name"]
            add_dict["ADDRESS_3DEPTH"] = address["region_3depth_name"]
            return add_dict
    else:
        return None

@lru_cache(maxsize=50000)
def get_nearby_poi(lat, lon, category):
    url = "https://dapi.kakao.com/v2/local/search/category.json"
    headers = {"Authorization": f"KakaoAK {KAKAO_REST_API_KEY}"}
    params = {
        "y": lat,
        "x": lon,
        "radius": 70,   # 50m 반경 내 장소 검색
        "category_group_code": category
    }
    res = requests.get(url, headers=headers, params=params).json()
    docs = res.get("documents", [])
    if docs:
        return docs[0]["place_name"]
    return None


In [None]:
@lru_cache(maxsize=50000)
def gps_to_place(lat, lon): # 긴거, 짧은거
    # 1) 주소 변환
    address = reverse_geocode(lat, lon)

    # 2) 장소 검색 (관광명소 > 문화시설 > 음식점 > 카페 > 숙박 순서)
    for cat in ["AT4", "CT1", "FD6", "CE7", "AD5"]:
        poi = get_nearby_poi(lat, lon, cat)
        if poi and address:
            address["ADDRESS_NAME"] = poi
            return address
        
    else:
        if address:
            address["ADDRESS_NAME"] = None
            return address
        return None
    
@lru_cache(maxsize=50000)
def gps_to_place_cached(lat, lon):
    return gps_to_place(lat, lon)


In [None]:
import pandas as pd
print(gps_list[60])
test = pd.read_csv(gps_list[60])
print(test.head(10))

In [None]:
# 상하 범위 : 33.114115402 - 33.56448276039651
# 좌우 범위 : 126.14625579156079 - 126.969676444

In [None]:
import numpy as np
def haversine(lon1, lat1, lon2, lat2):
    """
    위도/경도(도 단위) 2점 사이 거리(km)
    """
    R = 6371  # 지구 반지름 (km)
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1)*np.cos(lat2)*np.sin(dlon/2)**2
    c = 2*np.arcsin(np.sqrt(a))
    return R * c

def classify_row(row,
                 stay_radius_km=0.5,      # 체류 반경 (~200m)
                 stay_min_min=10,         # 체류 최소 시간
                 move_speed_kmh=15):       # 이동으로 볼 속도 기준
    if pd.isna(row['DIST_KM']) or pd.isna(row['DELTA_MIN']):
        return 'start'   # 여행 시작점
    # 체류
    if (row['DIST_KM'] <= stay_radius_km) and (row['DELTA_MIN'] >= stay_min_min):
        return 'stay'
    # 이동
    if row['SPEED_KMH'] >= move_speed_kmh:
        return 'move'
    # 그 외는 경유/애매지점
    return 'pass'




def csv_preprocessing(df):
    df = df.drop_duplicates("DT_MIN").drop_duplicates(["X_COORD","Y_COORD"])
    df = df[(df["X_COORD"].between(126.14625579156079, 126.969676444)) 
            &(df["Y_COORD"].between(33.114115402, 33.56448276039651))]
    # 공항 내 빼기
    df = df[~((df["X_COORD"].between(126.4891168819876, 126.501925142)) 
            &(df["Y_COORD"].between(33.50059843466816, 33.50769954833248)))]
    df['DT_MIN'] = pd.to_datetime(df['DT_MIN'])

    df = df.sort_values(['DT_MIN'])
    
    # # 10분 단위 슬롯
    # df['DT_10MIN'] = df['DT_MIN'].dt.floor('10min')

    # # 대표 지점: 여기선 평균 좌표 + 슬롯의 가장 이른 시간 사용
    # df_10 = (
    # df.groupby(['DT_10MIN'])
    #     .agg({
    #             'X_COORD': 'min',      # 경도 최빈
    #             'Y_COORD': 'min',      # 위도 최빈
    #             'DT_MIN': 'min'         # 슬롯 내 실제 첫 시각
    #     })
    #     .reset_index()
    #     .rename(columns={'DT_MIN': 'DT_REPR'})
    #     )

    # # 정렬 다시
    df_10 = df.sort_values(['DT_MIN'])# DT_REPR


    # 이전 포인트와 비교할 수 있도록 shift
    df_10[['X_PREV', 'Y_PREV', 'T_PREV']] = df_10[['X_COORD', 'Y_COORD', 'DT_MIN']].shift(1) # DT_REPR
            

    # 시간 차이 (분 단위)
    df_10['DELTA_MIN'] = (df_10['DT_MIN'] - df_10['T_PREV']).dt.total_seconds() / 60 # DT_REPR

    # 거리 (km)
    df_10['DIST_KM'] = haversine(
        df_10['X_PREV'], df_10['Y_PREV'],
        df_10['X_COORD'], df_10['Y_COORD']
        )

    # 속도 (km/h)
    df_10['SPEED_KMH'] = df_10['DIST_KM'] / (df_10['DELTA_MIN'] / 60)
    df_10['STATUS'] = df_10.apply(classify_row, axis=1)
    df_10 = df_10[df_10["STATUS"]=="stay"]

    
    
    return df_10.reset_index()[["MOBILE_NUM_ID", "X_COORD", "Y_COORD", "DT_MIN", "TRAVEL_ID"]]

In [None]:
test1 = csv_preprocessing(test)

In [None]:
test1.shape

In [None]:
test1

In [None]:
# 1. GPS → 장소 매핑
test1['address_info'] = test1.progress_apply(
    lambda row: gps_to_place_cached(row['Y_COORD'], row['X_COORD']),
    axis=1
)

# 2. None 제거
#test1 = test1[test1['address_info'].notna()].reset_index(drop=True)

# 3. dict 확장
test1_address = pd.json_normalize(test1['address_info'])

dup_cols = test1.columns.intersection(test1_address.columns)
test1_address = test1_address.drop(columns=dup_cols)

# 4. 원본과 합치기
test1 = pd.concat([test1, test1_address], axis=1)

# 5. 필요 없으면 address_info 컬럼 제거
test1.drop(columns=['address_info'], inplace=True)
cols = ["ADDRESS_FULL","ADDRESS_1DEPTH","ADDRESS_2DEPTH","ADDRESS_3DEPTH","ADDRESS_NAME"]

test1 = test1.dropna(subset=cols, how='all')
test1 = test1.drop_duplicates(["ADDRESS_NAME"])


In [None]:
test1

# 실제 데이터 확장

In [None]:
from tqdm import tqdm

In [None]:
len(gps_list)

In [None]:
for i, csv_file in enumerate(gps_list):
    fname = csv_file.split('_')[-1]
    if fname == "h000126.csv":
        print(i)
        break

In [None]:
import gc
for i, csv_file in tqdm(enumerate(gps_list)):
    if i <= 71: continue
    try:
        fname = csv_file.split('_')[-1]
        test = pd.read_csv(csv_file)
        test1 = csv_preprocessing(test)
        # 1. GPS → 장소 매핑
        test1['address_info'] = test1.progress_apply(
            lambda row: gps_to_place_cached(row['Y_COORD'], row['X_COORD']),
            axis=1
        )

        # 2. None 제거
        #test1 = test1[test1['address_info'].notna()].reset_index(drop=True)

        # 3. dict 확장
        test1_address = pd.json_normalize(test1['address_info'])

        dup_cols = test1.columns.intersection(test1_address.columns)
        test1_address = test1_address.drop(columns=dup_cols)

        # 4. 원본과 합치기
        test1 = pd.concat([test1, test1_address], axis=1)

        # 5. 필요 없으면 address_info 컬럼 제거
        test1.drop(columns=['address_info'], inplace=True)
        cols = ["ADDRESS_FULL","ADDRESS_1DEPTH","ADDRESS_2DEPTH","ADDRESS_3DEPTH","ADDRESS_NAME"]

        test1 = test1.dropna(subset=cols, how='all')
        test1 = test1.drop_duplicates(["ADDRESS_NAME"]).reset_index(drop=True)
        test1['ADDRESS_NAME'] = test1['ADDRESS_NAME'].replace(["", " ", "None"], pd.NA)
        test1 = test1.dropna(subset=['ADDRESS_NAME'])
        test1['date'] = test1['DT_MIN'].dt.date
        # 여행별 day index
        test1['TRAVEL_DAY'] = test1['date'].transform(lambda x: x.ne(x.shift())).cumsum()
        test1.drop(columns=['date'], inplace=True)
        test1.to_csv(save_path / fname)
        # 큰 변수 삭제 및 가비지 컬렉션 호출
        # del large_variable
        gc.collect()
        
    except:
        print(i, fname)

In [None]:
except_list = [
    69, 148, 170, 176, 193, 274, 276, 278, 284, 286, 
    287, 307, 367, 389, 417, 441, 459, 490, 546, 580, 
    623, 658, 681, 688, 747, 764, 784, 832, 841, 850, 
    852, 854, 870, 876, 879, 922, 929, 936, 955, 980,
    993, 1013, 1112, 1117, 1194, 1213, 1242, 1321, 1431, 1434,
    1437, 1448, 1499, 1518, 1541, 1545, 1547, 1549, 1554, 1555,
    1556, 1557, 1558, 1559, 1560, 1561, 1564, 1566, 1567, 1575,
    1576, 1577, 1579, 1580, 1581, 1584, 1585, 1587, 1588, 1589,
    1591, 1593, 1594, 1596, 1597, 1598, 1599, 1600, 1605, 1610, 
    1611, 1612, 1613, 1615, 1616, 1774,
    ]

### 위의 로직상의 'TRAVEL_DAY' 오류는 고쳤지만 시간이 오래걸리기에 아래 코드로 대신 수정

In [3]:
print(save_path)

C:\Users\User\Desktop\xodls\through_pjt_ai\AI 모델\1.모델소스코드\3.여행루트추천\data


In [4]:
csv_list = glob(str(save_path) + '/*.csv')

In [11]:
print(len(csv_list))
print(csv_list[0])

1675
C:\Users\User\Desktop\xodls\through_pjt_ai\AI 모델\1.모델소스코드\3.여행루트추천\data\h000001.csv


In [8]:
from tqdm import tqdm
def arrange_day(lst):
    tmp_lst = [0]*len(lst)
    tmp_day = 1
    for i, day in enumerate(lst):
        if i == 0:
            prev = day
            tmp_lst[i] = tmp_day
        else:
            if prev == day:
                tmp_lst[i] = tmp_day
            else:
                prev = day
                tmp_day += 1
                tmp_lst[i] = tmp_day
    else:
        return tmp_lst

In [10]:
a = [1,1,1,1,2,4,4,4,5,5]
print(arrange_day(a))

[1, 1, 1, 1, 2, 3, 3, 3, 4, 4]


In [13]:
import pandas as pd
for csv in tqdm(csv_list):
    df_c = pd.read_csv(csv)
    travel_day = df_c['TRAVEL_DAY'].to_list()
    df_c['TRAVEL_DAY'] = arrange_day(travel_day)
    df_c.to_csv(csv)
    

100%|██████████| 1675/1675 [00:35<00:00, 47.38it/s]


## GPS route data에 title과 description 생성

In [1]:
from pathlib import Path
from tqdm import tqdm
from glob import glob
tqdm.pandas()

BASE_DIR = Path().resolve().parent.parent
base_path = BASE_DIR / "AI" / "route_recommend"
save_path = base_path / "model"
gps_path = base_path / "data_local"
print(base_path)
print(save_path)
print(gps_path)

gps_list = glob(str(gps_path) + "/*.csv")
print(len(gps_list))
print(gps_list[0])

C:\Users\User\Desktop\xodls\JejuDoldam\AI\route_recommend
C:\Users\User\Desktop\xodls\JejuDoldam\AI\route_recommend\model
C:\Users\User\Desktop\xodls\JejuDoldam\AI\route_recommend\data_local
1675
C:\Users\User\Desktop\xodls\JejuDoldam\AI\route_recommend\data_local\h000001.csv


In [10]:
import os
import sys

from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel
from tqdm import tqdm
import json
import pandas as pd

sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname(BASE_DIR))))))

load_dotenv(".env")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

ai_client = OpenAI(api_key=OPENAI_API_KEY)

In [11]:
class GPSTitleDesc(BaseModel):
    TITLE: str
    DESCRIPTION: str

In [16]:
desc_data = []
for i, csv in tqdm(enumerate(gps_list)):
    traveler_id = csv.split('\\')[-1].split('.')[0]
    df = pd.read_csv(csv)
    use_cols = ["TRAVEL_DAY","ADDRESS_2DEPTH","ADDRESS_3DEPTH","ADDRESS_NAME"]
    df = df[use_cols]
    json_data = df.to_dict(orient="list")
    messages = [
            {
                "role": "system",
                "content": "너는 제주도 여행 전문 AI야."
                "'입력'을 보고 정보를 추출해서 데이터 format에 채워넣어야해."
                "'입력'만 참고해서 여행 루트의 특징을 요약해서 40자 이내로 출력해."
                "다음은 예시야."
                "예시: \n"
                "{TITLE: '서부 카페투어 루트 3일', DESCRIPTION: '협재, 애월 카페 위주의 여유로운 여행'}"
            },
            {
                "role": "user",
                "content": (
                    f"입력: {json_data}\n을 보고 format에 맞춰서 채워줘."
                    "TITLE 에는 '입력'의 'TRAVEL_DAY'를 참고해서 정보 요약 + 여행일자로 부탁해."
                    "DESCRIPTION에는 'ADDRESS_NAME'의 카테고리를 생각해서 작성해줘."
                    "다른 설명은 필요없고 데이터의 빈곳을 채운 새 데이터를 format에 맞춰서 반환해줘."
                ),
            },
        ]
    
    completion = ai_client.responses.parse(
        model="gpt-5.1",  # "gpt-4o-mini"
        input=messages,
        # JSON 스키마 지정
        text_format=GPSTitleDesc,
    )

    answer = json.loads(completion.output_text)
    answer["TRAVELER_ID"] = traveler_id
    desc_data.append(answer)
df_desc = pd.DataFrame(desc_data)
df_desc.to_csv(save_path / 'gps_route_desc.csv')
print('save to : ', save_path / 'gps_route_desc.csv')

1675it [47:44,  1.71s/it]

save to :  C:\Users\User\Desktop\xodls\JejuDoldam\AI\route_recommend\model\gps_route_desc.csv





## GPS data에 photo url 추가

In [7]:
from pathlib import Path
from tqdm import tqdm


from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel
from tqdm import tqdm
import math
import requests
import os
import pandas as pd

# Build paths inside the project like this: BASE_DIR / 'subdir'.
from dotenv import load_dotenv

load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

photo_name_URL = "https://places.googleapis.com/v1/places:searchText"

tqdm.pandas()

BASE_DIR = Path().resolve().parent
data_path = BASE_DIR / "route_recommend" / "data_local"
print(data_path)


C:\Users\SSAFY\Desktop\JejuDoldam\AI\route_recommend\data_local


In [8]:
from glob import glob
gps_list = glob(str(data_path) + "/*.csv")
print(len(gps_list))
print(gps_list[0])

1675
C:\Users\SSAFY\Desktop\JejuDoldam\AI\route_recommend\data_local\h000001.csv


In [9]:
def find_photo_name(address_name: str) -> dict:
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": GOOGLE_API_KEY,
        "X-Goog-FieldMask": ("places.id,places.displayName,places.photos,"),
    }

    payload = payload = {
        "textQuery": address_name,
        "languageCode": "ko",
        "regionCode": "KR",
        "maxResultCount": 1,
        "locationBias": {
            "rectangle": {
                "low": {"latitude": 33.114115402, "longitude": 126.14625579156079},
                "high": {"latitude": 33.56448276039651, "longitude": 126.969676444},
            }
        },
    }

    response = requests.post(photo_name_URL, headers=headers, json=payload)
    response.raise_for_status()

    data = response.json()
    # pprint(data["places"][0]["photos"][0]["name"])
    # pprint(data["places"][0]["photos"][0]["heightPx"])
    # pprint(data["places"][0]["photos"][0]["widthPx"])
    photo_name = data["places"][0]["photos"][0]["name"]
    height = data["places"][0]["photos"][0]["heightPx"]
    width = data["places"][0]["photos"][0]["widthPx"]
    return photo_name, width, height

def get_place_photo_url(
    photo_name: str,
    max_width: int = 1600,
    max_height: int = 1600,
) -> str:
    url = f"https://places.googleapis.com/v1/{photo_name}/media"

    params = {
        "maxWidthPx": max_width,
        "maxHeightPx": max_height,
        "skipHttpRedirect": "true",
        "key": GOOGLE_API_KEY,
    }

    response = requests.get(url, params=params)
    response.raise_for_status()

    response = response.json()
    photo_url = response["photoUri"]
    return photo_url

In [18]:

from typing import Optional, Tuple

def find_photo_name2(address_name: str) -> Optional[Tuple[str, int, int]]:
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": GOOGLE_API_KEY,
        "X-Goog-FieldMask": "places.photos",
    }

    payload = {
        "textQuery": address_name,
        "languageCode": "ko",
        "regionCode": "KR",
        "maxResultCount": 1,
        "locationBias": {
            "rectangle": {
                "low": {"latitude": 33.114115402, "longitude": 126.14625579156079},
                "high": {"latitude": 33.56448276039651, "longitude": 126.969676444},
            }
        },
    }

    try:
        response = requests.post(photo_name_URL, headers=headers, json=payload, timeout=5)
        response.raise_for_status()
        data = response.json()
    except requests.RequestException:
        return None, None, None

    places = data.get("places")
    if not places:
        return None, None, None

    photos = places[0].get("photos")
    if not photos:
        return None, None, None

    photo = photos[0]

    photo_name = photo.get("name")
    width = photo.get("widthPx")
    height = photo.get("heightPx")

    if not photo_name:
        return None, None, None

    return photo_name, width, height


In [21]:
def get_place_photo_url2(
    photo_name: Optional[str],
    max_width: int = 4000,
    max_height: int = 3000,
) -> Optional[str]:
    if not photo_name:
        return None

    url = f"https://places.googleapis.com/v1/{photo_name}/media"

    params = {
        "maxWidthPx": max_width,
        "maxHeightPx": max_height,
        "skipHttpRedirect": "true",
        "key": GOOGLE_API_KEY,
    }

    try:
        response = requests.get(url, params=params, timeout=5)
        response.raise_for_status()
        data = response.json()
    except requests.RequestException:
        return None

    return data.get("photoUri")


In [22]:
for csv in tqdm(gps_list):
  df = pd.read_csv(csv)
  names = df["ADDRESS_NAME"].to_list()
  photo_urls = []
  for name in names:
    photo_name, width, height = find_photo_name2(name)
    photo_url = get_place_photo_url2(photo_name, width, height)
    photo_urls.append(photo_url)
  df["PHOTO_URL"] = photo_urls
  df.to_csv(csv)

 50%|█████     | 840/1675 [16:01:50<15:56:06, 68.70s/it]     


KeyboardInterrupt: 

### 과금때문에 종료, 840번까지 완료됨

In [24]:
photo_in = gps_list[:840]
photo_out = gps_list[840:]
photo_in_id = [ path.split('\\')[-1].split('.')[0] for path in photo_in ]
photo_out_id = [ path.split('\\')[-1].split('.')[0] for path in photo_out ]

In [25]:
from pathlib import Path
from tqdm import tqdm

import pandas as pd

tqdm.pandas()

BASE_DIR = Path().resolve().parent
data_path = BASE_DIR / "route_recommend" / "model"
print(data_path)
cluster_df = pd.read_csv(data_path / "rating_user_info_all_cluster.csv")

C:\Users\SSAFY\Desktop\JejuDoldam\AI\route_recommend\model


In [27]:
cluster_df_photo_in = cluster_df[cluster_df["TRAVELER_ID"].isin(photo_in_id)]
print(cluster_df_photo_in["cluster"].value_counts())

cluster
4    286
7    157
6    151
5     69
2     69
1     42
3     40
0     26
Name: count, dtype: int64


다행히 8개 군집 존재. photo가 존재하는 루트만 제공하기 위해 새로 dataframe 작성

In [33]:
cluster_df_photo_in = cluster_df_photo_in.loc[:, ~cluster_df_photo_in.columns.str.contains("^Unnamed")]
cluster_df_photo_in.to_csv(data_path / "rating_user_info_all_cluster_photo.csv")

Index(['Unnamed: 0', 'GENDER', 'AGE_GRP', 'MARR_STTS', 'JOB_NM', 'INCOME',
       'TRAVEL_NUM', 'TRAVEL_STYL_1', 'TRAVEL_STATUS_RESIDENCE',
       'TRAVEL_STATUS_DESTINATION', 'TRAVEL_STATUS_ACCOMPANY',
       'TRAVEL_MOTIVE_1', 'TRAVEL_COMPANIONS_NUM', 'MONTH', 'SEASON',
       'HOW_LONG', 'TRAVELER_ID', 'cluster', 'cluster_name'],
      dtype='object')

# 카카오맵 장소 상세보기 URL 생성

In [None]:
from pathlib import Path
from tqdm import tqdm


from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel
from tqdm import tqdm
import math
import requests
import os
import pandas as pd
from glob import glob
from typing import Any, Dict, Optional

# Build paths inside the project like this: BASE_DIR / 'subdir'.
from dotenv import load_dotenv

load_dotenv()
KAKAO_REST_API_KEY = os.getenv("KAKAO_REST_API_KEY")

BASE_DIR = Path().resolve().parent
data_path = BASE_DIR / "route_recommend" / "data_local"
print(data_path)

gps_list = glob(str(data_path) + "/*.csv")
print(len(gps_list))
print(gps_list[0])


In [None]:
BASE_URL = "https://dapi.kakao.com/v2/local/search/keyword.json"

def search_places_by_keyword(
    keyword: str,
    x: Optional[str] = None,
    y: Optional[str] = None,
    radius: Optional[int] = None,
    page: int = 1,
) -> Dict[str, Any]:
    """
    카카오 로컬 장소 검색 API를 호출합니다.
    
    :param keyword: 검색 키워드 (필수)
    :param x: 중심점 경도 (선택)
    :param y: 중심점 위도 (선택)
    :param radius: 반경 (meters, 선택)
    :param page: 페이지 번호 (기본 1)
    :param size: 한 페이지 결과 수 (기본 15, 최대 45)
    :param sort: 정렬 기준 ("accuracy" | "distance")
    :return: API 응답 JSON 또는 error 정보
    """

    headers = {
        "Authorization": f"KakaoAK {KAKAO_REST_API_KEY}"
    }

    params = {
        "query": keyword,
        "page": page,
    }

    # 위치 기반 옵션들 (선택)
    if x and y:
        params["x"] = x
        params["y"] = y
        params["radius"] = radius

    try:
        response = requests.get(BASE_URL, headers=headers, params=params, timeout=5)
        response.raise_for_status()
    except requests.HTTPError as http_err:
        return {"error": True, "message": f"HTTP error: {http_err}", "status_code": response.status_code}
    except requests.RequestException as req_err:
        return {"error": True, "message": f"Request failed: {req_err}"}

    try:
        data = response.json()
    except ValueError:
        return {"error": True, "message": "Invalid JSON received"}

    # 응답 meta 구조 확인
    if "documents" not in data:
        return {"error": True, "message": "No documents in response", "raw": data}

    # total_count가 0이면 결과 없음
    if data.get("meta", {}).get("total_count", 0) == 0:
        return {"error": False, "places": [], "message": "No results found"}

    # 정상 결과 반환
    return {"error": False, "places": data["documents"]}


In [None]:
from pprint import pprint

for csv in tqdm(gps_list):
  df = pd.read_csv(csv)
  names = df["ADDRESS_NAME"].to_list()
  Xs = df["X_COORD"].to_list()
  Ys = df["Y_COORD"].to_list()
  place_urls = []
  place_cats = []
  for name, x, y in zip(names, Xs, Ys):
    result = search_places_by_keyword(keyword=name, x=x, y=y, radius=70)
    if not result["error"] and result.get("places"):
      place_urls.append(result["places"][0]["place_url"])
      place_cats.append(result["places"][0]["category_group_name"])
    else:
      place_urls.append(None)
      place_cats.append(None)
      
  df["PLACE_URL"] = place_urls
  df["PLACE_CAT"] = place_cats
  df.to_csv(csv)