In [12]:
import requests
import urllib


def fetch_kakao_data(keyword, radius, api_key):
    """Return the first Kakao Local keyword-search hit for ``keyword``.

    Args:
        keyword: Search text sent as the ``query`` parameter.
        radius: Value sent as the ``radius`` parameter.
        api_key: Kakao REST API key, sent as ``Authorization: KakaoAK <key>``.

    Returns:
        A dict with ``x`` and ``y`` (floats), ``address_name`` and
        ``place_name`` copied from the first returned document, or ``None``
        when the search returns no documents.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    headers = {
        "Content-Type": "application/json; charset=utf-8",
        "Authorization": f"KakaoAK {api_key}",
    }

    res = requests.get(
        "https://dapi.kakao.com/v2/local/search/keyword.json",
        headers=headers,
        # requests URL-encodes a dict on its own; no manual urlencode needed.
        params={"query": keyword, "radius": radius, "page": 1},
        # Without a timeout a stalled connection would hang the notebook.
        timeout=10,
    )
    # Surface auth/quota problems as an HTTP error instead of a KeyError below.
    res.raise_for_status()

    documents = res.json().get("documents") or []
    if not documents:
        return None

    document = documents[0]
    return {
        "x": float(document["x"]),
        "y": float(document["y"]),
        "address_name": document["address_name"],
        "place_name": document["place_name"],
    }


import os

keyword = "금사"
radius = 100

# Read the Kakao REST API key from the environment instead of embedding it in
# the notebook, where it leaks through version control and shared copies.
# A key that was previously committed in plain text should be revoked.
a_station = fetch_kakao_data(keyword, radius, os.environ["KAKAO_REST_API_KEY"])
a_station

{'x': 129.1151206096,
 'y': 35.2157859199559,
 'address_name': '부산 금정구 금사동 154-1',
 'place_name': '금사역 부산4호선'}

# 지하철 역 좌표 정보 정리


In [None]:
from geotool import get_lati_longi
import pandas as pd
import requests
import urllib
from geopy.distance import geodesic

# Lift pandas' display limits: no cap on rows, columns or column width, and no
# wrapping of wide frames across multiple blocks.
for _option_name, _option_value in {
    "display.max_rows": None,
    "display.max_columns": None,
    "display.expand_frame_repr": False,
    "display.max_colwidth": None,
}.items():
    pd.set_option(_option_name, _option_value)


def fetch_kakao_data(keyword, radius, api_key):
    """Return the first Kakao Local keyword-search hit for ``keyword``.

    Args:
        keyword: Search text sent as the ``query`` parameter.
        radius: Value sent as the ``radius`` parameter.
        api_key: Kakao REST API key, sent as ``Authorization: KakaoAK <key>``.

    Returns:
        A dict with ``x`` and ``y`` (floats), ``address_name`` and
        ``place_name`` copied from the first returned document, or ``None``
        when the search returns no documents.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    headers = {
        "Content-Type": "application/json; charset=utf-8",
        "Authorization": f"KakaoAK {api_key}",
    }

    res = requests.get(
        "https://dapi.kakao.com/v2/local/search/keyword.json",
        headers=headers,
        # requests URL-encodes a dict on its own; no manual urlencode needed.
        params={"query": keyword, "radius": radius, "page": 1},
        # Without a timeout a stalled connection would hang the whole loop.
        timeout=10,
    )
    # An error payload carries no "documents" key; fail with the HTTP error
    # rather than an opaque KeyError.
    res.raise_for_status()

    documents = res.json().get("documents")
    if not documents:
        return None

    first = documents[0]
    return {
        "x": float(first["x"]),
        "y": float(first["y"]),
        "address_name": first["address_name"],
        "place_name": first["place_name"],
    }


import os

# Load the station list from the Excel workbook.
file_path = "./전체_도시철도역사정보_20240331.xlsx"
df = pd.read_excel(file_path)

# Output columns that will receive the resolved latitude / longitude.
df["lati"] = None
df["longi"] = None

# Kakao REST API key, taken from the environment so it never has to be typed
# into the notebook. Falls back to an empty string when the variable is unset.
api_key = os.environ.get("KAKAO_REST_API_KEY", "")

# Resolve coordinates per row, trying the cheapest source first.
for index, row in df.iterrows():
    station_lat, station_lon = row["역위도"], row["역경도"]

    if pd.notna(station_lat) and pd.notna(station_lon):
        # The smaller value is stored as latitude and the larger as longitude
        # -- presumably to repair rows whose two columns are swapped (TODO confirm).
        df.at[index, "lati"] = min(station_lat, station_lon)
        df.at[index, "longi"] = max(station_lat, station_lon)
        continue

    # No coordinates in the sheet: geocode the street address.
    coordinates = get_lati_longi(row["역사도로명주소"])
    if coordinates:
        df.at[index, "lati"] = coordinates["lati"]
        df.at[index, "longi"] = coordinates["longi"]
        continue

    # Last resort: Kakao keyword search on the station name.
    keyword = row["역사명"]
    if not keyword.endswith("역"):
        keyword += "역"

    if row["환승역구분"] == "환승역":
        # Transfer stations also get the line name appended. Build on the
        # already-normalised keyword so a name that ends in "역" is not turned
        # into "...역역".
        keyword = f"{keyword} {row['노선명']}"

    kakao_data = fetch_kakao_data(keyword, 200, api_key)
    if kakao_data:
        # The Kakao result's "y" goes to lati and "x" to longi.
        df.at[index, "lati"] = kakao_data["y"]
        df.at[index, "longi"] = kakao_data["x"]

# Show the result.
print(df)

# Persist the result.
df.to_excel("전국_도시철도역사정보_좌표포함.xlsx", index=False)

In [1]:
import os
# Keep three cores free, but never go below one worker. os.cpu_count() returns
# None when the count cannot be determined, so fall back to 1 in that case.
num_cores = max(1, (os.cpu_count() or 1) - 3)
print(num_cores)

21


In [None]:
# Call the method: a bare `df.head` only displays the bound-method repr.
df.head()

In [None]:
from geopy.distance import geodesic
import pandas as pd
import numpy as np

# Add a 1-based serial number column.
df["일련번호"] = range(1, len(df) + 1)

# "근접" marks rows that have another row within the distance limit;
# "그룹번호" holds the id of the proximity group such rows belong to.
df["근접"] = ""
df["그룹번호"] = ""

# Two rows are treated as neighbours when they are at most this far apart.
PROXIMITY_LIMIT_M = 250


def _parse_coordinates(lati, longi):
    """Return ``(lati, longi)`` as floats, or ``None`` if either is missing or non-numeric."""
    try:
        point = (float(lati), float(longi))
    except (TypeError, ValueError):
        return None
    if np.isnan(point[0]) or np.isnan(point[1]):
        return None
    return point


# Walk the frame by its actual index labels so the loop also works when the
# index is not 0..n-1 (e.g. after rows were dropped).
row_labels = list(df.index)

# Parse every row's coordinates once instead of re-parsing them for each pair.
coords_by_label = {
    label: _parse_coordinates(df.at[label, "lati"], df.at[label, "longi"])
    for label in row_labels
}

# Rows without usable coordinates are reported once and left ungrouped; they
# no longer abort the grouping of the remaining rows.
for label, point in coords_by_label.items():
    if point is None:
        print(f">> Skipping row {label}: invalid coordinates")
        print(f"Row data: {df.loc[label]}")

# Greedy grouping: each still-ungrouped row opens a new group and pulls in
# every later row within the distance limit.
# NOTE(review): a later row that already belongs to an earlier group is moved
# into the new group here (same as before this rewrite) -- confirm whether
# transitive merging of groups is wanted instead.
group_number = 1
for position, i in enumerate(row_labels):
    if df.at[i, "그룹번호"] != "":
        continue  # already pulled into an earlier group

    coord1 = coords_by_label[i]
    if coord1 is None:
        continue

    df.at[i, "그룹번호"] = group_number
    for j in row_labels[position + 1:]:
        coord2 = coords_by_label[j]
        if coord2 is None:
            continue

        try:
            distance = geodesic(coord1, coord2).meters
        except ValueError as e:
            print(f"=== Error processing rows {i} and {j}: {e} ====")
            continue

        if distance <= PROXIMITY_LIMIT_M:
            df.at[j, "그룹번호"] = group_number
            df.at[j, "근접"] = "근접"
            df.at[i, "근접"] = "근접"

    group_number += 1

# Rows that ended up without any neighbour do not keep a group number.
df.loc[df["근접"] == "", "그룹번호"] = ""

# "근접노선수" starts at 1 for every row ...
df["근접노선수"] = 1

# ... and is replaced by the group's row count for rows that have a group.
group_counts = df["그룹번호"].value_counts()
for group, size in group_counts.items():
    if group != "":
        df.loc[df["그룹번호"] == group, "근접노선수"] = size

# Show the result.
print(df)

# Persist the result.
df.to_excel("updated_도시철도역사정보.xlsx", index=False)

In [19]:
# Reset "근접노선수" to 1 for every row.
df["근접노선수"] = 1

# For each non-blank group number, write the number of rows carrying that
# group number into "근접노선수" of those rows.
group_counts = df["그룹번호"].value_counts()
for group in group_counts.index:
    if group == "":
        continue
    df.loc[df["그룹번호"].eq(group), "근접노선수"] = group_counts.loc[group]

# Write the frame to the Excel workbook.
df.to_excel("updated_도시철도역사정보.xlsx", index=False)

In [14]:
# Write the current frame to the Excel workbook, without the index column.
df.to_excel(excel_writer="updated_도시철도역사정보.xlsx", index=False)

In [None]:
import requests
import urllib
import json

import os

# Read the Kakao REST API key from the environment instead of embedding it in
# the notebook. A key that was previously committed should be revoked.
headers = {
    "Content-Type": "application/json; charset=utf-8",
    "Authorization": "KakaoAK {}".format(os.environ["KAKAO_REST_API_KEY"]),
}
kw = "지하철"
rdus = 20000
p = urllib.parse.urlencode({"query": kw, "radius": rdus})
res = requests.get(
    "https://dapi.kakao.com/v2/local/search/keyword.json",
    headers=headers,
    params=p,
    timeout=10,  # do not let a stalled connection hang the kernel
)
# Fail with the HTTP error rather than a KeyError on an error payload.
res.raise_for_status()
result = res.json()

# Pull the fields of the first hit.
first_document = result["documents"][0]
x = float(first_document["x"])
y = float(first_document["y"])
addr = first_document["address_name"]
pname = first_document["place_name"]

# Cell output: the raw list of returned documents.
result["documents"]

In [6]:
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.max_colwidth", None)

# Row count before / after removing rows that share the same coordinates.
display(len(df))
# Renumber the index after dropping rows: other cells address rows with
# df.at[i, ...] for i in range(len(df)), which needs labels 0..n-1 without gaps.
df = df.drop_duplicates(subset=["lati", "longi"]).reset_index(drop=True)
display(len(df))

1069

960

In [3]:
import requests
import urllib
import json

import os

# Read the Kakao REST API key from the environment instead of embedding it in
# the notebook. A key that was previously committed should be revoked.
headers = {
    "Content-Type": "application/json; charset=utf-8",
    "Authorization": "KakaoAK {}".format(os.environ["KAKAO_REST_API_KEY"]),
}
kw = "지하철"
rdus = 20000
p = urllib.parse.urlencode({"query": kw, "radius": rdus})
res = requests.get(
    "https://dapi.kakao.com/v2/local/search/keyword.json",
    headers=headers,
    params=p,
    timeout=10,  # do not let a stalled connection hang the kernel
)
# Fail with the HTTP error rather than a KeyError on an error payload.
res.raise_for_status()
result = res.json()

# Pull the fields of the first hit.
first_document = result["documents"][0]
x = float(first_document["x"])
y = float(first_document["y"])
addr = first_document["address_name"]
pname = first_document["place_name"]

# Cell output: the paging metadata of the response.
result["meta"]

{'is_end': False,
 'pageable_count': 45,
 'same_name': {'keyword': '지하철', 'region': [], 'selected_region': ''},
 'total_count': 5559}

In [None]:
#

import requests
import urllib
import json

import os

# Read the Kakao REST API key from the environment instead of embedding it in
# the notebook. A key that was previously committed should be revoked.
headers = {
    "Content-Type": "application/json; charset=utf-8",
    "Authorization": "KakaoAK {}".format(os.environ["KAKAO_REST_API_KEY"]),
}
category_group_code = "SW8"
rdus = 20000
# NOTE(review): no centre point (x, y) is sent here, unlike fetch_station_count
# further down, which passes x/y together with the radius -- confirm the
# category endpoint accepts a radius-only query.
p = urllib.parse.urlencode({"category_group_code": category_group_code, "radius": rdus})
res = requests.get(
    "https://dapi.kakao.com/v2/local/search/category.json",
    headers=headers,
    params=p,
    timeout=10,  # do not let a stalled connection hang the kernel
)
# Fail with the HTTP error rather than a KeyError on an error payload.
res.raise_for_status()
result = res.json()

# Pull the fields of the first hit.
first_document = result["documents"][0]
x = float(first_document["x"])
y = float(first_document["y"])
addr = first_document["address_name"]
pname = first_document["place_name"]

# Cell output: the first hit's coordinates, address and place name.
x, y, addr, pname

In [None]:
import requests
import urllib
import pandas as pd

# Lift pandas' display limits: no cap on rows, columns or column width, and no
# wrapping of wide frames across multiple blocks.
for _option_name, _option_value in {
    "display.max_rows": None,
    "display.max_columns": None,
    "display.expand_frame_repr": False,
    "display.max_colwidth": None,
}.items():
    pd.set_option(_option_name, _option_value)


def fetch_kakao_data(keyword, radius, api_key):
    """Collect every page of a Kakao Local keyword search into a DataFrame.

    Pages are requested starting at 1 until the response reports
    ``meta.is_end`` (or a page comes back empty).

    Args:
        keyword: Search text sent as the ``query`` parameter.
        radius: Value sent as the ``radius`` parameter.
        api_key: Kakao REST API key, sent as ``Authorization: KakaoAK <key>``.

    Returns:
        A ``pandas.DataFrame`` with one row per returned document and the
        columns ``x``, ``y`` (floats), ``address_name`` and ``place_name``.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    headers = {
        "Content-Type": "application/json; charset=utf-8",
        "Authorization": f"KakaoAK {api_key}",
    }

    results = []
    page = 1
    is_end = False

    while not is_end:
        res = requests.get(
            "https://dapi.kakao.com/v2/local/search/keyword.json",
            headers=headers,
            # requests URL-encodes a dict on its own; no manual urlencode needed.
            params={"query": keyword, "radius": radius, "page": page},
            timeout=10,
        )
        # An error payload has neither "documents" nor "meta"; report the HTTP
        # error instead of failing later with a KeyError.
        res.raise_for_status()
        result = res.json()

        documents = result["documents"]
        results.extend(
            {
                "x": float(document["x"]),
                "y": float(document["y"]),
                "address_name": document["address_name"],
                "place_name": document["place_name"],
            }
            for document in documents
        )
        # Running total, printed once per page as progress output.
        print(len(results))

        # Stop on the API's end marker; an empty page also stops the loop so a
        # response that never reports is_end cannot spin forever.
        is_end = result["meta"]["is_end"] or not documents
        page += 1

    return pd.DataFrame(results)


import os

# Search settings. The Kakao REST API key comes from the environment so it
# never has to be typed into the notebook; it is an empty string when unset.
api_key = os.environ.get("KAKAO_REST_API_KEY", "")
keyword = "지하철"
# NOTE(review): every other cell in this notebook uses a radius of 20000 --
# confirm that 200000 is intended and accepted by the API.
radius = 200000

# Fetch every result page into a DataFrame.
dataframe = fetch_kakao_data(keyword, radius, api_key)

# Show the result.
print(dataframe)

In [None]:
import requests
import urllib
import pandas as pd


def fetch_station_count(city_name, latitude, longitude, radius, api_key):
    """Collect the distinct category-"SW8" places around a point.

    Pages of the Kakao Local category search are requested until the response
    reports ``meta.is_end``, a page comes back empty, or a response carries no
    ``documents`` key (reported with a printed message).

    Args:
        city_name: Not used by the function; kept so existing calls keep working.
        latitude: Sent as the ``y`` parameter.
        longitude: Sent as the ``x`` parameter.
        radius: Sent as the ``radius`` parameter.
        api_key: Kakao REST API key, sent as ``Authorization: KakaoAK <key>``.

    Returns:
        A tuple ``(count, frame)``: the number of distinct
        ``(x, y, address_name, place_name)`` records and a DataFrame holding
        them, in the order they were first returned.
    """
    headers = {
        "Content-Type": "application/json; charset=utf-8",
        "Authorization": f"KakaoAK {api_key}",
    }

    category_group_code = "SW8"
    page = 1
    # A dict is used as an insertion-ordered set: duplicates collapse, and the
    # resulting DataFrame has a deterministic row order.
    results = {}

    while True:
        res = requests.get(
            "https://dapi.kakao.com/v2/local/search/category.json",
            headers=headers,
            # requests URL-encodes a dict on its own; no manual urlencode needed.
            params={
                "category_group_code": category_group_code,
                "radius": radius,
                "page": page,
                "x": longitude,
                "y": latitude,
            },
            timeout=10,
        )
        result = res.json()

        if "documents" not in result:
            # Error payload: report it and return what was collected so far.
            print(f"Error: 'documents' key not found in the result for page {page}")
            break

        documents = result["documents"]
        for document in documents:
            record = (
                float(document["x"]),
                float(document["y"]),
                document["address_name"],
                document["place_name"],
            )
            results[record] = None

        # Stop at the last page instead of requesting pages until the API
        # starts answering with an error.
        if not documents or result.get("meta", {}).get("is_end", True):
            break

        page += 1

    return len(results), pd.DataFrame(
        list(results), columns=["x", "y", "address_name", "place_name"]
    )


# Search radius passed to every per-city query.
radius = 20000

# Centre point of each city as (latitude, longitude).
cities = {
    "수도권": (37.5665, 126.9780),
    "부산": (35.1796, 129.0756),
    "대구": (35.8714, 128.6014),
    "대전": (36.3504, 127.3845),
    "광주": (35.1595, 126.8526),
}

# Query each city and keep both the count and the detailed frame.
# `api_key` is not defined in this cell; it must already exist from an
# earlier cell. The per-city frame gets its own name so the notebook-wide
# `df` (the station table) is not overwritten.
station_counts = {}
dataframes = {}
for city, (lat, lon) in cities.items():
    count, city_stations = fetch_station_count(city, lat, lon, radius, api_key)
    station_counts[city] = count
    dataframes[city] = city_stations

# Summarise the counts as a table.
station_counts_df = pd.DataFrame(
    list(station_counts.items()), columns=["City", "Subway Station Count"]
)

# Show the summary.
print(station_counts_df)

# Show each city's detailed frame.
for city, city_stations in dataframes.items():
    print(f"\n{city} 지하철역 데이터프레임:\n", city_stations)