In [1]:
import requests
import math
import pandas as pd
import numpy as np

In [2]:
# Read the Kakao REST API key from a local file. The context manager closes the
# handle deterministically, and strip() drops the trailing newline that editors
# usually leave, which would otherwise end up inside the Authorization header.
with open("kakaoRestKey.txt", "r") as key_file:
    kakao_key = key_file.read().strip()

In [3]:
# Load the school location table. Later cells group it by the '지역' column and
# read the '학교명' and '도로명주소' columns from it.
sp = pd.read_csv('위치정보.csv')

In [4]:
def __get_distance_result__(x1, y1, x2, y2):
    """Return the straight-line (Euclidean) distance between two planar points.

    Args:
        x1, y1: Coordinates of the first point.
        x2, y2: Coordinates of the second point.
            Each value may be a number or a numeric string; it is converted
            with ``float`` before use.

    Returns:
        The distance as a ``float``, in the same unit as the inputs.

    Raises:
        ValueError: If a coordinate string cannot be parsed as a number.
        TypeError: If a coordinate is of a type ``float`` cannot convert.
    """
    # math.hypot avoids the intermediate overflow/underflow that squaring the
    # deltas by hand can produce.
    return math.hypot(float(x1) - float(x2), float(y1) - float(y2))

In [5]:
_KAKAO_LOCAL_API = "https://dapi.kakao.com/v2/local"
_KAKAO_TIMEOUT_SECONDS = 10
# Address -> (x, y) in WTM coordinates. A pairwise distance matrix asks for the
# same address many times; remembering successful lookups avoids repeating the
# two HTTP requests each one costs.
_wtm_cache = {}


def _kakao_first_document(endpoint, params):
    """Call one Kakao Local endpoint and return the first item of its "documents" list."""
    response = requests.get(
        f"{_KAKAO_LOCAL_API}/{endpoint}",
        params=params,
        headers={"Authorization": "KakaoAK " + kakao_key},
        timeout=_KAKAO_TIMEOUT_SECONDS,  # never hang forever on a stalled connection
    )
    # Surface HTTP errors (bad key, quota, ...) here instead of as a confusing
    # KeyError on the error payload further down.
    response.raise_for_status()
    documents = response.json().get("documents") or []
    if not documents:
        raise ValueError(f"Kakao API returned no result for {endpoint} with {params!r}")
    return documents[0]


def _address_to_wtm(address):
    """Geocode an address and return its (x, y) converted to WTM coordinates as floats."""
    if address not in _wtm_cache:
        geocoded = _kakao_first_document("search/address.json", {"query": address})
        # No input_coord is sent, so the API's default input system applies to
        # the x/y values returned by the address search.
        converted = _kakao_first_document(
            "geo/transcoord.json",
            {"x": geocoded["x"], "y": geocoded["y"], "output_coord": "WTM"},
        )
        _wtm_cache[address] = (float(converted["x"]), float(converted["y"]))
    return _wtm_cache[address]


def get_distance_between_to_address(addr1, addr2):
    """Return the straight-line distance between two addresses.

    Each address is looked up with Kakao's address search, the first match is
    converted to WTM coordinates with the transcoord endpoint, and the planar
    distance between the two converted points is returned.

    Args:
        addr1: First address, passed as the search query.
        addr2: Second address, passed as the search query.

    Returns:
        The distance computed by ``__get_distance_result__`` from the two WTM
        coordinate pairs (i.e. expressed in WTM coordinate units).

    Raises:
        ValueError: If the API returns no document for an address.
        requests.RequestException: On network failures, timeouts or HTTP
            error statuses.
    """
    x1, y1 = _address_to_wtm(addr1)
    x2, y2 = _address_to_wtm(addr2)
    return __get_distance_result__(x1, y1, x2, y2)

In [6]:
# Map every distinct '지역' value to the sub-frame of rows that share it.
district_dfs = dict(tuple(sp.groupby('지역')))

In [7]:
def calculate_school_distances(district_name, df_dict, df_name=None):
    """Build the symmetric school-to-school distance matrix for one district.

    Args:
        district_name: Key of the district to look up in ``df_dict``.
        df_dict: Mapping from district name to a DataFrame that has the
            columns '학교명' (school name) and '도로명주소' (road-name address).
        df_name: Optional mapping; when given, the resulting matrix is also
            stored in it under ``district_name``.

    Returns:
        A float DataFrame whose index and columns are the school names and
        whose diagonal is 0, or ``None`` (after printing a message) when the
        district is missing from ``df_dict`` or has at most one school.
    """
    if district_name not in df_dict:
        print(f"{district_name}에 대한 데이터가 없습니다.")
        return None

    df = df_dict[district_name]

    if len(df) <= 1:
        print("학교가 하나입니다.")
        return None

    schools = df['학교명'].tolist()
    addresses = df['도로명주소'].tolist()

    # Fill a plain NumPy matrix by position and wrap it afterwards. Label-based
    # .loc writes would touch several cells when a school name repeats, and
    # writing through DataFrame.values is not guaranteed to reach the frame.
    count = len(schools)
    matrix = np.zeros((count, count), dtype=float)
    for i in range(count):
        # Only the strict upper triangle is requested: the diagonal stays 0,
        # so no lookups are spent on a school's distance to itself.
        for j in range(i + 1, count):
            dist = get_distance_between_to_address(addresses[i], addresses[j])
            matrix[i, j] = dist
            matrix[j, i] = dist

    distances_df = pd.DataFrame(matrix, index=schools, columns=schools)

    if df_name is not None:
        df_name[district_name] = distances_df

    return distances_df

In [8]:
def compute_inter_district_school_distances(district1, district2, df_dict, df_name=None):
    """Build the distance matrix between the schools of two districts.

    Args:
        district1: District whose schools become the rows.
        district2: District whose schools become the columns.
        df_dict: Mapping from district name to a DataFrame that has the
            columns '학교명' (school name) and '도로명주소' (road-name address).
        df_name: Optional mapping; when given, the result is also stored in it
            under the key ``f"{district1}_to_{district2}"``.

    Returns:
        A float DataFrame indexed by the schools of ``district1`` with one
        column per school of ``district2``, or ``None`` (after printing a
        message per offending district) when a district is missing from
        ``df_dict`` or has no schools.
    """
    # Look up both districts' frames, reporting every missing one.
    missing = [name for name in (district1, district2) if name not in df_dict]
    if missing:
        for name in missing:
            print(f"{name}에 대한 데이터가 없습니다.")
        return None

    df1 = df_dict[district1]
    df2 = df_dict[district2]

    empty = [name for name, frame in ((district1, df1), (district2, df2)) if len(frame) == 0]
    if empty:
        for name in empty:
            print(f"{name}에 학교가 없습니다.")
        return None

    schools1 = df1['학교명'].tolist()
    addresses1 = df1['도로명주소'].tolist()
    schools2 = df2['학교명'].tolist()
    addresses2 = df2['도로명주소'].tolist()

    # Fill by position so that repeated school names cannot make one write
    # land in several cells, and so the result is a float matrix.
    matrix = np.empty((len(schools1), len(schools2)), dtype=float)
    for i, addr1 in enumerate(addresses1):
        for j, addr2 in enumerate(addresses2):
            matrix[i, j] = get_distance_between_to_address(addr1, addr2)

    distances_df = pd.DataFrame(matrix, index=schools1, columns=schools2)

    if df_name is not None:
        key = f"{district1}_to_{district2}"
        df_name[key] = distances_df

    return distances_df

In [9]:
def calculate_all_school_distances(sp, save_to_csv=False, csv_path=None):
    """Compute the pairwise distance matrix for every school in ``sp``.

    Prints the number of schools, the number of unordered pairs and the mean
    pairwise distance. Pairs whose distance lookup raises ``ValueError`` or
    ``TypeError`` are reported with a warning, stored as NaN and left out of
    the mean.

    Args:
        sp: DataFrame with the columns '학교명' (school name) and
            '도로명주소' (road-name address).
        save_to_csv: When true, the matrix is also written to a CSV file
            (row labels included).
        csv_path: Destination used when ``save_to_csv`` is true; defaults to
            '학교간거리.csv' when omitted.

    Returns:
        A symmetric float DataFrame labelled by school name with a zero
        diagonal, or ``None`` (after printing a message) when ``sp`` holds
        fewer than two schools.
    """
    schools = sp['학교명'].tolist()
    addresses = sp['도로명주소'].tolist()

    if len(schools) < 2:
        print("학교가 두 개 미만입니다. 거리 계산이 불가능합니다.")
        return None

    total_schools = len(schools)

    # Fill a NumPy matrix by position and wrap it afterwards: label-based .loc
    # writes misbehave when school names repeat, and writing through
    # DataFrame.values is not guaranteed to reach the frame.
    matrix = np.zeros((total_schools, total_schools), dtype=float)
    for i in range(total_schools):
        # Strict upper triangle only; the diagonal is 0 by construction, so no
        # lookups are spent on a school's distance to itself.
        for j in range(i + 1, total_schools):
            try:
                dist = float(get_distance_between_to_address(addresses[i], addresses[j]))
            except (ValueError, TypeError):
                print(f"Warning: {schools[i]}와 {schools[j]} 사이의 거리를 숫자로 변환할 수 없습니다.")
                dist = np.nan
            matrix[i, j] = dist
            matrix[j, i] = dist

    distances_df = pd.DataFrame(matrix, index=schools, columns=schools)

    total_pairs = (total_schools * (total_schools - 1)) // 2

    # Average over the pairs that actually have a distance; NaN entries mark
    # failed lookups and must not contribute to the mean.
    pair_distances = matrix[np.triu_indices(total_schools, k=1)]
    valid_distances = pair_distances[~np.isnan(pair_distances)]
    average_distance = valid_distances.mean() if valid_distances.size > 0 else np.nan

    print(f"총 학교 수: {total_schools}")
    print(f"총 학교 쌍 수: {total_pairs}")
    print(f"평균 거리: {average_distance:.2f}m" if not np.isnan(average_distance) else "평균 거리: 계산 불가")

    if save_to_csv:
        # Keep the index so the rows of the saved matrix stay identifiable.
        distances_df.to_csv(csv_path if csv_path is not None else "학교간거리.csv")

    return distances_df

In [12]:
#all_distances = calculate_all_school_distances(sp)

총 학교 수: 40
총 학교 쌍 수: 780
평균 거리: 13201.64m


In [14]:
#all_distances.to_csv('학교간거리.csv', index=False)