In [7]:
from qrytool import load_data_into_dataframe, insert_dataframe_into_table
import pandas as pd
import re

# Render DataFrames in full: no row or column truncation, no wrapping of wide
# frames across several lines, and no shortening of long cell values.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.max_colwidth", None)

In [4]:
# Load the id, address and coordinates of every customatrix row that signed up
# before 2024-06-03 and has a non-null address, latitude and longitude.
addresses_df = load_data_into_dataframe(
    """
SELECT all_id, 회원주소, 회원lati, 회원longi
FROM customatrix
WHERE 가입일시::date < '2024-06-03' AND 회원주소 IS NOT NULL AND 회원lati IS NOT NULL AND 회원longi IS NOT NULL
"""
)


def final_housing_type_classification(address):
    """Label an address string as "공동" or "일반".

    The address is tested against a fixed list of regular expressions
    (apartment/officetel brand keywords, "N동 M호" unit notations,
    parenthesised neighbourhood blocks followed by a unit, hyphenated
    numbers, ...). The first expression that matches anywhere in the string
    makes the result "공동"; if none matches the result is "일반".

    Args:
        address: Address text to classify.

    Returns:
        "공동" when any hint matches, otherwise "일반".
    """
    multi_unit_hints = (
        r"아파트|오피스텔|단지|타운|자이|래미안|푸르지오|하이츠|캐슬|빌라|e편한|팰리스|해모로|휴먼시|힐스테이트|아이파크",
        r"\b\w+빌\b",
        r"\s*\b\d+동\s*[-]{0,1}\s*[\w]+호",
        r"\([^)]*,[^)]*\)[\w]+-[\w]+",
        r"\([^)]*,[^)]*\)\s*[\w]+",
        r"\([^)]*,[^)]*\)\s*[\w]+호",
        r"\([^)]*동[^)]*\)\s*[\w]+호",
        r"\)\s*[^\(]*[\w]+[호]*$",
        r"[\w]+동\s*[\w]+호",
        r"[^\(]+\d+\s*호",
        r"\b\d+-[\w]+호\b",
        r"[^길]\b\d+\s*-\s*\d+\b\s*",
    )
    # Stop at the first hit; the order only affects speed, not the result.
    for hint in multi_unit_hints:
        if re.search(hint, address):
            return "공동"
    return "일반"


# Alternative input (disabled): load the addresses from a TSV file instead of
# the database query above.
# addresses_df = pd.read_csv('addresses.tsv', sep='\t')
# Classify every address and store the label in a new "주택유형" column.
addresses_df["주택유형"] = addresses_df["회원주소"].apply(
    final_housing_type_classification
)
# Number of addresses per housing-type label.
housing_type_counts = addresses_df["주택유형"].value_counts()

# Bare expression so the notebook renders the counts as the cell output.
housing_type_counts

주택유형
공동    17476
일반      471
Name: count, dtype: int64

In [12]:
# Keep only the rows classified as "공동". This rebinds addresses_df, so
# re-running the cell filters the already-filtered frame again.
addresses_df = addresses_df[addresses_df["주택유형"] == "공동"]
# Patterns that extract the building (동) and the unit (호).
# NOTE: this first list is never used; it is overwritten by the assignment to
# `patterns` that immediately follows it.
patterns = [
    '\\([0-9가-힣]+[가동리]{1},\\s*([\\s\\w가-힣]+)\\)\\s*(\\w*[동차]{0,1})[-]{0,1}\\s{0,1}(\\w*[호]{0,1})',
    '\\([0-9가-힣]+[가동리]{1}\\)\\s*([\\s\\w]+[가-힣]+)\\s*(\\w+[동차]{1})[-]{0,1}\\s{0,1}(\\w*[호]{0,1})',
    '\\)\\s+([가-힣\\w]+(?<!동))\\s*([\\d]{3,4}호)',
    '\\(([\\w가-힣]+[^가동리])\\)\\s*(\\w*[동차]{0,1})[-]{0,1}\\s{0,1}(\\w*[호]{0,1})',
    '\\)\\s+([가-힣\\w\\s]+오피스텔\\s*(?<!동))\\s*([\\d]{3,4}호)',
    '\\)\\s+([가-힣\\w\\s]+아파트\\s*(?<!동))\\s*([\\d]{3,4}호)'
]
# Effective pattern list. Patterns with one capture group yield only the unit;
# patterns with two capture groups yield (building, unit).
patterns = [
    # r'\)(\b[\w]+[동]{0,1})\s*[-]{0,1}\s*([\w]+[호]{0,1})',
    r"\)\s*([\w]+[동]*)\s*[-]*\s*([\w]+[호]*)",
    r"\(([^)]*),[^)]*\)\s*([\w]+-[^\s]+)",
    r"([\w]+동)\s*([\w]+호)",
    r"\)\s*([\w]+호)",
    r"\([^)]*,[^)]*\)\s*([\w]+호)",
    r"\([^)]*\)\s*\([^)]*\)\s*([\w]+호)",
    r"\)\s*[^\(]*([\w]+[호]*)$",  # TO DELETE
    r"\s[^\d]+[^\(][^\)][^동]+\s([\d]+호)",  # only a unit such as "201호" appears, with no building
    r"\b([\w])+-([\w]+호)\b",
    r"[^길가로동]\s([\w]+)\s*-\s*([\w]+)\b\s*",  # no 동/호 suffix: avoid confusing building-unit numbers with lot numbers
]

# Extract building/unit for each row and write them into new columns.
for index, row in addresses_df.iterrows():
    dong = ""
    ho = ""
    # There is no `break` in this loop: every pattern is tried and each match
    # overwrites the previous result, so the LAST matching pattern wins. A
    # later one-group match replaces `ho` but leaves an earlier `dong` intact.
    for pattern in patterns:
        match = re.search(pattern, row["회원주소"])
        if match:
            if len(match.groups()) == 1:
                ho = match.group(1)  # only the unit was captured
            elif len(match.groups()) >= 2:
                dong = match.group(1)  # building
                ho = match.group(2)  # unit
    addresses_df.at[index, "동"] = dong
    addresses_df.at[index, "호"] = ho

# Save the annotated frame as a tab-separated file.
addresses_df.to_csv("marked_dnho_addresses.tsv", sep="\t", index=False)

In [12]:
# Keep only the rows classified as "공동". This rebinds addresses_df, so
# re-running the cell filters the already-filtered frame again.
addresses_df = addresses_df[addresses_df["주택유형"] == "공동"]
# Patterns that extract the complex name together with the building (동) and
# unit (호). Two capture groups mean (complex, unit); three capture groups mean
# (complex, building, unit).
patterns = [
    r'\([0-9가-힣]+[가동리]{1},\s*([\s\w가-힣]+)\)\s*(\w*[동차]{0,1})[-]{0,1}\s{0,1}(\w*[호]{0,1})',
    r'\([0-9가-힣]+[가동리]{1}\)\s*([\s\w]+[가-힣]+)\s*(\w+[동차]{1})[-]{0,1}\s{0,1}(\w*[호]{0,1})',
    r'\)\s+([가-힣\w]+(?<!동))\s*([\d]{3,4}호)',
    r'\(([\w가-힣]+[^가동리])\)\s*(\w*[동차]{0,1})[-]{0,1}\s{0,1}(\w*[호]{0,1})',
    r'\)\s+([가-힣\w\s]+오피스텔\s*(?<!동))\s*([\d]{3,4}호)',
    r'\)\s+([가-힣\w\s]+아파트\s*(?<!동))\s*([\d]{3,4}호)'
]

# Extract complex/building/unit for each row and write them into new columns.
for index, row in addresses_df.iterrows():
    # Named complex_name rather than `complex` so the builtin complex type is
    # not shadowed in the notebook's global namespace.
    complex_name = ""
    dong = ""
    ho = ""
    for pattern in patterns:
        match = re.search(pattern, row["회원주소"])
        if match:
            if len(match.groups()) == 2:
                complex_name = match.group(1)  # complex name
                ho = match.group(2)  # unit (no building captured)
            elif len(match.groups()) >= 3:
                complex_name = match.group(1)  # complex name
                dong = match.group(2)  # building
                ho = match.group(3)  # unit
            # First matching pattern wins.
            break
    addresses_df.at[index, "건물(단지)명"] = complex_name
    addresses_df.at[index, "동"] = dong
    addresses_df.at[index, "호"] = ho

# Save the annotated frame as a tab-separated file.
addresses_df.to_csv("marked_dnho_addresses.tsv", sep="\t", index=False)

In [3]:


def split_address(address):
    """Split an address into a base part and a trailing detail part.

    One regular expression with three alternatives is searched in the
    address:

    1. `` <digits/hyphens> `` optionally followed by a parenthesised block;
       everything after it (group 2) is the detail.
    2. ``...구...동 <name> 아파트`` (group 3); everything after "아파트"
       (group 4) is the detail.
    3. a road/neighbourhood suffix plus a number, optionally followed by a
       parenthesised block; the remainder starting with a comma or whitespace
       (group 6) is the detail.

    Only the captured span is cut out of the address, so text that merely
    repeats the detail earlier in the string is left untouched.

    Args:
        address: Address text, or a null value (None/NaN).

    Returns:
        Tuple ``(addr1, addr2)``: the address without the detail, and the
        detail itself. ``addr2`` is None when nothing could be split off
        (null input, no pattern match, or an empty detail); in those cases
        ``addr1`` is the input unchanged.
    """
    # Null input: nothing to split (previously raised UnboundLocalError).
    if not pd.notnull(address):
        return address, None

    pattern = re.compile(
        r"\s[\-0-9]+\s(\([가-힣a-zA-Z0-9,\s]+\)\s)?(.*)|.*구.*동\s(.*\s아파트)(.*)|[\s로길동]+[\-0-9번지]+(\([가-힣a-zA-Z0-9,\s]+\))?([,\s].*)"
    )
    match = pattern.search(address)  # search the whole string, not just its start
    if not match:
        print("No match :" + address)
        return address, None

    def without_group(group_no):
        # Drop exactly the characters captured by the group, nothing else.
        start, end = match.span(group_no)
        return address[:start] + address[end:]

    detail = match.group(2)
    apartment = match.group(3)
    trailing = match.group(6)

    if detail:
        return without_group(2), detail
    if apartment:
        # Group 3 always ends with "아파트"; the detail is what follows it.
        return without_group(4), match.group(4)
    # Group 6 is None when alternative 1 matched with an empty detail, so it
    # must be checked before calling .strip() on it.
    if trailing and trailing.strip():
        return without_group(6), trailing.replace(",", "").strip()
    return address, None


# for index, row in addresses_df.iterrows():
#     addr1, addr2 = split_address(row["회원주소"])
#     addresses_df.at[index, '회원주소1'] = addr1
#     addresses_df.at[index, '회원주소2'] = addr2

# addresses_df.to_csv("marked_split_addresses.tsv", sep="\t", index=False)

In [5]:
from geotool import get_naver_coordinates, get_kr_addr_type
# Manual check of the geocoding helper on one sample address; the returned
# dict is rendered as the cell output.
get_naver_coordinates('경기 수원시 권선구 곡선로 20')
# get_kr_addr_type('경기 수원시 권선구 곡선로 20')

{'old_addr': '경기 수원시 권선구 권선동 1356',
 'new_addr': '경기 수원시 권선구 권선동 곡선로 20',
 'building_name': '수원아이파크시티6단지',
 'sub_building_no': '16666',
 'lati': 37.23868,
 'longi': 127.02657,
 'hcode': '4111368000',
 'r1': '경기',
 'r2': '수원시 권선구',
 'r3': '권선동',
 'bcode': '4111313700'}

In [None]:
from geotool import get_old_addr_from_addressalls_clbe, get_kr_addr_type, split_address
from qrytool import load_data_into_dataframe, insert_dataframe_into_table
import pandas as pd
import re

# Render DataFrames in full: no row or column truncation, no wrapping of wide
# frames across several lines, and no shortening of long cell values.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.max_colwidth", None)


def get_super_str(a, b):
    """Return whichever of two strings contains the other.

    ``a`` is stripped of surrounding whitespace; ``b`` has every space
    removed. If the normalised strings are equal, or one is a strict
    substring of the other, the longer (or equal) one is returned.
    Otherwise a diagnostic line is printed and normalised ``a`` is returned.

    Args:
        a: First candidate string.
        b: Second candidate string.

    Returns:
        The containing string, or normalised ``a`` when neither contains
        the other.
    """
    left = a.strip()
    right = b.replace(' ', '').strip()

    if left == right:
        return left
    if len(left) < len(right) and left in right:
        return right
    if len(left) > len(right) and right in left:
        return left

    # Neither string contains the other: report it and fall back to `a`.
    print("super str 판별 어려움: ", left, ' vs. ', right)
    return left


def is_same_str_by_rem_space(a, b):
    """Compare stripped ``a`` with ``b`` after removing all spaces from ``b``.

    Returns:
        The stripped ``a`` when the two normalised strings are equal,
        otherwise None. Callers use the result only for its truthiness.
    """
    left = a.strip()
    right = b.replace(' ', '').strip()
    return left if left == right else None


def has_end_only_braket_string(s):
    """Return True when ``s`` has no "(", exactly one ")", and ends with ")"."""
    if '(' in s:
        return False
    return s.endswith(')') and s.count(')') == 1


def has_only_one_set_braket_string(s):
    """Return True when ``s`` is wrapped in a single "(" ... ")" pair.

    The string must start with "(", end with ")", and contain exactly one of
    each bracket.
    """
    wrapped = s.startswith('(') and s.endswith(')')
    return wrapped and s.count('(') == 1 and s.count(')') == 1


def has_middle_bracket(s):
    """Return True when ``s`` contains ")" but neither starts nor ends with it."""
    if s.startswith(")") or s.endswith(")"):
        return False
    return ")" in s


def is_single_word(s):
    """Tell whether ``s`` is a plain name with no building/unit token in it.

    Spaces are removed first. The result is True only when the remaining
    text consists solely of Hangul syllables, ASCII letters and digits AND
    contains no building token ("...동" preceded by digits, ASCII letters or
    one of 가/나/다/라/마/바) and no unit token (digits followed by "호").

    The building/unit token is looked for anywhere in the string. The earlier
    pattern made both tokens optional, so it always matched the empty string
    at position 0 and only a token at the very start was ever detected.

    Args:
        s: Candidate text.

    Returns:
        True for a plain alphanumeric/Hangul name, False otherwise
        (including the empty string).
    """
    compact = s.replace(' ', '')
    # Any "N동"/"가동"/"A동" or "N호" token disqualifies the string.
    if re.search(r'[\dA-Za-z가나다라마바]+동|\d+호', compact):
        return False
    return bool(re.match(r'^[가-힣A-Za-z0-9]+$', compact))


def has_not_relevant_pattern(s):
    """Flag strings made only of digits/hyphens/whitespace, or a bare "N층".

    Returns:
        True when ``s`` consists solely of whitespace, digits and hyphens, or
        of whitespace/digits followed by "층"; None otherwise.
    """
    numeric_or_floor_only = re.search(r"^[\s\d\-]+$|^[\s\d]+층\s*$", s)
    return True if numeric_or_floor_only else None


def get_complex_name(remained_str):
    """Pull a complex/building name out of the leftover detail text.

    The text is stripped and one leading "(" is dropped. It is then
    classified, in this order:

    * wrapped in a single bracket pair, or ending with a lone ")": the
      bracket characters are removed and the rest is the name;
    * digits/hyphens only, or a bare "N층": no name, text returned as leftover;
    * a ")" in the middle: the part before the first ")" is the name and the
      part after it is the leftover (empty when it equals the name once
      spaces are ignored);
    * a plain single word: the whole text is the name;
    * anything else: no name, text returned as leftover.

    Args:
        remained_str: Detail text left after building/unit were removed.

    Returns:
        Tuple ``(complex_name, leftover)``; either element may be "".
    """
    text = remained_str.strip()
    if text == "":
        return "", ""
    if text[0] == "(":
        text = text[1:].strip()

    if has_only_one_set_braket_string(text):
        return text.replace("(", "").replace(")", "").strip(), ""

    if has_end_only_braket_string(text):
        return text.replace(")", "").strip(), ""

    if has_not_relevant_pattern(text):
        print("예외문자열:" + text)
        return "", text

    if has_middle_bracket(text):
        parts = text.split(sep=")")
        if len(parts) > 2:
            # More than one ")": only the first two pieces are used below.
            print("이상한문자열:" + text)
        head, tail = parts[0], parts[1]
        leftover = "" if is_same_str_by_rem_space(head, tail) else tail.strip()
        return head.strip(), leftover

    if is_single_word(text):
        return text, ""

    print("추가고려 필요 ==> " + text)
    return "", text


def get_dong_complex_name(remained_str):
    """Split leftover detail text into neighbourhood, complex name and rest.

    A neighbourhood is a "(" followed by Hangul/digits ending in 동, 가 or 리
    and then a whitespace, "," or ")". When found, "(" + name + separator is
    removed from the text (every occurrence) before the complex name is
    extracted with ``get_complex_name``.

    Args:
        remained_str: Detail text; may be None or blank.

    Returns:
        Tuple ``(dong_name, complex_name, leftover)``; all three are "" for
        blank input.
    """
    if not remained_str or remained_str.strip() == "":
        return "", "", ""

    dong_name = ''
    found = re.search(r'\(([가-힣\d]+[동가리]{1})([\s,)]){1}', remained_str)
    if found:
        dong_name, sep = found.group(1), found.group(2)
        remained_str = remained_str.replace('(' + dong_name + sep, '').strip()

    complex_name, remained_str = get_complex_name(remained_str)
    return dong_name, complex_name, remained_str


# Reload the id, address and coordinates of every customatrix row that signed
# up before 2024-06-03 and has a non-null address, latitude and longitude.
addresses_df = load_data_into_dataframe(
    """
SELECT all_id, 회원주소, 회원lati, 회원longi
FROM customatrix
WHERE 가입일시::date < '2024-06-03' AND 회원주소 IS NOT NULL AND 회원lati IS NOT NULL AND 회원longi IS NOT NULL
"""
)
# Split each address into base/detail parts and derive the old-style address.
# NOTE(review): this cell imports split_address from geotool, which replaces
# any notebook-level definition of the same name — confirm that is intended.
for index, row in addresses_df.iterrows():
    addr1, addr2 = split_address(row["회원주소"])
    addresses_df.at[index, '회원주소1'] = addr1
    addresses_df.at[index, '회원주소2'] = addr2

    # Default: the old-style address equals the base address.
    addresses_df.at[index, "회원주소1_old"] = addr1
    # When the address type is reported as '도로' (presumably a road-name
    # address — confirm), look the old address up; a failed lookup stores ''.
    if get_kr_addr_type(addr1) == '도로':
        old_addr1 = get_old_addr_from_addressalls_clbe(addr1)
        addresses_df.at[index, "회원주소1_old"] = old_addr1['old_addr'] if old_addr1 is not None else ''

# print(addresses_df.head())
# Patterns applied to the detail part of the address ("회원주소2"), tried in
# order. Two capture groups mean (building, unit); one capture group means
# unit only. Every pattern is anchored to the end of the string.
patterns = [
    r'([\dA-Za-z가나다라마바]+동)\s*(\w+[호]{0,1})\s*$',
    r'\)\s*(\w+)\s*[-ㅡ]\s*([\dA-Za-z]+호*)\s*$',
    r'\)\s*[가-힣A-Za-z\.\,]+\s*([\dABCDEabcde]*)\s*[-ㅡ\s]{1}\s*([\dA-Za-z]+호*)\s*$',
    r'\s*(\w+)\s*[-ㅡ]\s*([\dA-Za-z]+호*)\s*$',
    r'(아파트동)\s*(\w+[호]{0,1})\s*$',
    r'(오피스텔동)\s*(\w+[호]{0,1})\s*$',
    r'\)\s*([\dA-Za-z]+호)\s*$',
    r'\)\s*[가-힣A-Za-z\.\,]+([\dA-Za-z]+호)\s*$',
    r'\s*([\dA-Za-z]+호)\s*$',
]
# Row counter; incremented below but not read anywhere in this cell.
cnt = 0
for index, row in addresses_df.iterrows():
    cnt += 1
    dong = ""
    ho = ""
    temp_addr2 = ""
    dong_name = ""
    complex_name = ""
    for idx, pattern in enumerate(patterns):
        # Blank detail: skip every pattern, leaving all results empty.
        if not row["회원주소2"] or row["회원주소2"].strip() == "":
            continue
        # print(row['회원주소2'])
        match = re.search(pattern, row["회원주소2"])
        if match:
            if len(match.groups()) == 1:
                ho = match.group(1)  # only the unit was captured
                # str.replace removes every occurrence of the unit text.
                temp_addr2 = row["회원주소2"].replace(ho, '')
                if '호' not in ho:
                    ho = ho + '호'
            elif len(match.groups()) >= 2:
                dong = match.group(1)  # building
                ho = match.group(2)  # unit
                # This assigns to the row object yielded by iterrows();
                # addresses_df itself is only written through .at below.
                row["회원주소2"] = row["회원주소2"].replace(ho, '')
                temp_addr2 = row["회원주소2"].replace(dong, '')
                # Normalise: make sure the suffixes 호 / 동 are present.
                if '호' not in ho:
                    ho = ho + '호'
                if len(dong) >= 1 and '동' not in dong:
                    dong = dong + '동'
                # Patterns 1-3 use a hyphen between building and unit; drop
                # every "-" left in the remaining text.
                if idx in [1, 2, 3]:
                    temp_addr2 = temp_addr2.replace('-', '')
            # First matching pattern wins.
            break
    temp_addr2 = temp_addr2.strip()
    # Split what is left into neighbourhood, complex name and leftover text.
    dong_name, complex_name, remained_str = get_dong_complex_name(temp_addr2)
    addresses_df.at[index, "동"] = dong
    addresses_df.at[index, "호"] = ho
    addresses_df.at[index, "회원주소_reduced"] = temp_addr2
    addresses_df.at[index, "동네"] = dong_name
    addresses_df.at[index, "건물명"] = complex_name
    addresses_df.at[index, "잔여문자열"] = remained_str

# addresses_df.head(10).to_csv("marked_동호_건물명후보.tsv", sep="\t", index=False)
# Persist the annotated frame to the database table and to a TSV file.
insert_dataframe_into_table(addresses_df, "customatrix_complex_dong_ho")
addresses_df.to_csv("marked_동호_건물명후보.tsv", sep="\t", index=False)

In [17]:
from qrytool import load_data_into_dataframe, insert_dataframe_into_table
import warnings
import random
import pandas as pd
import json
import numpy as np
import sys
import re
# Debug aid: print the module search path of the running kernel. The output
# contains absolute local paths; consider clearing it before sharing.
print(sys.path)


def split_address(address):
    """Split an address into a base part and a trailing detail part.

    One regular expression with three alternatives is searched in the
    address:

    1. `` <digits/hyphens> `` optionally followed by a parenthesised block;
       everything after it (group 2) is the detail.
    2. ``...구...동 <name> 아파트`` (group 3); everything after "아파트"
       (group 4) is the detail.
    3. a road/neighbourhood suffix plus a number, optionally followed by a
       parenthesised block; the remainder starting with a comma or whitespace
       (group 6) is the detail.

    Only the captured span is cut out of the address, so text that merely
    repeats the detail earlier in the string is left untouched.

    Args:
        address: Address text, or a null value (None/NaN).

    Returns:
        Tuple ``(addr1, addr2)``: the address without the detail, and the
        detail itself. ``addr2`` is None when nothing could be split off
        (null input, no pattern match, or an empty detail); in those cases
        ``addr1`` is the input unchanged.
    """
    # Null input: nothing to split (previously raised UnboundLocalError).
    if not pd.notnull(address):
        return address, None

    pattern = re.compile(
        r"\s[\-0-9]+\s(\([가-힣a-zA-Z0-9,\s]+\)\s)?(.*)|.*구.*동\s(.*\s아파트)(.*)|[\s로길동]+[\-0-9번지]+(\([가-힣a-zA-Z0-9,\s]+\))?([,\s].*)"
    )
    match = pattern.search(address)  # search the whole string, not just its start
    if not match:
        print("No match :" + address)
        return address, None

    def without_group(group_no):
        # Drop exactly the characters captured by the group, nothing else.
        start, end = match.span(group_no)
        return address[:start] + address[end:]

    detail = match.group(2)
    apartment = match.group(3)
    trailing = match.group(6)

    if detail:
        return without_group(2), detail
    if apartment:
        # Group 3 always ends with "아파트"; the detail is what follows it.
        return without_group(4), match.group(4)
    # Group 6 is None when alternative 1 matched with an empty detail, so it
    # must be checked before calling .strip() on it.
    if trailing and trailing.strip():
        return without_group(6), trailing.replace(",", "").strip()
    return address, None


def get_address_by_name_mdn(name, mdn):
    """Fetch customatrix rows by member name and trailing phone digits.

    The values are interpolated into SQL string literals, so embedded single
    quotes are doubled first; otherwise a quote typed into the lookup form
    would terminate the literal and alter the query.
    NOTE(review): if load_data_into_dataframe supports bound parameters,
    prefer them over string interpolation.

    Args:
        name: Exact member name to match.
        mdn: Suffix the payment phone number must end with.

    Returns:
        Whatever load_data_into_dataframe returns for the query.
    """
    safe_name = str(name).replace("'", "''")
    safe_mdn = str(mdn).replace("'", "''")
    qry_name_mdn = f"SELECT all_id,이름,결제전화,회원주소,회원주소1,회원lati,회원longi,가입일시 FROM customatrix WHERE 이름='{safe_name}' and 결제전화 like '%{safe_mdn}'"
    return load_data_into_dataframe(qry_name_mdn)


def get_address_by_cid(cid):
    """Fetch the customatrix rows whose all_id equals ``cid``.

    ``cid`` is interpolated into the query without quoting.

    Returns:
        Whatever load_data_into_dataframe returns for the query.
    """
    by_id_sql = f"SELECT all_id,이름,결제전화,회원주소,회원주소1,회원lati,회원longi,가입일시 FROM customatrix WHERE all_id={cid}"
    return load_data_into_dataframe(by_id_sql)


def get_address_by_addr(addr):
    """Fetch the customatrix rows whose address equals ``addr`` exactly.

    The address is emitted as a quoted SQL string literal with embedded
    single quotes doubled. It was previously interpolated bare, which is not
    valid SQL for ordinary address text.

    Args:
        addr: Raw address text (not pre-quoted).

    Returns:
        Whatever load_data_into_dataframe returns for the query.
    """
    safe_addr = str(addr).replace("'", "''")
    qry_addr = f"SELECT all_id,이름,결제전화,회원주소,회원주소1,회원lati,회원longi,가입일시 FROM customatrix WHERE 회원주소='{safe_addr}'"
    return load_data_into_dataframe(qry_addr)


def is_same_str_by_rem_space(a, b):
    """Compare stripped ``a`` with ``b`` after removing all spaces from ``b``.

    Returns:
        The stripped ``a`` when the two normalised strings are equal,
        otherwise None. Callers use the result only for its truthiness.
    """
    left = a.strip()
    right = b.replace(' ', '').strip()
    return left if left == right else None


def has_end_only_braket_string(s):
    """Return True when ``s`` has no "(", exactly one ")", and ends with ")"."""
    if '(' in s:
        return False
    return s.endswith(')') and s.count(')') == 1


def has_only_one_set_braket_string(s):
    """Return True when ``s`` is wrapped in a single "(" ... ")" pair.

    The string must start with "(", end with ")", and contain exactly one of
    each bracket.
    """
    wrapped = s.startswith('(') and s.endswith(')')
    return wrapped and s.count('(') == 1 and s.count(')') == 1


def has_middle_bracket(s):
    """Return True when ``s`` contains ")" but neither starts nor ends with it."""
    if s.startswith(")") or s.endswith(")"):
        return False
    return ")" in s


def is_single_word(s):
    """Tell whether ``s`` is a plain name with no building/unit token in it.

    Spaces are removed first. The result is True only when the remaining
    text consists solely of Hangul syllables, ASCII letters and digits AND
    contains no building token ("...동" preceded by digits, ASCII letters or
    one of 가/나/다/라/마/바) and no unit token (digits followed by "호").

    The building/unit token is looked for anywhere in the string. The earlier
    pattern made both tokens optional, so it always matched the empty string
    at position 0 and only a token at the very start was ever detected.

    Args:
        s: Candidate text.

    Returns:
        True for a plain alphanumeric/Hangul name, False otherwise
        (including the empty string).
    """
    compact = s.replace(' ', '')
    # Any "N동"/"가동"/"A동" or "N호" token disqualifies the string.
    if re.search(r'[\dA-Za-z가나다라마바]+동|\d+호', compact):
        return False
    return bool(re.match(r'^[가-힣A-Za-z0-9]+$', compact))


def has_not_relevant_pattern(s):
    """Flag strings made only of digits/hyphens/whitespace, or a bare "N층".

    Returns:
        True when ``s`` consists solely of whitespace, digits and hyphens, or
        of whitespace/digits followed by "층"; None otherwise.
    """
    numeric_or_floor_only = re.search(r"^[\s\d\-]+$|^[\s\d]+층\s*$", s)
    return True if numeric_or_floor_only else None


def get_complex_name(remained_str):
    """Pull a complex/building name out of the leftover detail text.

    The text is stripped and one leading "(" is dropped. It is then
    classified, in this order:

    * wrapped in a single bracket pair, or ending with a lone ")": the
      bracket characters are removed and the rest is the name;
    * digits/hyphens only, or a bare "N층": no name, text returned as leftover;
    * a ")" in the middle: the part before the first ")" is the name and the
      part after it is the leftover (empty when it equals the name once
      spaces are ignored);
    * a plain single word: the whole text is the name;
    * anything else: no name, text returned as leftover.

    Args:
        remained_str: Detail text left after building/unit were removed.

    Returns:
        Tuple ``(complex_name, leftover)``; either element may be "".
    """
    text = remained_str.strip()
    if text == "":
        return "", ""
    if text[0] == "(":
        text = text[1:].strip()

    if has_only_one_set_braket_string(text):
        return text.replace("(", "").replace(")", "").strip(), ""

    if has_end_only_braket_string(text):
        return text.replace(")", "").strip(), ""

    if has_not_relevant_pattern(text):
        print("예외문자열:" + text)
        return "", text

    if has_middle_bracket(text):
        parts = text.split(sep=")")
        if len(parts) > 2:
            # More than one ")": only the first two pieces are used below.
            print("이상한문자열:" + text)
        head, tail = parts[0], parts[1]
        leftover = "" if is_same_str_by_rem_space(head, tail) else tail.strip()
        return head.strip(), leftover

    if is_single_word(text):
        return text, ""

    print("추가고려 필요 ==> " + text)
    return "", text


def get_dong_complex_name(remained_str):
    """Split leftover detail text into neighbourhood, complex name and rest.

    A neighbourhood is a "(" followed by Hangul/digits ending in 동, 가 or 리
    and then a whitespace, "," or ")". When found, "(" + name + separator is
    removed from the text (every occurrence) before the complex name is
    extracted with ``get_complex_name``.

    Args:
        remained_str: Detail text; may be None or blank.

    Returns:
        Tuple ``(dong_name, complex_name, leftover)``; all three are "" for
        blank input.
    """
    if not remained_str or remained_str.strip() == "":
        return "", "", ""

    dong_name = ''
    found = re.search(r'\(([가-힣\d]+[동가리]{1})([\s,)]){1}', remained_str)
    if found:
        dong_name, sep = found.group(1), found.group(2)
        remained_str = remained_str.replace('(' + dong_name + sep, '').strip()

    complex_name, remained_str = get_complex_name(remained_str)
    return dong_name, complex_name, remained_str


def get_complex_dong_ho(address, lat, lon):
    """Extract the complex name, building (동) and unit (호) from an address.

    The address is split with ``split_address`` and its detail part is
    matched against an ordered list of end-anchored patterns; the first one
    that matches decides building and unit. The matched texts are removed
    from the detail with ``str.replace`` (every occurrence), and what is left
    is handed to ``get_dong_complex_name`` to find the complex name.

    Args:
        address: Full address text.
        lat: Not used by this function.
        lon: Not used by this function.

    Returns:
        Tuple ``(complex_name, dong, ho)``; each element is "" when it could
        not be determined. ``dong`` and ``ho`` always carry their 동 / 호
        suffix when non-empty.
    """
    _, detail = split_address(address)

    # Two capture groups -> (building, unit); one capture group -> unit only.
    unit_rules = (
        r'([\dA-Za-z가나다라마바]+동)\s*(\w+[호]{0,1})\s*$',
        r'\)\s*(\w+)\s*[-ㅡ]\s*([\dA-Za-z]+호*)\s*$',
        r'\)\s*[가-힣A-Za-z\.\,]+\s*([\dABCDEabcde]*)\s*[-ㅡ\s]{1}\s*([\dA-Za-z]+호*)\s*$',
        r'\s*(\w+)\s*[-ㅡ]\s*([\dA-Za-z]+호*)\s*$',
        r'(아파트동)\s*(\w+[호]{0,1})\s*$',
        r'(오피스텔동)\s*(\w+[호]{0,1})\s*$',
        r'\)\s*([\dA-Za-z]+호)\s*$',
        r'\)\s*[가-힣A-Za-z\.\,]+([\dA-Za-z]+호)\s*$',
        r'\s*([\dA-Za-z]+호)\s*$',
    )
    # Rules whose building and unit are joined by a hyphen.
    hyphen_rules = (1, 2, 3)

    dong = ""
    ho = ""
    reduced = ""
    # A missing or blank detail leaves every result empty.
    if detail and detail.strip() != "":
        for rule_no, rule in enumerate(unit_rules):
            found = re.search(rule, detail)
            if found is None:
                continue
            captured = found.groups()
            if len(captured) == 1:
                ho = captured[0]
                reduced = detail.replace(ho, '')
                if '호' not in ho:
                    ho += '호'
            elif len(captured) >= 2:
                dong, ho = captured[0], captured[1]
                # Remove the unit first, then the building, from the detail.
                detail = detail.replace(ho, '')
                reduced = detail.replace(dong, '')
                if '호' not in ho:
                    ho += '호'
                if len(dong) >= 1 and '동' not in dong:
                    dong += '동'
                if rule_no in hyphen_rules:
                    reduced = reduced.replace('-', '')
            # First matching rule wins.
            break

    _, complex_name, _ = get_dong_complex_name(reduced.strip())
    return complex_name, dong, ho


def get_address_info_name_mdn(name, mdn):
    """Look up a member and resolve the complex their address belongs to.

    The first row returned by get_address_by_name_mdn supplies the address
    and coordinates; alladdr_naver_joins is then queried for rows with
    exactly those coordinates.

    Returns:
        One of three shapes — callers must handle all of them:
        * None when no member row was found;
        * the tuple ``(addr, None)`` when no complex row matched the
          coordinates, or several matched and none could be selected;
        * a dict with keys '주소', '아파트명', '동', '호', '건물명', '건물#'
          when a single complex was resolved.
    """
    df = get_address_by_name_mdn(name, mdn)
    if df.empty:
        return None
    # Exact equality on the stored latitude/longitude of the first member row.
    qry_alladdr_namver_joins = f"SELECT * FROM alladdr_naver_joins WHERE lati={df['회원lati'].iloc[0]} AND longi={df['회원longi'].iloc[0]}"
    df_all = load_data_into_dataframe(qry_alladdr_namver_joins)
    addr = df['회원주소'].iloc[0]
    if len(df_all) == 0:
        return addr, None
    elif len(df_all) == 1:
        # The lat/lon arguments are whole columns here; get_complex_dong_ho
        # does not read them.
        apt_name, dong_name, ho_name = get_complex_dong_ho(addr, df_all['lati'], df_all['longi'], )
        print(apt_name, dong_name, ho_name)
        return {'주소': addr, '아파트명': apt_name, '동': dong_name, '호': ho_name, '건물명': df_all['complex_name'].iloc[0], '건물#': df_all['complex_no'].iloc[0], }
    else:
        complex_nos = df_all['complex_no'].unique()
        # For each candidate complex_no, load its building names and check
        # whether the building parsed from the address is one of them.
        for cn in complex_nos:
            df_bn = load_data_into_dataframe(f"SELECT DISTINCT build_name FROM naver_complex_dongho WHERE complex_no = '{cn}'")
            bn_list = df_bn['build_name'].unique().tolist()
            apt_name, dong_name, ho_name = get_complex_dong_ho(addr, df_all['lati'], df_all['longi'], )
            print(apt_name, dong_name, ho_name, bn_list)
            # Compare without the 동/호 suffixes; in this branch the returned
            # dict also carries the suffix-less values.
            dong_name = dong_name.replace("동", "")
            ho_name = ho_name.replace("호", "")
            if dong_name in bn_list:
                df_all = df_all[df_all['complex_no'] == cn]
                return {'주소': addr, '아파트명': apt_name, '동': dong_name, '호': ho_name, '건물명': df_all['complex_name'].iloc[0], '건물#': df_all['complex_no'].iloc[0], }
        # One address can host both an officetel and an apartment, so df_all
        # may hold two buildings; one of them has to be chosen by comparing
        # against build_name in naver_complex_dongho. No candidate matched.
        return addr, None


def search_complex_realprice(complex_no, dong_name, ho_name):
    """Query real-price rows for one complex, building and (optionally) unit.

    naver_complexes is left-joined with naver_complex_dongho and
    naver_complex_realprices. The 동 / 호 suffixes are removed from the given
    names before filtering. When no building is given the filter falls back
    to build_name '1'; the unit filter is added only when a unit is given.

    Args:
        complex_no: Complex identifier to filter on.
        dong_name: Building name, with or without the 동 suffix; may be empty.
        ho_name: Unit name, with or without the 호 suffix; may be empty.

    Returns:
        The query result sorted by trade_base_year and trade_base_month,
        newest first.
    """
    base_select = """
        SELECT
            nc.complex_name,
            ncp.complex_no,
            ncd.build_name,
            ncd.ho_name,
            ncd.ho_floor,
            ncp.*
        FROM naver_complexes  nc
        LEFT JOIN naver_complex_dongho ncd ON nc.complex_no = ncd.complex_no
        LEFT JOIN naver_complex_realprices ncp ON ncp.complex_no = nc.complex_no and ncp.pyeong_no = ncd.pyeong_no
        WHERE
    """
    building = dong_name.replace("동", "") if dong_name else ''
    unit = ho_name.replace("호", "") if ho_name else ''
    print('dong_name:', building, ', ho:', unit)

    # Assemble the WHERE clause piece by piece.
    conditions = [f"nc.complex_no='{complex_no}'"]
    if building and building.strip() != '':
        conditions.append(f"and ncd.build_name='{building}'")
    else:
        conditions.append("and ncd.build_name='1'")
    if unit and unit.strip() != '':
        conditions.append(f"and ho_name='{unit}'")

    prices = load_data_into_dataframe(base_select + "".join(conditions))
    return prices.sort_values(by=['trade_base_year', 'trade_base_month'], ascending=False)

['/home/max/cleanbeding/naver-realestate/TIPS', '/home/max/miniconda3/lib/python310.zip', '/home/max/miniconda3/lib/python3.10', '/home/max/miniconda3/lib/python3.10/lib-dynload', '', '/home/max/miniconda3/lib/python3.10/site-packages']


In [None]:
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact, Output
from IPython.display import clear_output
import warnings
# Suppress every Python warning for the rest of the kernel session; this also
# hides deprecation and pandas warnings that could point at real problems.
warnings.simplefilter("ignore")


def display_with_style(name, price):
    """Render a one-row Name/Price table with the name cell shaded grey.

    The index column is hidden through the Styler API. The previous
    ``set_properties(**{'display.index': 'none'})`` call only attached an
    invalid CSS property to the cells and left the index visible.

    Args:
        name: Value shown in the 'Name' column.
        price: Value shown in the 'Price' column.
    """
    df = pd.DataFrame({'Name': [name], 'Price': [price]})

    def color_name_cell(val):
        # Grey background for the cell holding the looked-up name.
        color = 'grey' if val == name else 'white'
        return 'background-color: %s' % color

    styler = df.style
    # Styler.map replaces the deprecated Styler.applymap in newer pandas.
    apply_per_cell = getattr(styler, "map", None) or styler.applymap
    styled_df = apply_per_cell(color_name_cell, subset=['Name'])

    # Hide the index; older pandas only offers hide_index().
    if hasattr(styled_df, "hide"):
        styled_df = styled_df.hide(axis="index")
    else:
        styled_df = styled_df.hide_index()

    display(styled_df)


# Text inputs for the lookup form: the member name and the trailing digits of
# the phone number (the placeholder asks for the last 4 digits).
name_input = widgets.Text(
    value='',
    placeholder='Enter name',
    description='Name:',
)

mdn_input = widgets.Text(
    value='',
    placeholder='Enter last 4 digits of phone number',
    description='MDN:',
)

# Button that triggers the lookup.
button = widgets.Button(description='Find')

# Output area that receives the lookup results.
output = Output()


def on_button_clicked(b):
    """Handle a click on the Find button: look the member up and show prices.

    Reads the name and phone-suffix inputs, resolves the member's complex
    with ``get_address_info_name_mdn`` and renders the result in ``output``.
    The lookup has three outcomes, each handled explicitly:

    * None: no such member;
    * an ``(address, None)`` tuple: member found but no complex resolved
      (indexing it like a dict used to raise TypeError);
    * a dict: prices are queried and displayed; an empty price table no
      longer raises IndexError.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    # Drop whatever the previous click rendered.
    output.clear_output(wait=True)
    name = name_input.value
    mdn = mdn_input.value

    result = get_address_info_name_mdn(name, mdn)

    with output:
        if result is None:
            display('User not found')
            return
        if not isinstance(result, dict):
            # (address, None): the member exists but no complex was resolved.
            display('Complex not found for address: ' + str(result[0]))
            return

        print('result===>', result)
        df_price = search_complex_realprice(result['건물#'], result['동'], result['호'])
        if df_price.empty:
            display('No real-price records found')
        else:
            # Rows are sorted newest first, so row 0 is the latest price.
            display_with_style(name, df_price['formatted_price'].iloc[0])
        display(pd.DataFrame([result]))
        display(df_price)


# Register the click handler, then render the form and its output area.
button.on_click(on_button_clicked)
display(name_input, mdn_input, button, output)


# Use interact to display widgets
# display(interact(on_button_clicked, name=name_input, mdn=mdn_input))