##### 파일 명세
- udrt_kakao_cdh와 udrt_kakao_cdh_updated2를 [사업장명](place_name)을 기준으로 병합하여 새로운 컬렉션 생성
- 새로운 컬렉션(udrt_kakao_cdh_old_new_place_category) 필드=[uuid, category_name, place_name, mapped_category]

In [None]:
import os
from dotenv import load_dotenv
from pymongo import MongoClient, UpdateMany
import geopandas as gpd
import pandas as pd
from fiona.crs import from_epsg
from shapely.geometry import Point

In [None]:
# Connect to MongoDB; every connection setting is read from the environment
# after load_dotenv() has been called.
load_dotenv()
mongo_options = {
    "username": os.getenv('DB_USER'),
    "password": os.getenv('DB_PASSWORD'),
    "authSource": os.getenv('DB_AuthSource'),
    "authMechanism": os.getenv('DB_AuthMechanism'),
}
client = MongoClient(os.getenv('DB_ADR'), **mongo_options)
# The DB_Collection environment variable is passed as the *database* name here.
db = client.get_database(os.getenv('DB_Collection'))

# Handles for the two source collections
cl_udrt_kakao_cdh = db["udrt_kakao_cdh"]
cl_udrt_kakao_cdh_updated2 = db["udrt_kakao_cdh_updated2"]

In [None]:
# Build a place_name -> mapped_category lookup from udrt_kakao_cdh_updated2.
# Only the two fields needed for the lookup are projected; if several
# documents share a place_name, the last one read wins.
dict_udrt_kakao_cdh_updated2 = {
    doc["place_name"]: doc["mapped_category"]
    for doc in cl_udrt_kakao_cdh_updated2.find(
        {}, {"place_name": 1, "mapped_category": 1, "_id": 0}
    )
}
print("udrt_kakao_cdh_updated2의 값 딕셔너리로 변환 완료.")

# Walk udrt_kakao_cdh and, for every document whose place_name exists in the
# lookup, emit a merged document carrying the mapped category.
new_documents = []
for doc in cl_udrt_kakao_cdh.find({}, {"uuid": 1, "category_name": 1, "place_name": 1, "_id": 0}):
    place_name = doc["place_name"]
    if place_name not in dict_udrt_kakao_cdh_updated2:
        continue  # no counterpart in udrt_kakao_cdh_updated2

    new_documents.append({
        "uuid": doc["uuid"],
        "category_name": doc["category_name"],
        "place_name": place_name,
        # The lookup is keyed by place name, so it must be indexed with this
        # document's place_name (not with the field name "mapped_category").
        "mapped_category": dict_udrt_kakao_cdh_updated2[place_name],
    })
    print(f"{len(new_documents)}개의 문서 생성됨")


# Insert the merged documents into the new collection.
# insert_many is skipped when nothing matched.
new_collection = db["udrt_kakao_cdh_old_new_place_category"]
if new_documents:
    new_collection.insert_many(new_documents)

print(f"{len(new_documents)}개의 문서를 udrt_kakao_cdh_old_new_place_category에 삽입 완료.")

#### basic-unit 필드 추가

In [None]:
# Load the SIGUNGU boundary shapefile from the data directory.
# NOTE(review): the chdir below is relative to the current working directory,
# so re-running this cell moves up one more level each time — confirm intended.
os.chdir('../')
location_label_path = f"{os.getcwd()}/data/행정구역구분/BND_SIGUNGU_PG.shp"
# Read the shapefile with EUC-KR encoding and reproject it to EPSG:4326
location_label = gpd.read_file(location_label_path, encoding='euc-kr').to_crs(epsg=4326)

# Special/metropolitan cities: two-character SIGUNGU_CD prefix -> short city name
metropolitan_city={'11':'서울', '21':'부산', '22':'대구', '23':'인천', '24':'광주', '25':'대전', '26':'울산'}
def update_dong(row):
    """Return the row's SIGUNGU_NM, prefixed with its metropolitan city name.

    The first two characters of ``row['SIGUNGU_CD']`` are looked up in
    ``metropolitan_city``. When a city name is found the result is
    ``"<city> <SIGUNGU_NM>"``; otherwise ``SIGUNGU_NM`` is returned unchanged.
    """
    city_name = metropolitan_city.get(row['SIGUNGU_CD'][:2])
    if not city_name:
        return row['SIGUNGU_NM']
    return f"{city_name} {row['SIGUNGU_NM']}"

# Load the census region-code spreadsheet.
# census_col holds the column names that make_census_dict applies to the sheet.
census_col = [
    '시도코드', '시도명칭',
    '시군구코드', '시군구명칭',
    '읍면동코드', '읍면동명칭',
]
raw_census = pd.read_excel(f"{os.getcwd()}/data/행정구역구분/센서스_공간정보_지역_코드.xlsx")
def make_census_dict(raw_census):
    """Build a ``{시도코드: 시도명칭}`` dictionary from the raw census sheet.

    The row labelled 0 is dropped, the remaining rows are re-labelled with the
    module-level ``census_col`` names, and duplicate 시도명칭 values are removed
    before converting to a dict keyed by 시도코드.
    """
    # NOTE(review): presumably row 0 is a secondary header row — confirm
    # against the spreadsheet.
    body_rows = raw_census.drop(index=0).values.tolist()
    table = pd.DataFrame(body_rows, columns=census_col)
    names_by_code = table.set_index('시도코드')['시도명칭'].drop_duplicates()
    return names_by_code.to_dict()
census_dict = make_census_dict(raw_census)

# Add the region column
def Region_col_add(row):
    """Return the census_dict entry for the row's two-digit SIGUNGU_CD prefix.

    The first two characters of ``row['SIGUNGU_CD']`` are converted to ``int``
    and looked up in ``census_dict``. Returns ``None`` when the code is absent.
    """
    sido_code = int(row['SIGUNGU_CD'][:2])
    if sido_code not in census_dict:
        return None
    return census_dict[sido_code]


In [None]:
# Preprocess the GeoDataFrame: rewrite SIGUNGU_NM via update_dong, then add a
# Region column via Region_col_add (each applied row by row).
for column_name, row_fn in (('SIGUNGU_NM', update_dong), ('Region', Region_col_add)):
    location_label[column_name] = location_label.apply(row_fn, axis=1)
location_label

In [None]:
# Collect one record per udrt_kakao_cdh document, turning its x/y fields into
# a point geometry.
# NOTE(review): Point(x, y) with EPSG:4326 assumes x is longitude and y is
# latitude — confirm against the stored data.
place_fields = {"uuid": 1, "category_name": 1, "place_name": 1, 'x':1, 'y':1 ,"_id": 0}
data = [
    {
        "uuid": doc["uuid"],
        "category_name": doc["category_name"],
        "place_name": doc["place_name"],
        "geometry": Point(doc["x"], doc["y"]),
    }
    for doc in cl_udrt_kakao_cdh.find({}, place_fields)
]

gdf = gpd.GeoDataFrame(data, geometry="geometry", crs="EPSG:4326")
# Left join keeps every point; the boundary attributes come from the polygon
# the point lies within.
joined = gpd.sjoin(gdf, location_label, how="left", predicate="within")
# Keep only the identifying columns and expose SIGUNGU_NM as 'basic-unit'
gdf = joined[['uuid','category_name', 'place_name', 'SIGUNGU_NM']].rename(columns={'SIGUNGU_NM':'basic-unit'})
gdf

In [None]:
# Prepare one UpdateMany per gdf row: documents matching the row's
# category_name and place_name get their "basic-unit" field set.
# NOTE(review): rows whose point matched no polygon in the left join carry a
# missing basic-unit value, which is written as-is — confirm this is wanted.
bulk_updates = [
    UpdateMany(
        {"category_name": category, "place_name": place},  # filter
        {"$set": {"basic-unit": unit}},  # field to add
    )
    for category, place, unit in zip(gdf["category_name"], gdf["place_name"], gdf["basic-unit"])
]

cl_kakao_category_visit = db["kakao_category_visit_test"]
# bulk_write is skipped when there is nothing to send
if bulk_updates:
    cl_kakao_category_visit.bulk_write(bulk_updates, ordered=False)

# The number printed is the count of queued operations (len(bulk_updates)).
print(f"{len(bulk_updates)}개의 문서를 업데이트 완료")