In [None]:
# 라이브러리 임포트
import os
import pandas as pd
from pymongo import MongoClient
from pyproj import Transformer
from dotenv import load_dotenv

# Load environment variables (the DB_* settings read further down via os.getenv).
load_dotenv()
# Move the working directory one level up; the data directory path built later
# is derived from os.getcwd(). NOTE: the path is relative, so re-running this
# cell moves up one more level each time.
os.chdir('../')
# Bare expression: presumably there so the notebook displays the resulting directory.
os.getcwd()

In [2]:
# User settings: directory containing the input CSV files, relative to the current working directory.
filedir=os.getcwd()+'/data/od_uuid/2024_1/'
# Column names assigned to the DataFrame built from each CSV file.
column_name=['uuid','time_begin','time_end','origin_lat','origin_lng','destination_lat','destination_lng','distance','heart','rank']

# MongoDB client; the address and credentials are all read from environment variables.
client = MongoClient(os.getenv('DB_ADR'),
          username=os.getenv('DB_USER'),
          password=os.getenv('DB_PASSWORD'),
          authSource=os.getenv('DB_AuthSource'),
          authMechanism=os.getenv('DB_AuthMechanism'))

# NOTE: despite its name, the DB_Collection variable is passed to get_database,
# i.e. it is used as the database name.
database = client.get_database(os.getenv('DB_Collection'))
collections_to_check = ["public_open_04", "public_open_05"]  # names of the source collections to iterate over
# Destination collection that receives the matched documents (written by match_and_store).
collection_new = database.get_collection('user_destination_restaurant_timefiltering')

# Coordinate transformer (EPSG:5174 -> EPSG:4326); always_xy=True keeps the (x, y) / (lon, lat) axis order.
transformer = Transformer.from_crs("EPSG:5174", "EPSG:4326", always_xy=True)

# Assign column names to the DataFrame read from a CSV file.
def Make_column(csv_file):
    """Read a trip CSV and return only the columns needed for matching.

    The file is read with ``pd.read_csv`` defaults, its rows are re-labelled
    with the module-level ``column_name`` list, and the ``uuid``,
    ``time_end``, ``destination_lat`` and ``destination_lng`` columns are
    returned.

    NOTE(review): with the default settings ``pd.read_csv`` consumes the first
    line of the file as a header, so that line is not part of the returned
    rows -- confirm the input files really start with a header row.

    Args:
        csv_file: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        A DataFrame with the four columns listed above.
    """
    wanted = ['uuid', 'time_end', 'destination_lat', 'destination_lng']
    # Round-trip through plain Python lists so the original header labels are
    # discarded and the rows can be re-labelled positionally.
    rows = pd.read_csv(csv_file).values.tolist()
    relabelled = pd.DataFrame(rows, columns=column_name)
    return relabelled[wanted]

# Keep only the rows whose arrival hour is a meal-time hour
# (11, 12, 13, 18, 19 or 20, i.e. 11:00-13:59 and 18:00-20:59).
time_filter=['11', '12', '13', '18', '19', '20']
def time_filtering(csv_file):
    """Return the rows of ``csv_file`` whose arrival hour is a meal-time hour.

    The hour is taken from characters 11-12 of the ``time_end`` value
    (assumes timestamps shaped like ``YYYY-MM-DD HH:MM:SS`` -- TODO confirm
    against the input files) and compared with the module-level
    ``time_filter`` list.

    Missing or non-string ``time_end`` values are treated as "not meal time"
    instead of raising, and the test is vectorised rather than evaluated row
    by row.

    Args:
        csv_file: DataFrame with a ``time_end`` column (despite the parameter
            name this is a DataFrame, not a path).

    Returns:
        A new DataFrame holding only the matching rows; the original index
        labels are preserved.
    """
    if csv_file.empty:
        # Nothing to filter; return a copy so callers never alias the input.
        return csv_file.copy()
    # astype(str) makes the slice safe for NaN/None and non-string values:
    # their text form has no meal-time hour at positions 11-12.
    arrival_hour = csv_file['time_end'].astype(str).str[11:13]
    return csv_file[arrival_hour.isin(time_filter)]

In [3]:
# Load the restaurant records from MongoDB into memory.
def load_mongo_data():
    """Read restaurant documents from MongoDB and convert their coordinates.

    Every collection named in the module-level ``collections_to_check`` is
    scanned on the module-level ``database``. Documents where either
    coordinate field is ``None`` or absent are skipped; for the others the
    (x, y) pair is passed through the module-level ``transformer`` and the
    result is unpacked as (lon, lat).

    Returns:
        A list of dicts shaped as
        ``{"lat": ..., "lon": ..., "restaurant": {<address and name fields>}}``.
    """
    x_field = "좌표정보x(epsg5174)"
    y_field = "좌표정보y(epsg5174)"
    # Descriptive fields carried along with each record. Other source fields
    # (e.g. "업태구분명", "전통업소주된음식") are not loaded at the moment.
    info_fields = ("소재지전체주소", "도로명전체주소", "사업장명")
    # Fetch only the fields that are actually used.
    projection = {x_field: 1, y_field: 1, **dict.fromkeys(info_fields, 1)}

    records = []
    for name in collections_to_check:
        cursor = database[name].find({}, projection)
        for doc in cursor:
            x = doc.get(x_field)
            y = doc.get(y_field)
            if x is None or y is None:
                # No usable coordinates: this document cannot be matched.
                continue
            lon, lat = transformer.transform(x, y)
            records.append({
                "lat": lat,
                "lon": lon,
                "restaurant": {field: doc.get(field) for field in info_fields},
            })
    return records

In [4]:
# Compare each trip destination with the restaurant records held in memory.
def match_and_store(processing_dataframe, memory_data):
    """Match trip destinations to nearby restaurants and store the matches.

    For every row of ``processing_dataframe`` all entries of ``memory_data``
    are scanned; an entry matches when both its latitude and its longitude
    differ from the row's destination by less than 0.0001. The non-empty
    address/name values of all matching entries are collected into one list
    per field, and if at least one value was collected a document is inserted
    into the module-level ``collection_new``.

    Args:
        processing_dataframe: DataFrame with ``uuid``, ``time_end``,
            ``destination_lat`` and ``destination_lng`` columns.
        memory_data: Iterable of dicts with ``"lat"``, ``"lon"`` and
            ``"restaurant"`` keys, as produced by ``load_mongo_data``.

    Returns:
        None. The only effect is the inserts into ``collection_new``.
    """
    # Fields collected per trip (other source fields such as "업태구분명" or
    # "전통업소주된음식" are not collected at the moment).
    fields = ("소재지전체주소", "도로명전체주소", "사업장명")

    for _, trip in processing_dataframe.iterrows():
        uuid = trip['uuid']
        time_end = trip['time_end']
        dest_lat = trip['destination_lat']
        dest_lng = trip['destination_lng']

        hits = {field: [] for field in fields}

        for candidate in memory_data:
            # The "<" comparisons are deliberately not negated into ">=":
            # a NaN coordinate must never count as a match.
            lat_close = abs(candidate["lat"] - dest_lat) < 0.0001
            if not (lat_close and abs(candidate["lon"] - dest_lng) < 0.0001):
                continue
            for field, bucket in hits.items():
                value = candidate["restaurant"].get(field)
                if value:
                    bucket.append(value)

        # Skip trips for which nothing was collected.
        if not any(hits.values()):
            continue

        collection_new.insert_one({
            "uuid": uuid,
            "time_end": time_end,
            "destination_lat": dest_lat,
            "destination_lng": dest_lng,
            **hits,
        })

In [None]:
# Main execution
memory_data = load_mongo_data()  # load the MongoDB data into memory
print("MongoDB 데이터 메모리 로드 완료")

In [None]:
# Process every CSV file in filedir: label the columns, keep the meal-time
# arrivals, then match them against the in-memory restaurant data and store
# the matches in MongoDB.
for file in os.listdir(filedir):
    if file.endswith(".csv"):
        file_path = os.path.join(filedir, file)
        processing_dataframe = time_filtering(Make_column(file_path))
        match_and_store(processing_dataframe, memory_data)

print("처리 완료")