In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# File system and data handling
import os
import pandas as pd
import glob

# Set folder path (adjust if needed)
folder_path = '/content/drive/MyDrive/LOCALDATA_NOWMON_CSV-5'

# Read all CSV files in the folder
all_files = glob.glob(os.path.join(folder_path, "*.csv"))

# Merge only relevant columns and filter for addresses with '번지'
df_list = []
for file in all_files:
    try:
        df = pd.read_csv(file, encoding='utf-8')
    except:
        df = pd.read_csv(file, encoding='cp949')  # fallback

    # Columns to keep (adapt as needed based on your data)
    target_cols = ['상호', '설치주소', '업종', '면적', '좌표', '경도', '위도', '전화번호']
    df = df[[col for col in target_cols if col in df.columns]]

    # Filter only rows where 주소에 '번지' 포함
    df = df[df['설치주소'].astype(str).str.contains('번지')]

    df_list.append(df)

# Concatenate all filtered DataFrames
merged_df = pd.concat(df_list, ignore_index=True)
print(f"📦 Total rows after merge: {len(merged_df)}")


Mounted at /content/drive


ValueError: No objects to concatenate

In [2]:
# Check column names in each CSV file
for file in all_files:
    try:
        df_temp = pd.read_csv(file, encoding='utf-8')
    except:
        df_temp = pd.read_csv(file, encoding='cp949')

    print(f"\n📂 File: {os.path.basename(file)}")
    print("🧩 Columns:", df_temp.columns.tolist())


In [3]:
df_list = []

for file in all_files:
    try:
        df = pd.read_csv(file, encoding='utf-8')
    except:
        df = pd.read_csv(file, encoding='cp949')

    # 주소 관련 컬럼 자동 탐색
    address_cols = [col for col in df.columns if '주소' in col]
    if not address_cols:
        continue  # 주소 컬럼 없으면 건너뜀

    address_col = address_cols[0]  # 첫 번째 주소 컬럼 사용

    # 필요한 컬럼 필터링 (자동 탐색)
    keep_cols = ['상호', address_col, '업종', '면적', '좌표', '경도', '위도', '전화번호']
    keep_cols_exist = [col for col in keep_cols if col in df.columns]
    df = df[keep_cols_exist]

    # '번지' 포함된 주소만 필터
    df = df[df[address_col].astype(str).str.contains('번지', na=False)]

    # 컬럼명 통일
    df.rename(columns={address_col: '설치주소'}, inplace=True)

    df_list.append(df)

# 병합
if df_list:
    merged_df = pd.concat(df_list, ignore_index=True)
    print(f"✅ 병합된 행 수: {len(merged_df)}")
    display(merged_df.head())
else:
    print("⚠️ '번지'가 포함된 주소를 가진 데이터가 없습니다.")


⚠️ '번지'가 포함된 주소를 가진 데이터가 없습니다.


In [4]:
import os
import glob

# 폴더 경로 (로컬이 아니라 Colab에서 사용 중이므로 업로드된 단일 경로로 설정)
folder_path = "/mnt/data"
all_files = glob.glob(os.path.join(folder_path, "*.csv"))

df_list = []

# 자동 주소 컬럼 탐지 및 병합
for file in all_files:
    try:
        temp_df = pd.read_csv(file, encoding='euc-kr')
    except:
        try:
            temp_df = pd.read_csv(file, encoding='cp949')
        except:
            continue

    # 주소 컬럼 자동 탐색 (우선순위: 도로명전체주소 > 소재지전체주소)
    address_col = None
    for col in ['도로명전체주소', '소재지전체주소']:
        if col in temp_df.columns:
            address_col = col
            break

    # 필수 주소 컬럼 없으면 스킵
    if not address_col:
        continue

    # '번지'가 포함된 주소만 필터링
    temp_df = temp_df[temp_df[address_col].astype(str).str.contains('번지', na=False)]

    # 사용 가능한 컬럼만 추출
    selected_cols = {
        '사업장명': '사업장명',
        '업태구분명': '업종',
        '소재지면적': '면적',
        '소재지전화': '전화번호',
        '좌표정보x(epsg5174)': 'X',
        '좌표정보y(epsg5174)': 'Y',
        address_col: '설치주소'
    }
    matched_cols = {k: v for k, v in selected_cols.items() if k in temp_df.columns}
    renamed_df = temp_df[list(matched_cols.keys())].rename(columns=matched_cols)

    df_list.append(renamed_df)

# 병합
if df_list:
    merged_df = pd.concat(df_list, ignore_index=True)
else:
    merged_df = pd.DataFrame()

merged_df.head()


In [5]:
# '번지' 조건 없이 주소가 존재하는 행 모두 병합하도록 조건 변경
df_list = []

for file in all_files:
    try:
        temp_df = pd.read_csv(file, encoding='euc-kr')
    except:
        try:
            temp_df = pd.read_csv(file, encoding='cp949')
        except:
            continue

    address_col = None
    for col in ['도로명전체주소', '소재지전체주소']:
        if col in temp_df.columns:
            address_col = col
            break

    if not address_col:
        continue

    # 주소가 비어있지 않은 경우만 필터링
    temp_df = temp_df[temp_df[address_col].notna()]

    selected_cols = {
        '사업장명': '사업장명',
        '업태구분명': '업종',
        '소재지면적': '면적',
        '소재지전화': '전화번호',
        '좌표정보x(epsg5174)': 'X',
        '좌표정보y(epsg5174)': 'Y',
        address_col: '설치주소'
    }
    matched_cols = {k: v for k, v in selected_cols.items() if k in temp_df.columns}
    renamed_df = temp_df[list(matched_cols.keys())].rename(columns=matched_cols)

    df_list.append(renamed_df)

# 병합
if df_list:
    merged_df = pd.concat(df_list, ignore_index=True)
else:
    merged_df = pd.DataFrame()

import ace_tools as tools; tools.display_dataframe_to_user(name="Merged All Business Data", dataframe=merged_df)

merged_df.head()


ModuleNotFoundError: No module named 'ace_tools'

In [6]:
import zipfile

# 압축 파일 생성 경로
zip_path = "/mnt/data/LOCALDATA_NOWMON_CSV-5.zip"

# 압축 시작
with zipfile.ZipFile(zip_path, 'w') as zipf:
    for file in all_files:
        arcname = os.path.basename(file)
        zipf.write(file, arcname=arcname)

zip_path


FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/LOCALDATA_NOWMON_CSV-5.zip'

In [7]:
# 📦 Kakao API 주소 → 위도/경도 변환 함수
import requests
import time

def get_lat_lng_from_kakao(address, api_key):
    url = f"https://dapi.kakao.com/v2/local/search/address.json?query={address}"
    headers = {"Authorization": f"KakaoAK {api_key}"}
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        result = response.json()
        if result['documents']:
            x = result['documents'][0]['x']
            y = result['documents'][0]['y']
            return float(x), float(y)
    return None, None


In [8]:
import pandas as pd
from gspread_dataframe import get_as_dataframe

# ✅ 시트에서 데이터 불러오기
df = get_as_dataframe(worksheet).dropna(subset=['설치주소'])

# ✅ 좌표 추출
df['경도'], df['위도'] = zip(*df['설치주소'].apply(lambda x: get_lat_lng_from_kakao(x, KAKAO_API_KEY)))
df['좌표확인'] = df.apply(lambda row: f"https://www.google.com/maps?q={row['위도']},{row['경도']}" if pd.notna(row['위도']) else "", axis=1)

# ✅ 실패한 경우는 시군구 평균 좌표로 대체하거나 별도 컬럼으로 분리


NameError: name 'worksheet' is not defined

In [9]:
# ✅ 설치 (한 번만 실행)
!pip install --upgrade gspread gspread_dataframe oauth2client requests

# ✅ 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')

# ✅ 라이브러리
import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from gspread_dataframe import get_as_dataframe
import requests

# ✅ 설정
KAKAO_API_KEY = "af04a0a8e5416c95eaa04cccc060031d"
CREDENTIALS_PATH = "/content/drive/My Drive/Key/credentials.json"
SPREADSHEET_URL = "https://docs.google.com/spreadsheets/d/1pjj0r5qmmGUtl9zA2ekT14xgqsOXuR0zFjSDitWhHmA/edit"
SHEET_NAME = "3월마감 조건추출 위경도기초 선택컬럼_2403"

# ✅ 인증 및 시트 열기
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
credentials = ServiceAccountCredentials.from_json_keyfile_name(CREDENTIALS_PATH, scope)
gc = gspread.authorize(credentials)
spreadsheet = gc.open_by_url(SPREADSHEET_URL)
worksheet = spreadsheet.worksheet(SHEET_NAME)

# ✅ 시트에서 데이터 불러오기
df = get_as_dataframe(worksheet).dropna(subset=['설치주소'])

# ✅ Kakao API 주소 → 위도/경도 변환 함수
def get_lat_lng_from_kakao(address, api_key):
    url = f"https://dapi.kakao.com/v2/local/search/address.json?query={address}"
    headers = {"Authorization": f"KakaoAK {api_key}"}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        result = response.json()
        if result['documents']:
            x = result['documents'][0]['x']
            y = result['documents'][0]['y']
            return float(x), float(y)
    return None, None

# ✅ 좌표 추출
df['경도'], df['위도'] = zip(*df['설치주소'].apply(lambda x: get_lat_lng_from_kakao(x, KAKAO_API_KEY)))
df['좌표링크'] = df.apply(lambda row: f"https://www.google.com/maps?q={row['위도']},{row['경도']}" if pd.notna(row['위도']) else "", axis=1)

# ✅ 미리보기
df[['설치주소', '경도', '위도', '좌표링크']].head()



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


KeyboardInterrupt: 

In [10]:
# ✅ 설치 (한 번만 실행)
!pip install --upgrade gspread gspread_dataframe oauth2client requests

# ✅ 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')

# ✅ 라이브러리
import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from gspread_dataframe import get_as_dataframe
import requests

# ✅ 설정
KAKAO_API_KEY = "af04a0a8e5416c95eaa04cccc060031d"
CREDENTIALS_PATH = "/content/drive/My Drive/Key/credentials.json"
SPREADSHEET_URL = "https://docs.google.com/spreadsheets/d/1pjj0r5qmmGUtl9zA2ekT14xgqsOXuR0zFjSDitWhHmA/edit"
SHEET_NAME = "3월마감 조건추출 위경도기초 선택컬럼_2403"

# ✅ 인증 및 시트 열기
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
credentials = ServiceAccountCredentials.from_json_keyfile_name(CREDENTIALS_PATH, scope)
gc = gspread.authorize(credentials)
spreadsheet = gc.open_by_url(SPREADSHEET_URL)
worksheet = spreadsheet.worksheet(SHEET_NAME)

# ✅ 시트에서 데이터 불러오기
df = get_as_dataframe(worksheet).dropna(subset=['설치주소'])

# ✅ Kakao API 주소 → 위도/경도 변환 함수
def get_lat_lng_from_kakao(address, api_key):
    url = f"https://dapi.kakao.com/v2/local/search/address.json?query={address}"
    headers = {"Authorization": f"KakaoAK {api_key}"}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        result = response.json()
        if result['documents']:
            x = result['documents'][0]['x']
            y = result['documents'][0]['y']
            return float(x), float(y)
    return None, None

# ✅ 좌표 추출
df['경도'], df['위도'] = zip(*df['설치주소'].apply(lambda x: get_lat_lng_from_kakao(x, KAKAO_API_KEY)))
df['좌표링크'] = df.apply(lambda row: f"https://www.google.com/maps?q={row['위도']},{row['경도']}" if pd.notna(row['위도']) else "", axis=1)

# ✅ 미리보기
df[['설치주소', '경도', '위도', '좌표링크']].head()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


KeyboardInterrupt: 