In [None]:
import requests
import pandas as pd

def get_product_location(pid, key_word):
    """Look up a product's ``location`` field in the keyword search results.

    Requests the first page (``page=0``, ``n=100``, ordered by date) of the
    Bunjang search endpoint for ``key_word`` and scans it for an entry whose
    ``pid`` equals ``pid``. Only that first page is inspected.

    Args:
        pid: Product id to look for; compared with ``==`` against each
            result's ``pid`` value.
        key_word: Search keyword, sent as the ``q`` query parameter.

    Returns:
        The matching entry's ``location`` value ('' when the entry has no
        such key), or ``None`` when the request fails, the status code is
        not 200, or no entry on the first page matches.
    """
    url = 'https://api.bunjang.co.kr/api/1/find_v2.json'
    # Passing the query through ``params`` lets requests percent-encode the
    # keyword, so characters such as '&', '#' or spaces cannot corrupt the URL.
    params = {
        'order': 'date',
        'n': 100,
        'page': 0,
        'req_ref': 'search',
        'q': key_word,
        'stat_device': 'w',
        'stat_category_required': 1,
        'version': 4,
    }

    try:
        # Without a timeout a single stalled connection blocks forever.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f'Request to {url} failed: {exc}')
        return None

    if response.status_code != 200:
        print(f'Request to {response.url} returned status code {response.status_code}')
        return None

    # ``or []`` also covers a payload where 'list' is present but null.
    for item in response.json().get('list') or []:
        # .get() so an entry lacking 'pid' is skipped instead of raising.
        if item.get('pid') == pid:
            return item.get('location', '')
    return None


def get_product_detail(pid, key_word):
    """Fetch one product's detail record and flatten it into a dict.

    Calls the Bunjang product-detail endpoint for ``pid`` and, on success,
    also calls ``get_product_location(pid, key_word)`` to obtain the
    ``location`` value, which comes from the search results rather than the
    detail payload. Each successful call therefore issues two HTTP requests.

    Args:
        pid: Product id inserted into the detail URL.
        key_word: Search keyword, forwarded to ``get_product_location``.

    Returns:
        A dict with the keys ``pid``, ``name``, ``description``, ``price``,
        ``qty``, ``saleStatus``, ``status``, ``imageUrl``, ``keywords``,
        ``address``, ``label``, ``updatedBefore`` and ``location``. Fields
        missing from the payload default to ''; ``location`` may be ``None``.
        Returns ``None`` when the request fails or the status is not 200.
    """
    url = f'https://api.bunjang.co.kr/api/pms/v1/products-detail/{pid}?viewerUid=-1'

    try:
        # Send the request; the timeout keeps a stalled connection from
        # hanging the caller indefinitely.
        response = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f'Request to {url} failed: {exc}')
        return None

    if response.status_code != 200:
        print(f'Request to {url} returned status code {response.status_code}')
        return None

    # ``dict.get(key, {})`` still returns None when the key is present with a
    # JSON null, so fall back with ``or {}`` before chaining further lookups.
    payload = response.json() or {}
    data = (payload.get('data') or {}).get('product') or {}
    geo = data.get('geo') or {}
    location = get_product_location(pid, key_word)

    return {
        'pid': data.get('pid', ''),
        'name': data.get('name', ''),
        'description': data.get('description', ''),
        'price': data.get('price', ''),
        'qty': data.get('qty', ''),
        'saleStatus': data.get('saleStatus', ''),
        'status': data.get('status', ''),
        'imageUrl': data.get('imageUrl', ''),
        'keywords': data.get('keywords', ''),
        'address': geo.get('address', ''),
        'label': geo.get('label', ''),
        'updatedBefore': data.get('updatedBefore', ''),
        'location': location,  # taken from the search results, not the detail payload
    }


def bunjang(key_word):
    """Crawl Bunjang search results for a keyword and save the details as CSV.

    Walks the search endpoint page by page (newest first) until a page comes
    back empty or a request fails, fetches the detail record of every listed
    product through ``get_product_detail``, and writes the collected rows to
    ``'{key_word}데이터.csv'`` (UTF-8 with BOM). Two columns are added to each
    row: ``link`` (mobile product URL built from ``pid``) and ``keyword``.

    Args:
        key_word: Search keyword; also used in the output file name.

    Returns:
        None. When no product could be collected, a message is printed and
        no file is written.
    """
    url = 'https://api.bunjang.co.kr/api/1/find_v2.json'
    items_number = 100  # page size; noted by the original author as the maximum
    data = []
    page = 0

    while True:
        # ``params`` lets requests percent-encode the keyword, so characters
        # such as '&', '#' or spaces cannot corrupt the query string.
        params = {
            'order': 'date',
            'n': items_number,
            'page': page,
            'req_ref': 'search',
            'q': key_word,
            'stat_device': 'w',
            'stat_category_required': 1,
            'version': 4,
        }
        try:
            response = requests.get(url, params=params, timeout=10)
        except requests.RequestException as exc:
            # Stop paginating but keep what was collected so far.
            print(f'Request to {url} failed: {exc}')
            break

        if response.status_code != 200:
            print(f'Request to {response.url} returned status code {response.status_code}')
            break

        datas = response.json().get('list') or []
        if not datas:
            break

        for piddata in datas:
            pid = piddata.get('pid')
            if pid is None:
                continue
            detail_data = get_product_detail(pid, key_word)
            if detail_data is not None:
                data.append(detail_data)

        page += 1

    if not data:
        # An empty DataFrame has no 'pid' column, so building the link column
        # below would raise KeyError; bail out with a clear message instead.
        print(f'No products collected for {key_word}; nothing was saved')
        return

    df = pd.DataFrame(data)
    df['link'] = 'https://m.bunjang.co.kr/products/' + df['pid'].astype(str)
    df['keyword'] = key_word

    df.to_csv(f'{key_word}데이터.csv', index=False, encoding='utf-8-sig')


In [None]:
# Crawl Bunjang for the keyword '맥북'; bunjang() saves the rows to '맥북데이터.csv'.
bunjang('맥북')

In [None]:
import pandas as pd

def price(input_file, output_file, prices):
    """Filter a CSV down to the rows whose price exceeds a threshold.

    Args:
        input_file: Path of the CSV file to load; it must have a ``price``
            column.
        output_file: Path the filtered CSV is written to (UTF-8 with BOM,
            no index column).
        prices: Threshold; only rows with ``price`` strictly greater than
            this value are kept.

    Rows whose ``price`` cannot be parsed as a number become NaN and are
    dropped as well, because NaN never compares greater than the threshold.
    """
    listings = pd.read_csv(input_file)

    # Coerce the column to numeric; unparseable entries turn into NaN.
    listings = listings.assign(
        price=pd.to_numeric(listings['price'], errors='coerce')
    )

    # Keep only rows strictly above the threshold and write them out.
    above_threshold = listings['price'] > prices
    listings.loc[above_threshold].to_csv(
        output_file, index=False, encoding='utf-8-sig'
    )



In [None]:

# Keep only the rows of '맥북데이터.csv' priced above 299,999 and save them to '맥북정제된데이터.csv'.
price('맥북데이터.csv', '맥북정제된데이터.csv',299999)