In [100]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
import pyautogui as py
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
import json
import requests


def get_price(soup):
    details = soup.find_all('section')[2]
    product_price = details.find( class_ = 'flex justify-between mb-2 text-base text-left _product__price').span.get_text()    
    return convert_currency(product_price)
    
def get_name(soup):
    h1 = soup.find('h1')
    return h1.get_text()

def get_details(soup):
  desc = soup.find('div', class_ = '_product__description').find_all('div')[1].find_all('p')
  
  description_parts = []
  
  # 색상 변수 초기화
  color = ""
  material = ""
  
  # 각 p 태그를 순회하면서 내용 추출
  for p in desc:
      text = p.get_text(strip=True)
      if text.startswith("Color:"):
          color = text.split("Color:")[1].strip()
      elif text.startswith("Material:"):
          material = text.split("Material:")[1].strip()
      else:
          description_parts.append(text)
  
  # description_parts를 합쳐서 하나의 문자열로 만듦
  description = "\n".join(description_parts)
  
  # 결과 딕셔너리 생성
  result = {
      "description": description,
      "color": color,
      "material" : material
  }
  return result

def get_photos(soup):
  if not soup.find('div', class_ = '_product__media'):
     return []
  imgs = soup.find('div', class_ = '_product__media').find_all('img')
  result = []
  for img in imgs:
    result.append(  'https:' + img['src'])
  return result

def get_sizes(soup):
  if not soup.find('div', class_='_product__option-item'):
     return {}
  labels = soup.find('div', class_='_product__option-item').find('fieldset', class_ = '_sizes-grid').find_all('label')
  size_availability = {}
  for label in labels:
    size = label.get('aria-label').split(' ')[1]
    btn_div = label.find('div', class_='_btn')
    
    if 'open:bg-black' in btn_div.get('class'):
        size_availability[size] = False
    else:
        size_availability[size] = True
            
  return size_availability

def get_is_sold_out(soup):
  if soup.find('button', id='addToCartForm' ):
    btn_text = soup.find('button', id='addToCartForm' ).find('span').get_text()
    
    return btn_text == 'Notify me when available'
  return True


def extract_infos(soup, category):
   is_sold_out = get_is_sold_out(soup)
   sizes = get_sizes(soup)
   photos = get_photos(soup)
   details = get_details(soup)
   name = get_name(soup)
   price = get_price(soup)
   return {
      "is_sold_out": is_sold_out,
      "sizes": sizes,
      "photos": photos,
      "description": details["description"],
      "color": details["color"],
      "material": details["material"],
      "name": name,
      "price": price,
      "category" : category
   }

def urls_from_json(file_path):
    # JSON 파일 열기
    with open(file_path, 'r') as file:
        urls = json.load(file)
    
    return urls
def convert_currency(currency_string):
    # 통화 기호와 쉼표 제거
    number_string = currency_string.replace('₩', '').replace(',', '')
    # 숫자로 변환하여 반환
    return int(number_string)


options = Options()
# options.add_argument('--start-maximized')
# options.add_experimental_option('detach', True)

driver = webdriver.Chrome(options=options)

# be_url = 'http://woojisoo.shop/api/products/'
be_url = 'http://3.34.94.193:8000/api/products/'

url = "https://wearebraindead.com/products/brain-dead-x-homeshake-horsie-t-shirt-clay?variant=44446144331907"
url2 = 'https://wearebraindead.com/products/brain-dead-x-homeshake-horsie-lp-yellow?variant=44150010937475'
url3 = 'https://wearebraindead.com/products/oakley-factory-team-suede-flesh-lily-pad-white?variant=43320216191107'
soldout_url = 'https://wearebraindead.com/products/ms-45-x-brain-dead-t-shirt-black?variant=39705442713731'
top_list_url ='https://wearebraindead.com/collections/apparel?filter.p.m.nodnod.type=gid%3A%2F%2Fshopify%2FMetaobject%2F50877235331&pagination=5'
bottom_list_url ='https://wearebraindead.com/collections/apparel?filter.p.m.nodnod.type=gid%3A%2F%2Fshopify%2FMetaobject%2F50877268099&pagination=5'
acce_list_url = 'https://wearebraindead.com/collections/accessories?filter.p.m.nodnod.type=gid%3A%2F%2Fshopify%2FMetaobject%2F50877300867&pagination=5'
foot_list_url = 'https://wearebraindead.com/collections/footwear?pagination=5'
home_list_url = 'https://wearebraindead.com/collections/objects?pagination=5'
eye_list_url = 'https://wearebraindead.com/collections/accessories?filter.p.m.nodnod.type=gid%3A%2F%2Fshopify%2FMetaobject%2F50877333635&pagination=5'

def get_soup(url):
    driver.get(url) ## 사이트 접속

    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    return soup


def post_product(detail_url, category):
    soup = get_soup(detail_url)
    infos = extract_infos(soup, category)
    print(infos)
    json_data = json.dumps(infos)
    headers = {
    'Content-Type': 'application/json'
    }
    response = requests.post(be_url, data=json_data, headers=headers)
    print('Status Code:', response.status_code)
    print('Response Body:', response.text)

urls = urls_from_json('./backup/bottom_urls.json')

In [None]:

soup = get_soup(bottom_list_url)


bottom_list_url

In [101]:
urls = urls_from_json('./backup/home_urls.json')

cateogry = "homegoods"
# post_product(urls[0], cateogry)
for url in urls:
  post_product(url, cateogry)

{'is_sold_out': True, 'sizes': {}, 'photos': ['https://wearebraindead.com/cdn/shop/files/Brain_Dead_x_Adidas_Bowling_Shoe_Black_Side_optimized.jpg?v=1717631325&width=1024', 'https://wearebraindead.com/cdn/shop/files/Brain_Dead_x_Adidas_Bowling_Shoe_Black_Front_optimized.jpg?v=1717631325&width=1024', 'https://wearebraindead.com/cdn/shop/files/Brain_Dead_x_Adidas_Bowling_Shoe_Black_Back_optimized.jpg?v=1717631325&width=1024', 'https://wearebraindead.com/cdn/shop/files/Brain_Dead_x_Adidas_Bowling_Shoe_Black_Top_optimized.jpg?v=1717631325&width=1024', 'https://wearebraindead.com/cdn/shop/files/Brain_Dead_x_Adidas_Bowling_Shoe_Black_Bottom_optimized.jpg?v=1717631325&width=1024'], 'description': 'This item is in US men\'s sizing. If you\'re between sizes, we recommend to size up.\nOur "righty" leather bowling shoe with Adidas. A 1-to-1 reissue of their 1978 design in a classic monochrome colorway with serrated triple stripes on the upper. Features a wooden slide sole and rubber stop heel des

In [None]:
import json
scripts = soup.find('div', class_= '_card').find_all('a')

my_urls = []
for i, value in enumerate(scripts):
  if i % 2 == 0:
    my_url = "https://wearebraindead.com"+  value['href']
    my_urls.append(my_url)

my_urls

# json.loads(text)

In [97]:
url = urls[0]

print(url)
soup = get_soup(url)

btn_text = soup.find('button', id='addToCartForm' ).find('span').get_text()
print(btn_text)


https://wearebraindead.com/collections/footwear/products/brain-dead-x-adidas-bowling-shoe-black


AttributeError: 'NoneType' object has no attribute 'find'