# 그린맨게이밍 크롤링

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, TimeoutException
from selenium.webdriver.support.ui import Select
import time
import pandas as pd

def configure_chrome_options():
    """Build the Chrome options object used to launch the crawler's browser.

    Returns:
        An ``Options`` instance with the window maximised, the
        automation-related switches/extension turned off, and the
        ``lang=ko_KR`` argument set.
    """
    chrome_opts = Options()

    # Plain command-line switches, added in the same order as before.
    for switch in (
        "--start-maximized",
        "--disable-blink-features=AutomationControlled",
    ):
        chrome_opts.add_argument(switch)

    # Experimental options that remove the automation switch and extension.
    experimental_settings = (
        ("excludeSwitches", ["enable-automation"]),
        ("useAutomationExtension", False),
    )
    for option_name, option_value in experimental_settings:
        chrome_opts.add_experimental_option(option_name, option_value)

    chrome_opts.add_argument("lang=ko_KR")
    # Headless mode is intentionally left off:
    # chrome_opts.add_argument("--headless")
    return chrome_opts

# Launch Chrome and open the PC catalogue listing.
options = configure_chrome_options()
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
wait = WebDriverWait(driver, 20)  # shared 20-second explicit wait bound to this driver

driver.get("https://www.greenmangaming.com/ko/all-games/platforms-os/pc/")

# Accept the cookie banner if it appears. ``Exception`` is caught instead of a
# bare ``except`` so that Ctrl+C (KeyboardInterrupt) still aborts the run.
try:
    accept_button = wait.until(EC.element_to_be_clickable((By.ID, "privacy_pref_optin")))
    accept_button.click()
    print("쿠키 동의 완료 ('Accept All' 클릭)")
    time.sleep(2)
except Exception:
    print("쿠키 동의 버튼을 찾지 못했거나 이미 처리됨")

# Expand the listing: scroll to the bottom and press the "show more" button
# until the page stops growing or the button fails too many times in a row.
last_height = driver.execute_script("return document.body.scrollHeight")
attempts = 0
max_attempts = 5

while attempts < max_attempts:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)

    try:
        show_more_btn = wait.until(EC.element_to_be_clickable((By.ID, "show-more")))
        driver.execute_script("arguments[0].scrollIntoView();", show_more_btn)
        show_more_btn.click()
        print("🔘 'Show More Results' 클릭")
        time.sleep(2)
        attempts = 0  # reset the consecutive-failure counter after a successful click
    except (NoSuchElementException, ElementClickInterceptedException, TimeoutException):
        attempts += 1
        print(f"버튼 클릭 실패 ({attempts}/{max_attempts})")
        time.sleep(1)

    # Stop as soon as one pass leaves the document height unchanged.
    # NOTE(review): this check can also fire right after a failed click, in
    # which case the retry counter never reaches max_attempts - confirm that
    # ending on the first unchanged height is intended.
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

print("모든 게임 로딩 완료. 크롤링 시작...")

# Collect the link of the first anchor inside every listed result item.
games = driver.find_elements(By.CSS_SELECTOR, "li.ais-Hits-item")
game_links = [game.find_element(By.CSS_SELECTOR, "a").get_attribute("href") for game in games]
print(f"총 {len(game_links)}개의 게임 링크 수집 완료")



def handle_age_verification(driver):
    """Submit the age-verification modal if one appears on the current page.

    Waits up to five seconds for a ``div.modal-body`` element. When it is
    present, the day/month/year dropdowns are set to 01/01/1990 and the
    ``button.btn-success`` button is clicked.

    Args:
        driver: The Selenium WebDriver showing the page.

    Returns:
        True when the form was filled in and submitted; False when a
        timeout occurred (no modal) or any other error was raised.
    """
    # (dropdown element id, visible option text) in the order they are filled.
    birth_date_fields = (
        ("day", "01"),
        ("month", "01"),
        ("year", "1990"),
    )

    try:
        modal_locator = (By.CSS_SELECTOR, "div.modal-body")
        WebDriverWait(driver, 5).until(EC.presence_of_element_located(modal_locator))
        print("  ⚠️ 연령 확인 모달창 처리 중...")

        for field_id, visible_text in birth_date_fields:
            dropdown = Select(driver.find_element(By.ID, field_id))
            dropdown.select_by_visible_text(visible_text)

        driver.find_element(By.CSS_SELECTOR, "button.btn-success").click()
        time.sleep(2)  # give the modal time to close
    except TimeoutException:
        # No modal showed up within the wait; nothing to do.
        return False
    except Exception as err:
        print(f"모달창 처리 중 오류: {str(err)}")
        return False
    return True

def get_game_details(driver, url):
    """Open a product page and read its age rating and genre.

    Args:
        driver: The Selenium WebDriver used to load the page.
        url: Address of the product page.

    Returns:
        A dict with the keys ``"연령 등급"`` (alt text of the rating image)
        and ``"장르"`` (text of the genre cell). A value stays ``None`` when
        the corresponding row or element is not found, or when extraction
        fails; failures are printed rather than raised.
    """
    driver.get(url)
    time.sleep(3)  # fixed pause before inspecting the page

    # Dismiss the age gate first, if the page shows one.
    handle_age_verification(driver)

    details = {
        "연령 등급": None,
        "장르": None
    }

    try:
        # Wait on the driver that was passed in. The module-level ``wait``
        # is bound to one specific driver and would silently watch the wrong
        # browser if this function were called with a different one.
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "div.product-details-info"))
        )

        rows = driver.find_elements(By.CSS_SELECTOR, "div.product-details-info > div.row")

        for row in rows:
            try:
                label = row.find_element(By.CSS_SELECTOR, "div.left-col > strong").text.strip()

                if label == "연령 등급":
                    # The rating is exposed as the alt text of an image.
                    age_img = row.find_element(By.CSS_SELECTOR, "div.right-col img")
                    details["연령 등급"] = age_img.get_attribute("alt")
                elif label == "장르":
                    genre_text = row.find_element(By.CSS_SELECTOR, "div.right-col").text
                    details["장르"] = genre_text.strip()
            except NoSuchElementException:
                # Row without a label or without the expected value element:
                # leave the field as None and move on to the next row.
                continue

    except Exception as e:
        print(f"상세 정보 추출 중 오류 발생: {str(e)}")

    return details

def _find_text(selector, default=None):
    """Return the stripped text of the first element matching ``selector`` on the current page, or ``default``."""
    try:
        return driver.find_element(By.CSS_SELECTOR, selector).text.strip()
    except Exception:
        # Best effort: a missing optional element must not abort the game.
        # ``Exception`` (not a bare except) keeps Ctrl+C working.
        return default


def _find_attribute(selector, attribute):
    """Return ``attribute`` of the first element matching ``selector`` on the current page, or ``None``."""
    try:
        return driver.find_element(By.CSS_SELECTOR, selector).get_attribute(attribute)
    except Exception:
        return None


result = []
try:
    for i, link in enumerate(game_links):
        print(f"[{i+1}/{len(game_links)}] {link} 처리 중...")

        try:
            # get_game_details() loads the page itself (and handles the age
            # gate), so it runs first and the basic fields are then read from
            # that same page instead of loading every product twice.
            details = get_game_details(driver, link)

            # The product name is mandatory: a timeout here skips the game.
            name = wait.until(EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "h1.product-name")
            )).text.strip()

            image = _find_attribute("img.product-main-image", "src")
            discount = _find_text("div.discount > p", "0%")
            current_price = _find_text("span.current-price")
            # Without a previous price the list price equals the current price.
            original_price = _find_text("span.prev-price", current_price)
            drm_tag = _find_attribute("li.game-drm.active", "data-drm-name")

            result.append({
                "이름": name,
                "링크": link,
                "이미지": image,
                "할인률": discount,
                "할인가": current_price,
                "정가": original_price,
                "DRM": drm_tag,
                "연령 등급": details["연령 등급"],
                "장르": details["장르"]
            })

        except Exception as e:
            print(f"❗게임 처리 중 오류 발생: {str(e)}")
            # Keep a screenshot of the failing page for debugging.
            driver.save_screenshot(f"error_{i+1}.png")
            continue
finally:
    # Always close the browser, even if the crawl is interrupted.
    driver.quit()

# Save everything that was collected.
df = pd.DataFrame(result)
df.to_csv("greenmangaming_all_games_details.csv", index=False, encoding="utf-8-sig")
print(f"{len(df)}개의 게임 상세 정보를 저장했습니다.")

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import urllib.parse
import time

def get_driver():
    """Create a Chrome WebDriver with the AutomationControlled blink feature disabled.

    Returns:
        A new ``webdriver.Chrome`` instance using a default ``Service``.
    """
    chrome_options = Options()
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_service = Service()
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

def handle_cookie_consent(driver):
    """Click the cookie banner's accept button if it becomes clickable within three seconds.

    Any failure (banner absent, click error, ...) is reported with a message
    and otherwise ignored.

    Args:
        driver: The Selenium WebDriver showing the page.
    """
    try:
        consent_locator = (By.ID, "privacy_pref_optin")
        short_wait = WebDriverWait(driver, 3)
        short_wait.until(EC.element_to_be_clickable(consent_locator)).click()
        print("쿠키 동의 'Accept All' 클릭 완료")
        time.sleep(1)
    except Exception:
        print("쿠키 동의 팝업 없음 또는 이미 처리됨")

def handle_age_verification_click(driver):
    """Pass the age gate by clicking the birth-date dropdown options, then confirm once.

    The day, month and year dropdowns are set to 01/01/1990 by clicking the
    matching ``<option>`` elements, after which the confirm button is pressed
    a single time. Any failure (including the form not being present) is
    printed and swallowed so the caller can carry on.

    Args:
        driver: The Selenium WebDriver showing the page.
    """
    # (select element id, option value attribute) in the order they are filled.
    birth_date_fields = (
        ("day", "string:01"),
        ("month", "string:01"),
        ("year", "string:1990"),
    )

    try:
        for select_id, option_value in birth_date_fields:
            dropdown = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.ID, select_id))
            )
            dropdown.click()
            option = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable(
                    (By.XPATH, f"//select[@id='{select_id}']/option[@value='{option_value}']")
                )
            )
            option.click()
            time.sleep(0.5)

        confirm_btn = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, ".btn-container button.btn.btn-success"))
        )

        # Bring the button into view, then click exactly once. Clicking
        # natively and again via JavaScript could hit an element that the
        # first click had already removed, reporting a failure after a
        # successful submit. The JavaScript click is now only a fallback for
        # when the native click itself raises.
        driver.execute_script("arguments[0].scrollIntoView(true);", confirm_btn)
        time.sleep(0.3)
        try:
            confirm_btn.click()
        except Exception:
            driver.execute_script("arguments[0].click();", confirm_btn)

        print("연령 인증 완료 (클릭 방식)")
        time.sleep(2)
    except Exception as e:
        print(f"연령 인증 처리 실패 (클릭 방식): {e}")

def extract_game_info(driver):
    """Read the genre and the age rating from the product page currently loaded.

    Args:
        driver: The Selenium WebDriver showing the product page.

    Returns:
        A ``(genre, age_rating)`` tuple. Either item is ``None`` when its row
        is not found; ``(None, None)`` is returned when extraction fails as a
        whole (the error is printed).
    """
    try:
        genre = None
        age_rating = None

        # Wait until at least one label has non-empty text (the original
        # author notes these labels are filled in by Angular bindings).
        WebDriverWait(driver, 10).until(
            lambda d: any(e.text.strip() != "" for e in d.find_elements(By.CSS_SELECTOR, ".left-col strong"))
        )

        rows = driver.find_elements(By.CSS_SELECTOR, "div.row")
        print(f"총 row 개수: {len(rows)}")

        for row in rows:
            try:
                label_elem = row.find_element(By.CSS_SELECTOR, ".left-col strong")
                value_elem = row.find_element(By.CSS_SELECTOR, ".right-col")

                label = label_elem.text.strip()
                print(f"label: {label}")

                if label == "장르":
                    genre = value_elem.text.strip()
                    print(f"장르: {genre}")
                elif label == "연령 등급":
                    try:
                        # Prefer the alt text of the rating image.
                        img = value_elem.find_element(By.TAG_NAME, "img")
                        age_rating = img.get_attribute("alt")
                        print(f"연령 이미지 alt: {age_rating}")
                    except Exception:
                        # No usable image: fall back to the cell's text.
                        # ``Exception`` (not a bare except) keeps Ctrl+C working.
                        age_rating = value_elem.text.strip()
                        print(f"연령 텍스트: {age_rating}")
            except Exception:
                continue  # rows without the label/value elements are skipped

        return genre, age_rating

    except Exception as e:
        print(f"상세정보 추출 실패: {e}")
        return None, None



def get_game_details(driver, game_name):
    """Search for a game, open the first hit and return its genre and age rating.

    Steps: load the search page for ``game_name``, dismiss the cookie banner,
    follow the link of the first search result, pass the age gate and read
    the details from the product page.

    Args:
        driver: The Selenium WebDriver to drive.
        game_name: Title to search for.

    Returns:
        A ``(genre, age_rating)`` tuple, or ``(None, None)`` when any step
        raises (the error is printed).
    """
    try:
        encoded_name = urllib.parse.quote(game_name)
        driver.get(f"https://www.greenmangaming.com/ko/search/?query={encoded_name}")
        time.sleep(2)

        handle_cookie_consent(driver)

        # Follow the link of the first search hit.
        hit_locator = (By.CSS_SELECTOR, "li.ais-Hits-item a")
        top_hit = WebDriverWait(driver, 5).until(EC.element_to_be_clickable(hit_locator))
        driver.get(top_hit.get_attribute("href"))
        time.sleep(2)

        handle_age_verification_click(driver)

        extracted = extract_game_info(driver)
        genre, age_rating = extracted
        print(f"'{game_name}' 처리 완료 → 장르: {genre}, 연령등급: {age_rating}")
        return genre, age_rating

    except Exception as err:
        print(f"'{game_name}' 처리 실패: {err}")
        return None, None

def main():
    """Look up genre and age rating for every game in the input CSV and save them.

    Reads the ``이름`` column of ``greenmangaming_all_games.csv``, calls
    ``get_game_details`` for each name and writes the collected rows to
    ``greenmangaming_game_details.csv``.
    """
    # Read the input before starting Chrome so a missing or broken CSV does
    # not leave a browser process behind.
    df = pd.read_csv("greenmangaming_all_games.csv")
    driver = get_driver()
    results = []

    try:
        for name in df["이름"]:
            genre, age = get_game_details(driver, name)
            results.append({"이름": name, "장르": genre, "연령등급": age})
    finally:
        # Close the browser even if the loop is interrupted.
        driver.quit()

    # Save the results.
    result_df = pd.DataFrame(results)
    result_df.to_csv("greenmangaming_game_details.csv", index=False, encoding="utf-8-sig")
    print("전체 작업 완료, 결과 저장됨: greenmangaming_game_details.csv")

if __name__ == "__main__":
    main()

In [16]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import csv

def configure_driver():
    """Create the Chrome WebDriver used by this cell's crawler.

    Returns:
        A new ``webdriver.Chrome`` instance started with the
        AutomationControlled, no-sandbox and dev-shm-usage switches.
    """
    chrome_options = Options()
    for switch in (
        "--disable-blink-features=AutomationControlled",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ):
        chrome_options.add_argument(switch)
    return webdriver.Chrome(service=Service(), options=chrome_options)

def get_game_info(driver, game_name):
    """Search for a game and scrape price information from the first result.

    Args:
        driver: The Selenium WebDriver to drive.
        game_name: Title to search for.

    Returns:
        A dict with the keys ``게임이름``, ``할인율``, ``정가``, ``할인가``,
        ``DRM`` and ``상세URL``. When the search result cannot be read, the
        error is printed and a dict holding only the game name (all other
        values empty strings) is returned.
    """
    # Local import: this cell does not import urllib at the top.
    from urllib.parse import quote

    def text_or(parent, selector, default):
        """Stripped text of the first match under ``parent``, or ``default``."""
        try:
            return parent.find_element(By.CSS_SELECTOR, selector).text.strip()
        except Exception:
            # Optional field: fall back instead of failing the whole game.
            # ``Exception`` (not a bare except) keeps Ctrl+C working.
            return default

    def attr_or(parent, selector, attribute, default):
        """``attribute`` of the first match under ``parent``, or ``default``."""
        try:
            value = parent.find_element(By.CSS_SELECTOR, selector).get_attribute(attribute)
        except Exception:
            return default
        return default if value is None else value

    base_url = "https://www.greenmangaming.com/ko/search/?query="
    # Percent-encode the title: characters such as "&", "#" or "+" would
    # otherwise be interpreted as URL syntax and corrupt the search query.
    driver.get(base_url + quote(str(game_name)))

    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "li.ais-Hits-item"))
        )
        time.sleep(2)

        first_result = driver.find_element(By.CSS_SELECTOR, "li.ais-Hits-item")

        # The product name is mandatory; a missing one fails the lookup.
        title = first_result.find_element(By.CSS_SELECTOR, ".prod-name").text.strip()
        discount = text_or(first_result, ".discount p", "0%")
        prev_price = text_or(first_result, ".prev-price", "")
        curr_price = text_or(first_result, ".current-price", "")
        drm = attr_or(first_result, ".game-drm[ng-click]", "data-drm-name", "")
        detail_url = attr_or(first_result, 'a[href^="/games/"]', "href", "")

        return {
            '게임이름': title if title else game_name,
            '할인율': discount,
            # Without a previous price the list price equals the current price.
            '정가': prev_price if prev_price else curr_price,
            '할인가': curr_price,
            'DRM': drm,
            '상세URL': detail_url
        }

    except Exception as e:
        print(f"{game_name} 처리 중 오류 발생: {e}")
        return {
            '게임이름': game_name,
            '할인율': '',
            '정가': '',
            '할인가': '',
            'DRM': '',
            '상세URL': ''
        }

# Load the game names; only the first three are used in this test run.
input_file = 'greenmangaming_all_games.csv'
df = pd.read_csv(input_file)
game_names = df['이름'].dropna().tolist()[:3]

# Run the crawl.
results = []
driver = configure_driver()

try:
    for name in game_names:
        print(f"{name} 처리 중...")
        info = get_game_info(driver, name)
        results.append(info)
        time.sleep(1.5)  # pause between searches
finally:
    # Close the browser even if a lookup raises or the run is interrupted.
    driver.quit()

# Save the results.
output_file = 'greenmangaming_crawled_info_test.csv'
if results:
    with open(output_file, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=['게임이름', '할인율', '정가', '할인가', 'DRM', '상세URL'])
        writer.writeheader()
        writer.writerows(results)
    print(f"\n결과 저장됨: {output_file}")
else:
    print("\n저장할 데이터가 없습니다.")


Destiny 2: Year of Prophecy Ultimate Edition 처리 중...
Stellar Blade™ Complete Edition 처리 중...
Clair Obscur: Expedition 33 처리 중...

결과 저장됨: greenmangaming_crawled_info_test.csv


In [10]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

def configure_driver():
    """Start Chrome with the switches this cell's crawler relies on.

    Returns:
        A new ``webdriver.Chrome`` instance started with the
        AutomationControlled, no-sandbox and dev-shm-usage switches.
    """
    browser_options = Options()
    switches = [
        "--disable-blink-features=AutomationControlled",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ]
    for switch in switches:
        browser_options.add_argument(switch)
    chrome_service = Service()
    return webdriver.Chrome(service=chrome_service, options=browser_options)

def get_game_info(driver, game_name):
    """Search for a game and scrape price, link and image from the first result.

    Args:
        driver: The Selenium WebDriver to drive.
        game_name: Title to search for.

    Returns:
        A dict with the keys ``게임 이름``, ``할인율``, ``원가``, ``할인가``,
        ``사이트 URL``, ``플랫폼 이름`` and ``이미지 URL``; the percent sign,
        the won sign and thousands separators are stripped from the numeric
        fields. Returns ``None`` (after printing the error) when the search
        result cannot be read.
    """
    # Local import: this cell does not import urllib at the top.
    from urllib.parse import quote

    def text_or(parent, selector, default):
        """Stripped text of the first match under ``parent``, or ``default``."""
        try:
            return parent.find_element(By.CSS_SELECTOR, selector).text.strip()
        except Exception:
            # Optional field: fall back instead of failing the whole game.
            # ``Exception`` (not a bare except) keeps Ctrl+C working.
            return default

    def attr_or(parent, selector, attribute, default):
        """``attribute`` of the first match under ``parent``, or ``default``."""
        try:
            value = parent.find_element(By.CSS_SELECTOR, selector).get_attribute(attribute)
        except Exception:
            return default
        return default if value is None else value

    def strip_price(price_text):
        """Remove the won sign and thousands separators from a price string."""
        return price_text.replace("₩", "").replace(",", "")

    base_url = "https://www.greenmangaming.com/ko/search/?query="
    # Percent-encode the title: characters such as "&", "#" or "+" would
    # otherwise be interpreted as URL syntax and corrupt the search query.
    driver.get(base_url + quote(str(game_name)))

    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "li.ais-Hits-item"))
        )
        time.sleep(2)

        first_result = driver.find_element(By.CSS_SELECTOR, "li.ais-Hits-item")

        # The product name is mandatory; a missing one fails the lookup.
        title = first_result.find_element(By.CSS_SELECTOR, ".prod-name").text.strip()
        discount = text_or(first_result, ".discount p", "0%")
        prev_price = text_or(first_result, ".prev-price", "")
        curr_price = text_or(first_result, ".current-price", "")
        # Read as before even though the returned dict does not include it.
        drm = attr_or(first_result, ".game-drm[ng-click]", "data-drm-name", "")

        detail_url = attr_or(first_result, 'a[href^="/games/"]', "href", "")
        if detail_url and not detail_url.startswith("http"):
            # Turn a site-relative link into an absolute one.
            detail_url = "https://www.greenmangaming.com" + detail_url

        image_url = attr_or(first_result, "img.search-main-image", "src", "")

        return {
            '게임 이름': title if title else game_name,
            '할인율': discount.replace('%', ''),
            # Without a previous price the list price equals the current price.
            '원가': strip_price(prev_price) if prev_price else strip_price(curr_price),
            '할인가': strip_price(curr_price),
            '사이트 URL': detail_url,
            '플랫폼 이름': "Green Man Gaming",
            '이미지 URL': image_url
        }

    except Exception as e:
        print(f"{game_name} 처리 중 오류 발생: {e}")
        return None

# Load the reference data (genre / age rating) and the list of game names;
# only the first three names are used in this test run.
origin_df = pd.read_csv("merged_games_data.csv")
source_df = pd.read_csv("greenmangaming_all_games.csv")
game_names = source_df['이름'].dropna().tolist()[:3]

# Run the crawl.
results = []
driver = configure_driver()
total = len(game_names)

try:
    for i, name in enumerate(game_names, start=1):
        print(f"{name} 처리 중... ({i}/{total}) 진행률: {i / total * 100:.1f}%")
        info = get_game_info(driver, name)
        if info:
            # Take genre and age rating from the first reference row whose
            # title matches exactly; leave them empty when there is none.
            matched_row = origin_df[origin_df["게임 이름"] == info["게임 이름"]].head(1)
            genre = matched_row["장르"].values[0] if not matched_row.empty else ""
            age = matched_row["연령 등급"].values[0] if not matched_row.empty else ""

            results.append({
                "게임 이름": info["게임 이름"],
                "원가": info["원가"],
                "할인가": info["할인가"],
                "사이트 URL": info["사이트 URL"],
                "할인율": info["할인율"],
                "유저리뷰수": "0",
                "플랫폼 이름": info["플랫폼 이름"],
                "이미지 URL": info["이미지 URL"],
                "장르": genre,
                "연령 등급": age
            })
        time.sleep(1.5)  # pause between searches
finally:
    # Close the browser even if a lookup raises or the run is interrupted.
    driver.quit()

# Save the results.
if results:
    greenman_df = pd.DataFrame(results)
    greenman_df.to_csv("greenman_test_only.csv", index=False, encoding="utf-8-sig")
    print("\n✅ greenman_test_only.csv 저장 완료!")
else:
    print("\n❌ 저장할 데이터가 없습니다.")


Destiny 2: Year of Prophecy Ultimate Edition 처리 중... (1/3) 진행률: 33.3%
Stellar Blade™ Complete Edition 처리 중... (2/3) 진행률: 66.7%
Clair Obscur: Expedition 33 처리 중... (3/3) 진행률: 100.0%

✅ greenman_test_only.csv 저장 완료!
