# 에픽 게임즈 크롤링

In [54]:
!pip install selenium
!pip install fake-useragent

Collecting fake-useragent
  Downloading fake_useragent-2.2.0-py3-none-any.whl.metadata (17 kB)
Downloading fake_useragent-2.2.0-py3-none-any.whl (161 kB)
Installing collected packages: fake-useragent
Successfully installed fake-useragent-2.2.0


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time
import re
import requests
from sklearn.feature_extraction.text import CountVectorizer

# Chrome 옵션 설정
def configure_chrome_options():
    options = webdriver.ChromeOptions()
    # 실제 브라우저와 유사한 User-Agent 설정
    options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36')
    
    # 봇 탐지 우회를 위한 추가 설정
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--start-maximized')
    options.add_argument('--disable-infobars')
    options.add_argument('--disable-gpu') 
    return options

# 웹드라이버 설정
def setup_driver():
    service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=service, options=configure_chrome_options())

# 메인 페이지에서 게임 링크 수집
def collect_game_links(driver, url):
    driver.get(url)
    time.sleep(5)
    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.css-g3jcms'))
    )
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    return [f"https://store.epicgames.com{card['href']}" for card in soup.select('a.css-g3jcms')]

# 상세 페이지 데이터 추출
def extract_game_details(driver, url):
    driver.get(url)
    WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'div[data-testid="about-metadata-layout-column"]'))
    )
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    
    game_info = {
        '게임 이름': soup.select_one('h1.css-1gty6cv').text.strip() if soup.select_one('h1.css-1gty6cv') else 'N/A',
        '사이트 URL': url,
        '플랫폼 이름': 'Epic Games',
        '이미지 URL': extract_image_url(soup),
        '가격 정보': extract_price_info(soup),
        '장르': extract_genres(soup),
        '연령 등급': extract_age_rating(soup),
        '유저평점': 'N/A',
        '유저리뷰': 'N/A' 
    }
    return game_info

# 이미지 URL 추출
def extract_image_url(soup):
    img_selectors = [
        'div.css-1q03292 img',
        'img[data-testid="picture-image"]',
        'div.css-1m7k7qz img'
    ]
    for selector in img_selectors:
        img = soup.select_one(selector)
        if img and (img.get('src') or img.get('data-image')):
            return img.get('src') or img.get('data-image')
    return 'N/A'

# 가격 정보 추출
def extract_price_info(soup):
    price_data = {'원가': 'N/A', '할인가': 'N/A', '할인율': 'N/A'}
    
    # 할인 정보
    discount_el = soup.select_one('div[data-testid="add-on-price-notice"] span')
    if discount_el:
        price_data['할인율'] = discount_el.text.strip()
    
    # 가격 요소 선택
    price_el = soup.select_one('div.css-1xvn3kf')
    if price_el:
        # 원가 (취소선이 적용된 가격)
        original = price_el.select_one('span.css-119zqif')
        if not original:
            original = price_el.select_one('span.css-1p4w6lu')
        
        # 할인가 (현재 가격)
        sale = price_el.select_one('span.css-4jky3p')
        
        if original and sale:
            price_data['원가'] = original.text.strip()
            price_data['할인가'] = sale.text.strip()
        elif sale:
            price_data['할인가'] = sale.text.strip()
    
    return price_data

# 장르 추출
def extract_genres(soup):
    metadata = soup.select_one('div[data-testid="about-metadata-layout-column"]')
    if metadata:
        for label in metadata.find_all('p'):
            if 'Genre' in label.text:
                container = label.find_parent().find_parent().find_next_sibling('div')
                if container:
                    return ', '.join([a.text.strip() for a in container.select('a.css-cyjj8t')])
    return 'N/A'

# 연령 등급 추출
def extract_age_rating(soup):
    rating_div = soup.select_one('div[data-testid="ratings-image"]')
    if rating_div and rating_div.img:
        alt_text = rating_div.img.get('alt', '')
        age_match = re.search(r'\d+', alt_text)
        return f"{age_match.group()}세" if age_match else alt_text
    return 'N/A'

# 데이터 분석용 장르 벡터화
def analyze_genres(df):
    if not df.empty and '장르' in df.columns:
        vectorizer = CountVectorizer(tokenizer=lambda x: x.split(', '))
        genre_matrix = vectorizer.fit_transform(df['장르'])
        genre_counts = pd.DataFrame(genre_matrix.toarray(), columns=vectorizer.get_feature_names_out()).sum()
        return genre_counts.sort_values(ascending=False)
    return pd.Series()

# 메인 실행 함수
def main():
    driver = setup_driver()
    main_url = 'https://store.epicgames.com/en/browse?sortBy=relevancy&sortDir=DESC&category=Game&count=40&start=0'
    
    try:
        print("게임 링크 수집 중...")
        game_links = collect_game_links(driver, main_url)
        print(f"총 {len(game_links)}개의 게임 발견")
        
        game_data = []
        for i, link in enumerate(game_links[:10]):
            print(f"{i+1}/{len(game_links)} 처리 중: {link}")
            game_data.append(extract_game_details(driver, link))
            time.sleep(1.5)  # 서버 부하 방지
        
        # 데이터프레임 생성
        df = pd.DataFrame(game_data)
        
        # 가격 정보 분리
        price_df = pd.json_normalize(df['가격 정보'])
        df = pd.concat([df.drop('가격 정보', axis=1), price_df], axis=1)
        
        # 장르 분석
        genre_analysis = analyze_genres(df)
        print("\n장르 분포 상위 10개:")
        print(genre_analysis.head(10))
        
        # CSV 저장
        df.to_csv('epic_games_enhanced.csv', index=False, encoding='utf-8-sig')
        print("\n데이터 저장 완료!")
        
    except Exception as e:
        print(f"오류 발생: {str(e)}")
    finally:
        driver.quit()

if __name__ == "__main__":
    main()

게임 링크 수집 중...
총 40개의 게임 발견
1/40 처리 중: https://store.epicgames.com/en-US/p/dummynation-40505c
오류 발생: Message: 
Stacktrace:
	GetHandleVerifier [0x0x97ba83+63395]
	GetHandleVerifier [0x0x97bac4+63460]
	(No symbol) [0x0x7c2113]
	(No symbol) [0x0x80a85e]
	(No symbol) [0x0x80abfb]
	(No symbol) [0x0x852f92]
	(No symbol) [0x0x82f3f4]
	(No symbol) [0x0x8507ba]
	(No symbol) [0x0x82f1a6]
	(No symbol) [0x0x7fe7b2]
	(No symbol) [0x0x7ff654]
	GetHandleVerifier [0x0xbf8883+2672035]
	GetHandleVerifier [0x0xbf3cba+2652634]
	GetHandleVerifier [0x0x9a2bca+223466]
	GetHandleVerifier [0x0x992cb8+158168]
	GetHandleVerifier [0x0x99978d+185517]
	GetHandleVerifier [0x0x983b78+96408]
	GetHandleVerifier [0x0x983d02+96802]
	GetHandleVerifier [0x0x96e90a+9770]
	BaseThreadInitThunk [0x0x754a5d49+25]
	RtlInitializeExceptionChain [0x0x775fd03b+107]
	RtlGetAppContainerNamedObjectPath [0x0x775fcfc1+561]



In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import pandas as pd

def configure_chrome_options():
    options = webdriver.ChromeOptions()
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option('useAutomationExtension', False)
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--start-maximized')
    return options

def setup_driver():
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=configure_chrome_options())

def collect_game_data(driver, url):
    driver.get(url)

    WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'div[data-testid="one-line-text"]'))
    )

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    cards = soup.select('a.css-g3jcms') 

    game_list = []
    for card in cards:
        # 게임 링크
        game_url = "https://store.epicgames.com" + card['href'] if card.has_attr('href') else 'N/A'

        # 게임 이름
        name_tag = card.select_one('div[data-testid="one-line-text"]')
        game_name = name_tag.text.strip() if name_tag else 'N/A'

        # 이미지 URL
        img_tag = card.select_one('img[data-testid="picture-image"]')
        img_url = img_tag['data-image'] if img_tag and img_tag.has_attr('data-image') else 'N/A'

        # 가격
        price_tag = card.select_one('span[data-testid="offer-price"]')
        price = price_tag.text.strip() if price_tag else '가격 정보 없음'

        # 출시일 (출시 예정 여부 포함)
        date_tag = card.select_one('div.css-10kqwxf')  # 날짜가 들어있는 div
        release_info = date_tag.text.strip() if date_tag else '출시됨'

        game_list.append({
            '게임 이름': game_name,
            '게임 링크': game_url,
            '이미지 URL': img_url,
            '가격': price,
            '출시일': release_info
        })

    return game_list

# 실행
if __name__ == "__main__":
    driver = setup_driver()
    try:
        url = 'https://store.epicgames.com/ko/browse?sortBy=relevancy&sortDir=DESC&category=Game&count=40&start=0'
        data = collect_game_data(driver, url)
        df = pd.DataFrame(data)
        print(df.head())
        df.to_csv("epicgames_games_summary.csv", index=False, encoding='utf-8-sig')
    finally:
        driver.quit()


         게임 이름                                              게임 링크  \
0  Dummynation  https://store.epicgames.com/ko/p/dummynation-4...   
1          N/A  https://store.epicgames.com/ko/p/pure-instinct...   
2       MEMREC     https://store.epicgames.com/ko/p/memrec-bb1981   
3          N/A  https://store.epicgames.com/ko/p/destiny-code-...   
4          N/A  https://store.epicgames.com/ko/p/bodycamera-ch...   

                                             이미지 URL        가격  \
0  https://cdn1.epicgames.com/spt-assets/911e0671...  가격 정보 없음   
1  https://cdn1.epicgames.com/spt-assets/b1570591...  가격 정보 없음   
2  https://cdn1.epicgames.com/spt-assets/9a6d6571...  가격 정보 없음   
3  https://cdn1.epicgames.com/spt-assets/1cb8ecee...  가격 정보 없음   
4  https://cdn1.epicgames.com/spt-assets/0c49da48...  가격 정보 없음   

                 출시일  
0  25. 08. 01. 이용 가능  
1     2025년 8월 이용 가능  
2     2025년 8월 이용 가능  
3         2025 이용 가능  
4  25. 08. 08. 이용 가능  


In [None]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time

def get_epic_game_info_with_selenium(query):
    options = Options()
    options.add_argument("--disable-gpu")
    options.add_argument("--window-size=1920x1080")
    options.add_argument("user-agent=Mozilla/5.0")

    driver = webdriver.Chrome(options=options)
    url = f"https://store.epicgames.com/ko/browse?q={query}&sortBy=relevancy&sortDir=DESC&count=40"
    driver.get(url)
    time.sleep(3)

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    driver.quit()

    first_card = soup.select_one('div[data-component="BrowseOfferCard"]')
    if not first_card:
        return None

    title_elem = first_card.select_one('[data-testid="offer-card-image-portrait"] img')
    game_title = title_elem["alt"] if title_elem else "제목 없음"
    image_url = title_elem["src"] if title_elem else None
    game_link = first_card.select_one("a")["href"]

    discount_elem = first_card.select_one('span:-soup-contains("%")')
    original_price_elem = first_card.select_one('span.css-4jky3p')
    discounted_price_elem = first_card.select_one('span.css-12s1vua')

    discount = discount_elem.get_text() if discount_elem else "0%"
    original_price = original_price_elem.get_text() if original_price_elem else None
    discounted_price = discounted_price_elem.get_text() if discounted_price_elem else original_price

    return {
        "영어 게임 이름": query,
        "한글 게임 이름": game_title,
        "할인율": discount,
        "원가": discounted_price,
        "할인가": original_price,
        "이미지 URL": image_url,
        "사이트 URL": "https://store.epicgames.com" + game_link,
        "DRM" : '에픽 게임즈'
    }

# 기존 CSV 파일에서 '이름' 컬럼만 불러오기
gm_df = pd.read_csv("greenmangaming_all_games.csv")
game_names = gm_df['이름'].dropna().unique()[:3]  # 중복 제거 및 NaN 제거

# 각 이름으로 Epic 검색 후 결과 저장
results = []
for name in game_names:
    print(f"검색 중: {name}")
    info = get_epic_game_info_with_selenium(name)
    if info:
        results.append(info)

# 결과를 새로운 CSV로 저장
if results:
    df = pd.DataFrame(results)
    df.to_csv("epic_matches.csv", index=False, encoding="utf-8-sig")
    print("저장 완료: epic_matches.csv")
else:
    print("검색 결과가 없습니다.")


검색 중: Destiny 2: Year of Prophecy Ultimate Edition
검색 중: Stellar Blade™ Complete Edition
검색 중: Clair Obscur: Expedition 33
저장 완료: epic_matches.csv


In [7]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time

def get_epic_game_info_with_selenium(query):
    options = Options()
    options.add_argument("--disable-gpu")
    options.add_argument("--window-size=1920x1080")
    options.add_argument("user-agent=Mozilla/5.0")

    driver = webdriver.Chrome(options=options)
    url = f"https://store.epicgames.com/ko/browse?q={query}&sortBy=relevancy&sortDir=DESC&count=40"
    driver.get(url)
    time.sleep(3)

    soup = BeautifulSoup(driver.page_source, 'html.parser')
    driver.quit()

    first_card = soup.select_one('div[data-component="BrowseOfferCard"]')
    if not first_card:
        return None

    title_elem = first_card.select_one('[data-testid="offer-card-image-portrait"] img')
    game_title = title_elem["alt"] if title_elem else query
    image_url = title_elem["src"] if title_elem else None
    game_link = first_card.select_one("a")["href"]

    discount_elem = first_card.select_one('span:-soup-contains("%")')
    original_price_elem = first_card.select_one('span.css-4jky3p')
    discounted_price_elem = first_card.select_one('span.css-12s1vua')

    discount = discount_elem.get_text().replace("%", "") if discount_elem else "0"
    original_price = original_price_elem.get_text().replace("₩", "").replace(",", "") if original_price_elem else ""
    discounted_price = discounted_price_elem.get_text().replace("₩", "").replace(",", "") if discounted_price_elem else original_price

    return {
        "게임 이름": query,
        "원가": original_price.strip(),
        "할인가": discounted_price.strip(),
        "사이트 URL": "https://store.epicgames.com" + game_link,
        "할인율": discount,
        "이미지 URL": image_url
    }

# 1. 기존 CSV에서 불러오기
merged_df = pd.read_csv("merged_games_data.csv")
game_names = merged_df["게임 이름"].dropna().unique()

# 2. 결과 저장용 리스트
epic_rows = []

for i, name in enumerate(game_names):
    print(f"[{i+1}/3] Epic 검색 중: {name}")
    result = get_epic_game_info_with_selenium(name)

    if result:
        # 관련된 장르 및 연령 등급 가져오기 (Steam 데이터에서 복사)
        matching_row = merged_df[merged_df["게임 이름"] == name].iloc[0]
        genre = matching_row["장르"] if "장르" in matching_row else ""
        rating = matching_row["연령 등급"] if "연령 등급" in matching_row else ""

        # 원가가 비어 있으면 할인가로 대체
        if not result["원가"]:
            result["원가"] = result["할인가"]

        # 최종 구조 맞추기
        epic_rows.append({
            "게임 이름": name,
            "원가": result["원가"],
            "할인가": result["할인가"],
            "사이트 URL": result["사이트 URL"],
            "할인율": result["할인율"],
            "유저리뷰수": 0,  # Epic은 리뷰 없음
            "플랫폼 이름": "Epic",
            "이미지 URL": result["이미지 URL"],
            "장르": genre,
            "연령 등급": rating
        })
    else:
        print(f" - {name} 결과 없음")

# 3. DataFrame으로 저장
if epic_rows:
    df_epic = pd.DataFrame(epic_rows)
    df_epic.to_csv("epic_games_sample.csv", index=False, encoding="utf-8-sig")
    print("✅ 저장 완료: epic_games_sample.csv")
else:
    print("❌ 결과 없음. 파일 저장 생략됨.")


[1/3] Epic 검색 중: Dead by Daylight
[2/3] Epic 검색 중: Grounded 2
 - Grounded 2 결과 없음
[3/3] Epic 검색 중: Ready or Not
[4/3] Epic 검색 중: PEAK
[5/3] Epic 검색 중: Mage Arena
 - Mage Arena 결과 없음
[6/3] Epic 검색 중: Tales of the Shire: A The Lord of The Rings™ Game
 - Tales of the Shire: A The Lord of The Rings™ Game 결과 없음
[7/3] Epic 검색 중: Dead by Daylight: The Walking Dead
[8/3] Epic 검색 중: Grounded 2: Founder’s Pack
 - Grounded 2: Founder’s Pack 결과 없음
[9/3] Epic 검색 중: RimWorld
[10/3] Epic 검색 중: DOOM: The Dark Ages
 - DOOM: The Dark Ages 결과 없음
[11/3] Epic 검색 중: Kingdom Come: Deliverance II
[12/3] Epic 검색 중: ELDEN RING NIGHTREIGN
 - ELDEN RING NIGHTREIGN 결과 없음
[13/3] Epic 검색 중: Warhammer 40,000: Space Marine 2
[14/3] Epic 검색 중: Baldur's Gate 3
 - Baldur's Gate 3 결과 없음
[15/3] Epic 검색 중: Witchfire
 - Witchfire 결과 없음
[16/3] Epic 검색 중: R.E.P.O.
 - R.E.P.O. 결과 없음
[17/3] Epic 검색 중: EA SPORTS FC™ 26
[18/3] Epic 검색 중: Borderlands 4
[19/3] Epic 검색 중: ELDEN RING
[20/3] Epic 검색 중: Grand Theft Auto V Enhanced
[21/3