In [1]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy as np
import json
import math
from tqdm import notebook
import time
import csv

---

# 데이터 수집

### 데이터 불러오기
- 같은 폴더에 `game_free_숫자.xlsx` 형식의 엑셀 파일이 있어야 함 (예: `game_free_1.xlsx`)

In [24]:
# Load this batch's app list from the workbook that sits next to the notebook.
# Only the columns at positions 1 and 2 are read; the preview below shows them as appid and name.
game_list = pd.read_excel(
    './game_free_1.xlsx',
    usecols=[1, 2],
)

In [16]:
# Preview the loaded app list (pandas truncates the middle rows of a long frame).
game_list

Unnamed: 0,appid,name
0,216938,Pieterw test app76 ( 216938 )
1,660010,test2
2,660130,test3
3,1610510,放置勇者：远征/Idle Heroes:Odyssey-弗雷斯特Forrester
4,1610530,The Annoying Game
...,...,...
14362,1489000,The World's Hardest Game 3D
14363,1489010,The Picture in The House
14364,1489020,Eternal Secret
14365,1489050,Spaceflux Demo


In [25]:
# Names of the columns that will hold the values collected for each app.
col_list = [
    'short_description', 'price', 'categories', 'genres',
    'recommendations', 'release_date', 'developers', 'metacritic',
    'image', 'about_the_game', 'screenshots',
]

In [26]:
# Create every target column up front, filled with None, so later cells only assign single cells.
for column_name in col_list:
    game_list[column_name] = None

In [27]:
# Use appid as the row label so each API result can be written with game_list.loc[appid, ...].
# Guarded so that re-running this cell is a no-op: once appid is already the index,
# an unguarded set_index('appid') raises KeyError because the column no longer exists.
if 'appid' in game_list.columns:
    game_list = game_list.set_index('appid')

## 수집
주의점
- 단기간에 너무 많은 요청을 보내면 429 에러 발생 → 1개의 요청마다 1.5초의 time.sleep
    - 참고 : stackoverflow : 1개의 요청마다 1.5초 or 200개마다 5분
- 상품이 아닌 app 존재 → success 값 확인
- genres, categories는 프로젝트의 필수 데이터 → 없다면 continue (해당 app의 데이터를 끝까지 수집X)
    - price도 필수 데이터이지만, 이 노트북은 무료 게임만 수집하므로 None으로 둠

기타
- tqdm 라이브러리 사용 → 현재 진행 상황 확인 및 예상 시간 확인
    - 약 1만개의 데이터를 수집하는데 6시간 소요

In [28]:
# --- Crawl settings -------------------------------------------------------
# Pause before every request; 1.5 s per request keeps the crawl under the
# rate limit that otherwise answers with HTTP 429.
REQUEST_DELAY_SEC = 1.5
# Abort a stalled connection instead of letting one request hang the whole run.
REQUEST_TIMEOUT_SEC = 10

# (column name, extractor over the API `data` dict, required?)
# The order matters: extraction stops at the first missing *required* field,
# so the required fields come first.
_FIELD_SPECS = [
    ('genres', lambda d: [g['description'] for g in d['genres']], True),
    ('categories', lambda d: [c['description'] for c in d['categories']], True),
    ('short_description', lambda d: d['short_description'], False),
    ('recommendations', lambda d: d['recommendations']['total'], False),
    ('metacritic', lambda d: d['metacritic']['score'], False),
    ('developers', lambda d: d['developers'], False),
    ('release_date', lambda d: d['release_date']['date'], False),
    ('image', lambda d: d['header_image'], False),
    ('about_the_game', lambda d: d['about_the_game'], False),
    ('screenshots', lambda d: [s['path_full'] for s in d['screenshots']], False),
]


def _fetch_app_data(appid):
    """Request the store details of one app and return its ``data`` dict.

    Returns:
        The ``data`` dict of the response entry for ``appid``, or ``None`` when
        the entry is absent, its ``success`` flag is falsy, or ``data`` is not a dict.

    Raises:
        requests.RequestException: connection error, timeout or non-2xx status.
        ValueError: the body is not valid JSON or is not a JSON object.
    """
    reply = requests.get(
        f'http://store.steampowered.com/api/appdetails?appids={appid}&l=koreana',
        timeout=REQUEST_TIMEOUT_SEC,
    )
    reply.raise_for_status()
    payload = reply.json()
    if not isinstance(payload, dict):
        # e.g. a literal `null` body; indexing it would raise TypeError.
        raise ValueError(f'unexpected appdetails payload for appid {appid}: {payload!r}')

    entry = payload.get(f'{appid}')
    if not isinstance(entry, dict) or not entry.get('success'):
        return None
    details = entry.get('data')
    return details if isinstance(details, dict) else None


def _extract_fields(data):
    """Pull the target columns out of one API ``data`` dict.

    Returns:
        ``(fields, missing)`` where ``fields`` maps column name -> extracted value
        and ``missing`` lists the columns that could not be read. Extraction stops
        at the first missing required column, so ``fields`` then holds only what
        was read before it.
    """
    fields = {}
    missing = []
    for column, extract, required in _FIELD_SPECS:
        try:
            fields[column] = extract(data)
        except (KeyError, TypeError, IndexError):
            missing.append(column)
            if required:
                break
    return fields, missing


# appids for which at least one target field was absent from the response
# (each appid is recorded once, however many fields were missing).
no_data_appid = []
# appids whose request, response parsing or DataFrame write failed; these can be retried later.
failed_appid = []

for appid in notebook.tqdm(game_list.index):
    time.sleep(REQUEST_DELAY_SEC)

    try:
        data = _fetch_app_data(appid)
    except (requests.RequestException, ValueError):
        # Narrow catch: KeyboardInterrupt / SystemExit still stop the loop.
        failed_appid.append(appid)
        continue

    # 1. Skip apps without a usable response entry (success is falsy / no data).
    if data is None:
        continue

    # 2. Skip paid games.
    if not data.get('is_free'):
        continue

    # 3. Skip anything that is not a game (e.g. DLC).
    if data.get('type') != 'game':
        continue

    game_list.loc[appid, 'price'] = None

    fields, missing = _extract_fields(data)
    if missing:
        no_data_appid.append(appid)

    try:
        for column, value in fields.items():
            # .at is the scalar accessor, so list values are stored as one cell.
            game_list.at[appid, column] = value
    except Exception:
        # Keep the multi-hour crawl alive on an unexpected write error, but remember the app.
        failed_appid.append(appid)
        



  0%|          | 0/9179 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [31]:
# Inspect the first 190 rows to see what the interrupted crawl has filled in so far.
game_list.head(190)

Unnamed: 0_level_0,name,short_description,price,categories,genres,recommendations,release_date,developers,metacritic,image,about_the_game,screenshots
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
216938,Pieterw test app76 ( 216938 ),,,,,,,,,,,
660010,test2,,,,,,,,,,,
660130,test3,,,,,,,,,,,
1610510,放置勇者：远征/Idle Heroes:Odyssey-弗雷斯特Forrester,,,,,,,,,,,
1610540,Train Station Renovation - Germany DLC,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
1605890,To the Max,An entertainingly reckless true-story visual n...,,"[싱글 플레이어, Steam 도전 과제]","[인디, 앞서 해보기]",,2021년 9월 4일,[First Triangle],,https://cdn.akamai.steamstatic.com/steam/apps/...,'To the Max' details true stories of a Massach...,[https://cdn.akamai.steamstatic.com/steam/apps...
1605960,Zombie Survival,ZS 온라인은 10분~16분 안의 짧은 시간 안에 즐길 수 있는 탑다운 2D 슈팅 ...,,"[싱글 플레이어, 멀티플레이어, PvP, 온라인 PvP, Steam 도전 과제]","[캐주얼, 무료, 인디, 대규모 멀티플레이어]",,2021년 4월 29일,[SexyM],,https://cdn.akamai.steamstatic.com/steam/apps/...,<strong>ZS 온라인은 10분~16분 안의 짧은 시간 안에 즐길 수 있는 탑다...,[https://cdn.akamai.steamstatic.com/steam/apps...
1605970,Torque Drift - AdamLZ S15 Driver Car,,,,,,,,,,,
1605971,Torque Drift - AdamLZ 180sx Driver Car,,,,,,,,,,,


## 엑셀로 저장하기
각자 모은 데이터 수합을 위해 저장

In [22]:
# NOTE: change this file name before running!! (each collector saves a separate file for merging)
output_path = 'game_1_data.xlsx'
game_list.to_excel(output_path)

In [None]:
# Check that the data was collected properly (first 50 rows)
game_list.head(50)

In [23]:
# Check that the data was collected properly (last 50 rows)
game_list.tail(50)

Unnamed: 0_level_0,name,short_description,price,categories,genres,recommendations,release_date,developers,metacritic,image,about_the_game,screenshots,type
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1542230,Super Jigsaw Puzzle: Generations - Fantasy,Fantasy images!,750000.0,"[싱글 플레이어, 다운로드 가능한 콘텐츠, Steam 도전 과제, Steam Clo...","[캐주얼, 무료, 인디, 전략]",,2021년 4월 1일,[Flat Cat Games],,https://cdn.akamai.steamstatic.com/steam/apps/...,Fantasy images!<br><br><strong>Content</strong...,[https://cdn.akamai.steamstatic.com/steam/apps...,dlc
1542240,Helicopter Simulator 2020 Demo,,,,,,,,,,,,
1542280,Stardom 2,*** This game is only available in Traditional...,850000.0,"[싱글 플레이어, Steam Cloud]","[캐주얼, 시뮬레이션]",253.0,2022년 3월 30일,[SOFTSTAR ENTERTAINMENT],,https://cdn.akamai.steamstatic.com/steam/apps/...,*** This game is only available in Traditional...,[https://cdn.akamai.steamstatic.com/steam/apps...,game
1542290,Girls and Dragons,This game is a classic memory game with severa...,220000.0,"[싱글 플레이어, Steam 도전 과제]","[캐주얼, 인디]",,2021년 4월 20일,[IR Studio],,https://cdn.akamai.steamstatic.com/steam/apps/...,Help the beautiful girls to immerse themselves...,[https://cdn.akamai.steamstatic.com/steam/apps...,game
1542310,Super Jigsaw Puzzle: Generations - Second Anni...,,,,,,,,,,,,
1542320,Hero Team Preview,,,,,,,,,,,,
1542330,Assimilation X,Embody the simulation of a dead mysterious man...,330000.0,[싱글 플레이어],"[액션, 어드벤처, 캐주얼]",,2021년 3월 12일,[AvrillionArts],,https://cdn.akamai.steamstatic.com/steam/apps/...,"<img src=""https://cdn.akamai.steamstatic.com/s...",[https://cdn.akamai.steamstatic.com/steam/apps...,game
1542340,Gunborg: Dark Matters Demo,,,,,,,,,,,,
1542350,Nothing to be done,,,,,,,,,,,,
1542360,Stormworks: Search and Destroy,Stormworks: Search and Destroy is the weapons ...,1050000.0,"[싱글 플레이어, 멀티플레이어, PvP, 온라인 PvP, 협동, 온라인 협동, 플랫...","[시뮬레이션, 전략]",789.0,2021년 10월 5일,[Geometa],,https://cdn.akamai.steamstatic.com/steam/apps/...,"<img src=""https://cdn.akamai.steamstatic.com/s...",[https://cdn.akamai.steamstatic.com/steam/apps...,dlc
