In [20]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy as np
import json
import math
from tqdm import notebook
import time
import csv
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

### 데이터 불러오기
- 노트북과 같은 위치의 `gamedata` 폴더에 있는 game_숫자 엑셀 파일 (예: `gamedata/game_5.xlsx`)

In [2]:
# Load the app list; only sheet columns 1 and 2 are read (displayed below as appid and name).
game_list = pd.read_excel('gamedata/game_5.xlsx', usecols=[1, 2])
game_list

Unnamed: 0,appid,name
0,1780820,Chill Corner - Piano & Guitar (Music Album)
1,1780830,Hundred Days - Napa Valley
2,1780840,Loch Ness
3,1780850,Cthulhu Dungeon
4,1780880,Queue Simulator
...,...,...
9872,2163690,Dicknosaurus Prologue
9873,2163700,In The Workplace Of Madness - ONE ROOM DEMO
9874,2163710,Aevumblade Chronicles
9875,2163720,Raige Demo


In [3]:
# Detail columns that are added to game_list and filled per app by the collection loop.
col_list = [
    'short_description', 'price', 'categories',
    'genres', 'recommendations', 'release_date',
    'developers', 'metacritic', 'image',
]

In [4]:
# Create every detail column up front, initialised to None.
for column_name in col_list:
    game_list[column_name] = None

In [5]:
# Use appid as the row label so each app's row can be addressed by its appid.
# Guarded so that re-running this cell does not raise KeyError once appid is
# already the index.
if 'appid' in game_list.columns:
    game_list = game_list.set_index('appid')

In [6]:
# Preview the frame: indexed by appid, with the new detail columns still empty.
game_list

Unnamed: 0_level_0,name,short_description,price,categories,genres,recommendations,release_date,developers,metacritic,image
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1780820,Chill Corner - Piano & Guitar (Music Album),,,,,,,,,
1780830,Hundred Days - Napa Valley,,,,,,,,,
1780840,Loch Ness,,,,,,,,,
1780850,Cthulhu Dungeon,,,,,,,,,
1780880,Queue Simulator,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
2163690,Dicknosaurus Prologue,,,,,,,,,
2163700,In The Workplace Of Madness - ONE ROOM DEMO,,,,,,,,,
2163710,Aevumblade Chronicles,,,,,,,,,
2163720,Raige Demo,,,,,,,,,


# 수집


In [7]:
# appids for which at least one requested field was missing from the response
# (each appid is recorded once, even when several fields are missing)
no_data_appid = []
# appids whose request failed or whose response body was unusable,
# recorded so they can be retried later instead of being dropped silently
request_failed_appid = []

# NOTE(review): appList and index are not used in this cell; kept in case other cells read them.
appList = []
index = 0

REQUEST_DELAY_SEC = 1.5     # pause before every request
REQUEST_TIMEOUT_SEC = 10    # a stalled connection must not hang the whole crawl

# (column, extractor) pairs; an extractor raises when its field is absent from the data.
# Required fields: the first missing one ends processing of that app
# (fields stored before it stay stored).
required_fields = [
    ('price', lambda d: d['price_overview']['final']),
    ('genres', lambda d: [g['description'] for g in d['genres']]),
    ('categories', lambda d: [c['description'] for c in d['categories']]),
]
# Optional fields: a missing one is noted and the remaining fields are still stored.
optional_fields = [
    ('short_description', lambda d: d['short_description']),
    ('recommendations', lambda d: d['recommendations']['total']),
    ('metacritic', lambda d: d['metacritic']['score']),
    ('developers', lambda d: d['developers']),
    ('release_date', lambda d: d['release_date']['date']),
    ('image', lambda d: d['header_image']),
]

for appid in notebook.tqdm(game_list.index):
    time.sleep(REQUEST_DELAY_SEC)

    # Catch only request/JSON errors: a bare `except` would also swallow
    # KeyboardInterrupt and make the loop impossible to stop cleanly.
    try:
        http_response = requests.get(
            f'https://store.steampowered.com/api/appdetails?appids={appid}&l=koreana',
            timeout=REQUEST_TIMEOUT_SEC,
        )
        http_response.raise_for_status()
        response = http_response.json()
    except (requests.RequestException, ValueError):
        request_failed_appid.append(appid)
        continue

    # The body is expected to be a dict keyed by the appid as a string;
    # anything else (e.g. a JSON null) is treated as a failed request.
    game = response.get(f'{appid}') if isinstance(response, dict) else None
    if not isinstance(game, dict):
        request_failed_appid.append(appid)
        continue

    # 1. Skip apps whose lookup was not successful
    if not game.get('success'):
        continue

    # 2. Skip apps that carry no usable data
    data = game.get('data')
    if not isinstance(data, dict):
        no_data_appid.append(appid)
        continue

    has_missing_field = False
    required_ok = True

    for column, extract in required_fields:
        try:
            value = extract(data)
        except (LookupError, TypeError):
            has_missing_field = True
            required_ok = False
            break
        # .at stores list values as a single cell object
        game_list.at[appid, column] = value

    if required_ok:
        for column, extract in optional_fields:
            try:
                value = extract(data)
            except (LookupError, TypeError):
                has_missing_field = True
                continue
            game_list.at[appid, column] = value

    if has_missing_field:
        no_data_appid.append(appid)


  0%|          | 0/9877 [00:00<?, ?it/s]

## 엑셀로 저장하기

In [8]:
# TODO: change the output file name before running!
game_list.to_excel('test_to_excel.xlsx')


In [36]:
# Preview the frame after the collection loop has filled in the detail columns.
game_list

Unnamed: 0_level_0,name,short_description,price,categories,genres,recommendations,release_date,developers,metacritic,image
appid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1780820,Chill Corner - Piano & Guitar (Music Album),This DLC added 21 new soundtracks with the pri...,110000,"[싱글 플레이어, 다운로드 가능한 콘텐츠, Steam 도전 과제, Steam Cloud]","[캐주얼, 무료, 인디, 시뮬레이션]",,2021년 12월 17일,[Low-Hi Tech],,https://cdn.akamai.steamstatic.com/steam/apps/...
1780830,Hundred Days - Napa Valley,Explore the napa valley region and its grape v...,950000,"[싱글 플레이어, 다운로드 가능한 콘텐츠, Steam 도전 과제, 컨트롤러 완벽 지...","[인디, 시뮬레이션, 전략]",,2021년 12월 21일,[Broken Arms Games],,https://cdn.akamai.steamstatic.com/steam/apps/...
1780840,Loch Ness,Loch Ness is a 1-4 Player Co-Op Horror Hunter....,1450000,"[싱글 플레이어, 멀티플레이어, 협동, 온라인 협동, Steam 도전 과제]","[인디, 앞서 해보기]",,2021년 12월 3일,[JFi Games],,https://cdn.akamai.steamstatic.com/steam/apps/...
1780850,Cthulhu Dungeon,《不可名状的地牢》是一款以克苏鲁的呼唤跑团为背景的 战棋+DBG+Roguelike游戏，通...,750000,[싱글 플레이어],"[인디, 전략, 앞서 해보기]",,2022년 2월 18일,[SanYeGame],,https://cdn.akamai.steamstatic.com/steam/apps/...
1780880,Queue Simulator,Queue simulator brings you your favorite part ...,110000,"[싱글 플레이어, Steam 도전 과제]","[캐주얼, 인디, 대규모 멀티플레이어, RPG, 시뮬레이션]",,2021년 12월 15일,[Just Making Games],,https://cdn.akamai.steamstatic.com/steam/apps/...
...,...,...,...,...,...,...,...,...,...,...
2163690,Dicknosaurus Prologue,,,,,,,,,
2163700,In The Workplace Of Madness - ONE ROOM DEMO,,,,,,,,,
2163710,Aevumblade Chronicles,,,,,,,,,
2163720,Raige Demo,,,,,,,,,


In [37]:
# Keep the first 5000 rows as the working set. `.copy()` makes gamedata an
# independent frame, so assigning columns to it later does not raise
# SettingWithCopyWarning.
gamedata = game_list.head(5000).copy()

# Count and print the missing values in the short_description column
print('short_description 열의 결측값의 수 : ', gamedata['short_description'].isnull().sum())

short_description 열의 결측값의 수 :  2284


In [38]:
# Replace missing descriptions with an empty string.
# `assign` builds a new frame instead of writing into a possible slice of
# game_list, which avoids SettingWithCopyWarning.
gamedata = gamedata.assign(short_description=gamedata['short_description'].fillna(''))
print('short_description 열의 결측값의 수 : ', gamedata['short_description'].isnull().sum())

short_description 열의 결측값의 수 :  0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gamedata['short_description'] = gamedata['short_description'].fillna('')


In [39]:
# Build TF-IDF vectors from the short descriptions, one row per game.
# NOTE(review): stop_words='english' only helps English text, but the descriptions are
# requested with l=koreana and some rows contain Chinese text — confirm this is intended.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(gamedata['short_description'])
print('TF-IDF 행렬의 크기 : ', tfidf_matrix.shape)


TF-IDF 행렬의 크기 :  (5000, 13086)


In [40]:
# Pairwise cosine similarity between all description vectors;
# cosine_sim[i][j] is the similarity between row i and row j of tfidf_matrix.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
print('코사인 유사도 연산 결과 :',cosine_sim.shape)

코사인 유사도 연산 결과 : (5000, 5000)


In [56]:
# appid is the index after set_index('appid') and a regular column after
# reset_index(); accept both so this cell also runs on a fresh kernel.
appid_values = gamedata['appid'] if 'appid' in gamedata.columns else gamedata.index

# Map each appid to its row position in gamedata, which is also its
# row/column position in cosine_sim.
name_to_index = {appid: position for position, appid in enumerate(appid_values)}

# Row position of the game with appid 1780840
idx = name_to_index[1780840]
print(idx)

2


In [62]:
def get_recommendation(appid, cosine_sim=cosine_sim, top_n=10):
    """Return the names of the games most similar to the given game.

    Parameters
    ----------
    appid : appid of the query game; must be a key of name_to_index.
    cosine_sim : square similarity matrix whose row/column positions match
        the row positions of gamedata.
    top_n : maximum number of games to return (default 10).

    Returns
    -------
    The 'name' entries of gamedata for the most similar games, most similar first.

    Raises
    ------
    KeyError : if appid is not in name_to_index.
    """
    # Row position of the query game.
    idx = name_to_index[appid]

    # Similarity of the query game to every other game. The query itself is
    # removed by position: it is not guaranteed to sort first (an empty
    # description has similarity 0 to itself, and duplicate descriptions tie).
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Most similar first; the sort is stable, so ties keep their row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Row positions of the top_n most similar games.
    game_indices = [position for position, _ in sim_scores[:top_n]]

    return gamedata['name'].iloc[game_indices]

get_recommendation(1780880)
    

819     Grim Tales: Crimson Hollow Collector's Edition
4575                                      Gnome Online
4763                                             动物园怪谈
3975                  Fairground Power Polyp Simulator
4605                                       Village RPG
76                              Play Outside Simulator
2935                            Idle Trading Simulator
190                                   Steps From Above
4732             Lights, Camera, Reaction! IRL Edition
2923                               Thats Life of Ralph
Name: name, dtype: object