<a href="https://colab.research.google.com/github/genkke0653/PM_anal_games/blob/main/Grounded2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import requests, pandas as pd, difflib, json, time
from datetime import datetime, timedelta

# 1) 전체 App 리스트 수집 (키 불필요)
APP_LIST_URL = "https://api.steampowered.com/ISteamApps/GetAppList/v2/"

def load_app_list(cache_path="applist.json"):
    try:
        with open(cache_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except:
        data = requests.get(APP_LIST_URL, timeout=30).json()
        with open(cache_path, "w", encoding="utf-8") as f:
            json.dump(data, f)
        return data

def find_appid_by_name(name, applist):
    rows = applist["applist"]["apps"]
    names = {row["name"]: row["appid"] for row in rows if row["name"]}
    # 근사 매칭
    best = difflib.get_close_matches(name, names.keys(), n=1, cutoff=0.6)
    return names.get(best[0]) if best else None

applist = load_app_list()
appid = find_appid_by_name("Grounded 2", applist)
print("APPID:", appid)  # 예시 출력
# Grounded 2 : 2661300
#
GetAppList

APPID: 2661300


In [9]:
BASE_REVIEWS = "https://store.steampowered.com/appreviews/{appid}"
HEADERS = {"User-Agent": "Mozilla/5.0"}

def fetch_recent_reviews(appid, months=6, lang="all", per_page=100, sleep=1.1):
    since = datetime.utcnow() - timedelta(days=30*months)
    cursor = "*"
    all_rows = []

    while True:
        params = {
            "json": 1,
            "filter": "recent",        # 최근 작성순
            "language": lang,          # "korean"으로 바꾸면 한글 리뷰만
            "purchase_type": "all",
            "num_per_page": per_page,
            "cursor": cursor
        }
        r = requests.get(BASE_REVIEWS.format(appid=appid), params=params, headers=HEADERS, timeout=30)
        j = r.json()
        rows = j.get("reviews", [])
        if not rows: break

        for rv in rows:
            ts = datetime.utcfromtimestamp(rv["timestamp_created"])
            if ts < since:
                return pd.DataFrame(all_rows) if all_rows else pd.DataFrame()
            all_rows.append({
                "timestamp": ts,
                "voted_up": rv["voted_up"],
                "review": rv.get("review",""),
                "playtime_forever": rv["author"].get("playtime_forever", 0),
                "playtime_2weeks": rv["author"].get("playtime_last_two_weeks", 0),
                "language": rv.get("language","")
            })

        cursor = j.get("cursor")
        if not cursor: break
        time.sleep(sleep)  # 예의 있는 크롤링

    return pd.DataFrame(all_rows)

reviews_df = fetch_recent_reviews(appid, months=6, lang="english")
print(reviews_df.head(), len(reviews_df))

  since = datetime.utcnow() - timedelta(days=30*months)
  ts = datetime.utcfromtimestamp(rv["timestamp_created"])


            timestamp  voted_up  \
0 2025-09-09 01:19:44      True   
1 2025-09-09 01:05:22      True   
2 2025-09-08 23:48:43      True   
3 2025-09-08 23:45:31      True   
4 2025-09-08 22:15:16      True   

                                              review  playtime_forever  \
0  I have been waiting for this game since the fi...              1542   
1  just finished grounded 1 and this feels so goo...               955   
2  its not as poorly optimized as i initially tho...               553   
3  Running this on an RX6600 and an old ass i7, s...              1504   
4  If you loved Grounded 1 you will like this gam...              2638   

   playtime_2weeks language  
0                0  english  
1              955  english  
2              553  english  
3             1504  english  
4               73  english   8034


In [10]:
# UTC → KST 변환
reviews_df["date"] = (
    reviews_df["timestamp"]
    .dt.tz_localize("UTC")      # 1) 타임존 지정
    .dt.tz_convert("Asia/Seoul")  # 2) 한국시간으로 변환
    .dt.date
)

# 일자별 리뷰 집계
daily = reviews_df.groupby("date")["voted_up"].agg(count="count", pos="sum")
daily["pos_ratio"] = daily["pos"] / daily["count"]

print(daily.tail(10))

            count  pos  pos_ratio
date                             
2025-08-31     40   34   0.850000
2025-09-01     42   39   0.928571
2025-09-02     38   32   0.842105
2025-09-03     24   23   0.958333
2025-09-04     32   32   1.000000
2025-09-05     29   28   0.965517
2025-09-06     38   32   0.842105
2025-09-07     37   34   0.918919
2025-09-08     29   25   0.862069
2025-09-09     12   11   0.916667


In [11]:
# ==== 0) 준비 ====
import re
from collections import Counter
import pandas as pd

# NLTK (영문 불용어)
import nltk
try:
    _ = nltk.corpus.stopwords.words("english")
except:
    nltk.download("stopwords")
from nltk.corpus import stopwords
en_stop = set(stopwords.words("english")) | {"game","games"}  # 도메인 일반어 조금 추가 차단

# KoNLPy (한글 형태소) - 설치가 안 되어 있으면 간단 토크나이저로 fallback
try:
    from konlpy.tag import Okt
    okt = Okt()
    def ko_tokens(text: str):
        # 보통 명사/형용사 중심으로 노이즈 줄이기
        toks = okt.nouns(text)
        # 한글 2글자 이상 / 숫자·기호 제거 / 자주 나오는 조사/접속사류 제거
        toks = [t for t in toks if len(t) >= 2 and re.search(r"[가-힣]", t)]
        ko_stop = {"그리고","하지만","그러나","그런데","이번","정말","진짜","매우",
                   "좀","건","것","거","수","때","왜","더","이제","부분","문제","사람",
                   "게임","플레이","유저","콘텐츠","컨텐츠","개발","업데이트"}  # 필요시 추가
        return [t for t in toks if t not in ko_stop]
except Exception as e:
    print("[INFO] KoNLPy가 없거나 로딩 실패 → 간단 토크나이저로 대체합니다.", e)
    def ko_tokens(text: str):
        text = re.sub(r"[^가-힣\s]", " ", text)
        toks = [t for t in text.split() if len(t) >= 2]
        ko_stop = {"그리고","하지만","그러나","그런데","이번","정말","진짜","매우",
                   "좀","건","것","거","수","때","왜","더","이제","부분","문제","사람",
                   "게임","플레이","유저","콘텐츠","컨텐츠","개발","업데이트"}
        return [t for t in toks if t not in ko_stop]


# ==== 1) 부정(review_df)만 선택 ====
neg = reviews_df[(reviews_df["voted_up"] == False) & reviews_df["review"].notna()].copy()

# ==== 2) 언어별 토큰화 함수 ====
def en_tokens(text: str):
    # 소문자화, URL/코드/숫자 제거, 알파벳만
    text = text.lower()
    text = re.sub(r"http\S+|www\.\S+", " ", text)
    text = re.sub(r"[^a-z\s]", " ", text)
    toks = [t for t in text.split() if len(t) >= 2 and t not in en_stop]
    return toks

def tokenize_row(row):
    lang = (row.get("language") or "").lower()
    text = str(row["review"])
    if "korean" in lang or "kr" == lang:
        return ko_tokens(text)
    else:
        return en_tokens(text)

# ==== 3) 토큰 수집 및 빈도 ====
all_tokens = []
for _, r in neg.iterrows():
    all_tokens.extend(tokenize_row(r))

# 단일 단어(유니그램) TOP 20
uni_cnt = Counter(all_tokens)
top20_uni = pd.DataFrame(uni_cnt.most_common(20), columns=["token","count"])
display(top20_uni)

# ==== 4) 선택: 연어(빅그램)도 보고 싶다면 ====
def bigrams(tokens):
    return [" ".join(pair) for pair in zip(tokens, tokens[1:])]

all_bigrams = bigrams(all_tokens)
bi_cnt = Counter(all_bigrams)
top20_bi = pd.DataFrame(bi_cnt.most_common(20), columns=["bigram","count"])
display(top20_bi)

[INFO] KoNLPy가 없거나 로딩 실패 → 간단 토크나이저로 대체합니다. No module named 'konlpy'


Unnamed: 0,token,count
0,grounded,522
1,like,494
2,play,442
3,even,405
4,first,392
5,get,368
6,early,368
7,bugs,354
8,access,330
9,one,316


Unnamed: 0,bigram,count
0,early access,307
1,current state,84
2,first one,58
3,feels like,57
4,omni tool,48
5,first grounded,37
6,change review,35
7,every time,34
8,many bugs,33
9,unreal engine,32


In [12]:
import numpy as np
from collections import Counter
import pandas as pd

# 1) 긍정/부정 리뷰 토큰화
pos = reviews_df[(reviews_df["voted_up"]==True) & reviews_df["review"].notna()]
neg = reviews_df[(reviews_df["voted_up"]==False) & reviews_df["review"].notna()]

pos_tokens = []
for r in pos["review"]:
    pos_tokens.extend(en_tokens(str(r)))   # 언어 필터 필요 시 추가

neg_tokens = []
for r in neg["review"]:
    neg_tokens.extend(en_tokens(str(r)))

# 2) 각 집단 단어 빈도
pos_cnt = Counter(pos_tokens)
neg_cnt = Counter(neg_tokens)

# 3) 로그 오즈 비 계산 (간단 버전)
all_words = set(pos_cnt) | set(neg_cnt)
results = []
for w in all_words:
    a = neg_cnt[w] + 1   # 부정
    b = pos_cnt[w] + 1   # 긍정
    ratio = np.log(a/b)
    results.append((w, a, b, ratio))

df_ratio = pd.DataFrame(results, columns=["word","neg_count","pos_count","log_odds"])
# 부정쪽에서 상대적으로 많이 쓰인 단어
df_ratio.sort_values("log_odds", ascending=False).head(20)

Unnamed: 0,word,neg_count,pos_count,log_odds
10734,client,13,1,2.564949
7793,refunded,18,2,2.197225
10310,dogshit,8,1,2.079442
3846,member,8,1,2.079442
2842,dih,8,1,2.079442
10598,networking,8,1,2.079442
7828,bs,8,1,2.079442
6750,account,67,9,2.007468
9069,backward,7,1,1.94591
696,refunding,14,2,1.94591


In [13]:
pos_top20 = (
    df_ratio.sort_values("log_odds", ascending=True)
             .head(20)
             .reset_index(drop=True)
)

print(pos_top20)

           word  neg_count  pos_count  log_odds
0          peak          2        179 -4.494239
1   environment          1         31 -3.433987
2         scary          2         53 -3.277145
3        hooked          1         26 -3.258097
4        scared          1         24 -3.178054
5          pete          2         48 -3.178054
6        loving          5        114 -3.126761
7     fantastic          6        133 -3.098590
8    terrifying          1         22 -3.091042
9        goated          1         22 -3.091042
10    storyline          1         22 -3.091042
11     complain          1         21 -3.044522
12     exciting          2         41 -3.020425
13       lizard          2         41 -3.020425
14      shaping          1         19 -2.944439
15    challenge          2         37 -2.917771
16     upcoming          1         18 -2.890372
17    brilliant          1         18 -2.890372
18       sequal          1         18 -2.890372
19        chill          1         18 -2

In [14]:
reviews_df[reviews_df["review"].str.contains("peak", case=False)].sample(10)["review"]

Unnamed: 0,review
4834,PEAK
4363,This game is so peak bro i'd sacrifice my ball...
1579,Peak Game just like the first one
3507,"very fun, very buggy (not glitchy!!!!)\r\nbugg..."
5261,"Best game ever,\r\nThis game is absolute peak\..."
6864,"This game is peak, the best I could ask for wh..."
1141,Peak.
7182,Only speaking from my singleplayer experience:...
5032,"high-key peak, but the optimization needs to b..."
4198,first of all very good game i like how beautif...


In [18]:
print(reviews_df.loc[6864, "review"])


This game is peak, the best I could ask for when only in early access, however there are some "bugs". I mean the kinda bugs that interrupt progress. When I tried to start a game and invite some of my friends we got about 10 minutes in and then the game showed an error that said "Lost connection to host, returning to menu." After I got that message I tried logging back in only to be stuck on the "Logging in" screen. The only way I could get out of the game was to alt+f4 which meant some progress was lost, which is fine because we weren't very far, however after I tried logging back into grounded I was met with the same screen which caused me to restart my pc, but when I got back on I couldn't log back into Steam, the bug literally broke Steam. This only happened for about 20 minutes but I had to relog into everything which kinda sucked but after that I got back into Steam and was able to continue playing without any problems. So all in all the game is really fun but the bugs are to be e

In [19]:
mask_graphic = reviews_df["review"].str.contains("graphic|graphics|그래픽", case=False, na=False)
gfx_reviews = reviews_df[mask_graphic].copy()

In [20]:
summary = gfx_reviews.groupby("voted_up")["review"].count()
pos_ratio = summary[True] / summary.sum()
neg_ratio = summary[False] / summary.sum()
print("긍정:", pos_ratio, "부정:", neg_ratio)

긍정: 0.7128712871287128 부정: 0.2871287128712871


In [21]:
# 긍정 리뷰 중 그래픽 언급만
pos_gfx = gfx_reviews[gfx_reviews["voted_up"]==True]["review"].tolist()
# 부정 리뷰 중 그래픽 언급만
neg_gfx = gfx_reviews[gfx_reviews["voted_up"]==False]["review"].tolist()

In [22]:
pos_gfx

["My friends group and I have played dozens of Survival style games together. G2 seems to be a great game overall, runs well, graphics are good, decent combat, etc. The biggest issue is how they save group games. For a very long time we have always just paid to host private servers for our group play as that has proven the safest and most consistent. Ever other title we have played has this option but for some reason this is not an option for G2 (no private servers). They have a system to host and play together but it does not consistently work well. I play on a laptop and a desktop and can't even see my own saved multiplayer world on both PCs. For us its one of those things that's minor but also clearly a deal breaker. I have no idea why they can't just allow hosted servers. I will still play some when me friends aren't online or we aren't playing something else, just frustrating as I think this could be a lot of fun to play with our friends group.",
 'game is incredible beautiful gra

In [25]:
import pandas as pd

# pandas가 긴 텍스트를 자르지 않게
pd.set_option("display.max_colwidth", None)

import textwrap
import re, textwrap

def highlight_graphic(text, width=80):
    # 'graphic', 'graphics', '그래픽' 단어 강조 (빨간색)
    pattern = re.compile(r"(graphic[s]?|그래픽)", re.IGNORECASE)
    highlighted = pattern.sub("\033[91m\\1\033[0m", text)  # 빨강
    return textwrap.fill(highlighted, width=width)

# 긍정 샘플 3개
print("\n[긍정 샘플 3개]")
for s in pos_gfx[:3]:
    print("-"*60)
    print(highlight_graphic(s))
    wrapped = textwrap.fill(s, width=80)  # 80자 단위로 줄바꿈
    print(wrapped)

# 부정 샘플 3개
print("\n[부정 샘플 3개]")
for s in neg_gfx[:3]:
    print("-"*60)
    print(highlight_graphic(s))
    wrapped = textwrap.fill(s, width=80)
    print(wrapped)


[긍정 샘플 3개]
------------------------------------------------------------
My friends group and I have played dozens of Survival style games together. G2
seems to be a great game overall, runs well, [91mgraphics[0m are good, decent
combat, etc. The biggest issue is how they save group games. For a very long
time we have always just paid to host private servers for our group play as that
has proven the safest and most consistent. Ever other title we have played has
this option but for some reason this is not an option for G2 (no private
servers). They have a system to host and play together but it does not
consistently work well. I play on a laptop and a desktop and can't even see my
own saved multiplayer world on both PCs. For us its one of those things that's
minor but also clearly a deal breaker. I have no idea why they can't just allow
hosted servers. I will still play some when me friends aren't online or we
aren't playing something else, just frustrating as I think this could be a