# 합친 데이터 필터링

In [4]:
import pandas as pd

# Location of the merged games dataset, relative to this notebook.
data = '../data/merged_games_data.csv'

# The file is read with the cp949 codec rather than pandas' default UTF-8.
df = pd.read_csv(data, encoding='cp949')

# Preview the first rows to confirm the load worked.
print(df.head())

              게임 이름     원가    할인가  \
0  Dead by Daylight  21500   8600   
1        Grounded 2  34800  34800   
2      Ready or Not  53000  53000   
3              PEAK   8400   8400   
4        Mage Arena   3400   3400   

                                             사이트 URL   할인율  유저리뷰수 플랫폼 이름  \
0  https://store.steampowered.com/app/381210/Dead...  60.0   9914  Steam   
1  https://store.steampowered.com/app/2661300/Gro...   0.0   3012  Steam   
2  https://store.steampowered.com/app/1144200/Rea...   0.0  30759  Steam   
3   https://store.steampowered.com/app/3527290/PEAK/   0.0  38103  Steam   
4  https://store.steampowered.com/app/3716600/Mag...   0.0   3925  Steam   

                                             이미지 URL  \
0  https://shared.fastly.steamstatic.com/store_it...   
1  https://shared.fastly.steamstatic.com/store_it...   
2  https://shared.fastly.steamstatic.com/store_it...   
3  https://shared.fastly.steamstatic.com/store_it...   
4  https://shared.fastly.steamstatic.com

In [6]:
# Keep only rows where both the list price and the discounted price are present.
has_prices = df['원가'].notnull() & df['할인가'].notnull()
df = df[has_prices]
# Price strings that are empty after stripping whitespace count as missing too.
nonblank_prices = (df['원가'].str.strip() != '') & (df['할인가'].str.strip() != '')
df = df[nonblank_prices]

# Apply the same two checks to the genre column.
df = df[df['장르'].notnull()]
nonblank_genre = df['장르'].str.strip() != ''
df = df[nonblank_genre]

print(df)

                 게임 이름      원가    할인가  \
0     Dead by Daylight   21500   8600   
1           Grounded 2   34800  34800   
2         Ready or Not   53000  53000   
3                 PEAK    8400   8400   
4           Mage Arena    3400   3400   
...                ...     ...    ...   
7257    Armored Fist 3   $9.99  $2.50   
7259  Worms Armageddon  $14.99  $2.64   
7260  F-22 Lightning 3   $9.99  $2.50   
7261    MiG-29 Fulcrum   $9.99  $2.50   
7262          Quake II   $9.99  $3.40   

                                                사이트 URL   할인율  유저리뷰수  \
0     https://store.steampowered.com/app/381210/Dead...  60.0   9914   
1     https://store.steampowered.com/app/2661300/Gro...   0.0   3012   
2     https://store.steampowered.com/app/1144200/Rea...   0.0  30759   
3      https://store.steampowered.com/app/3527290/PEAK/   0.0  38103   
4     https://store.steampowered.com/app/3716600/Mag...   0.0   3925   
...                                                 ...   ...    ...   
725

In [18]:
import re

def normalize_age_rating(value):
    """Map a raw age-rating value onto a uniform Korean label.

    Args:
        value: Raw rating cell; any type accepted by ``pd.isna`` and ``str``.

    Returns:
        ``'<N>세 이용가'`` where ``<N>`` is the first run of digits found in the
        text form of ``value``, or ``'전체 이용가'`` when the value is missing
        or contains no digits.
    """
    all_ages = '전체 이용가'

    # Missing ratings fall back to the all-ages label.
    if pd.isna(value):
        return all_ages

    # Only the first run of digits is used; any later numbers are ignored.
    # NOTE(review): rating labels that carry no digits also end up as
    # all-ages here - confirm that is intended for restricted ratings.
    first_number = re.search(r'\d+', str(value))
    return first_number.group() + '세 이용가' if first_number else all_ages

In [20]:
# Replace every raw rating with its normalized label, then show the column.
rating_column = '연령 등급'
df[rating_column] = df[rating_column].apply(normalize_age_rating)

print(df[rating_column])

0        전체 이용가
1        전체 이용가
2        전체 이용가
3        전체 이용가
4        전체 이용가
         ...   
7257    12세 이용가
7259     전체 이용가
7260     전체 이용가
7261     전체 이용가
7262     전체 이용가
Name: 연령 등급, Length: 5314, dtype: object


In [25]:
EXCHANGE_RATE = 1398.94  # 1 USD = 1398.94 KRW


def clean_price(value, exchange_rate=None):
    """Convert a raw price cell into an integer amount in KRW.

    Args:
        value: Raw price. Strings may carry currency symbols, thousands
            separators and surrounding whitespace; numeric values are accepted
            as well.
        exchange_rate: KRW per 1 USD used for dollar amounts. Defaults to the
            module-level ``EXCHANGE_RATE`` looked up at call time.

    Returns:
        int: The price in KRW. ``0`` for empty strings and for labels
            containing '무료' or 'free' (case-insensitive).
        None: When the value is missing or no number can be parsed from it.
    """
    if pd.isna(value):
        return None

    rate = EXCHANGE_RATE if exchange_rate is None else exchange_rate

    # An integral float (e.g. 21500.0 from a column that pandas parsed as
    # float, or from re-running this cleaning on its own output) is already a
    # KRW amount. Stringifying it would yield '21500.0', and the decimal-point
    # heuristic below would then wrongly apply the exchange rate.
    if isinstance(value, float) and value.is_integer():
        return int(value)

    text = str(value).strip()

    if text == '' or '무료' in text or 'free' in text.lower():
        return 0

    # Keep only digits and separators, then drop thousands separators.
    price_str = re.sub(r'[^\d.,]', '', text).replace(',', '')

    # A dollar sign marks USD even without a decimal part ('$10'); a decimal
    # point alone is still treated as USD, as before.
    is_usd = '$' in text or '.' in price_str

    try:
        if is_usd:
            return int(round(float(price_str) * rate))
        return int(price_str)
    except (ValueError, OverflowError):
        # Nothing parseable (e.g. 'abc', '1.2.3') or a non-finite result.
        return None

In [28]:
# Convert both price columns to integer KRW amounts (None where unparseable).
df['원가'] = df['원가'].apply(clean_price)
df['할인가'] = df['할인가'].apply(clean_price)

# Use one spelling for the Epic platform name.
df['플랫폼 이름'] = df['플랫폼 이름'].replace({'Epic': 'Epic Games'})

# A row is a duplicate when name, platform and store URL all match;
# the first occurrence is kept.
df = df.drop_duplicates(subset=['게임 이름', '플랫폼 이름', '사이트 URL'])

# Strip the minus sign from discount rates. astype(str) turns missing values
# into the literal string 'nan', so missing entries are restored afterwards
# and stay empty in the exported CSV.
discount_raw = df['할인율']
discount_text = discount_raw.astype(str).str.replace('-', '', regex=False)
df['할인율'] = discount_text.where(discount_raw.notnull())

df.to_csv('../data/cleaned_merged_games_data.csv', index=False, encoding='utf-8')

print(df.head())

              게임 이름     원가    할인가  \
0  Dead by Daylight  21500   8600   
1        Grounded 2  34800  34800   
2      Ready or Not  53000  53000   
3              PEAK   8400   8400   
4        Mage Arena   3400   3400   

                                             사이트 URL   할인율  유저리뷰수 플랫폼 이름  \
0  https://store.steampowered.com/app/381210/Dead...  60.0   9914  Steam   
1  https://store.steampowered.com/app/2661300/Gro...   0.0   3012  Steam   
2  https://store.steampowered.com/app/1144200/Rea...   0.0  30759  Steam   
3   https://store.steampowered.com/app/3527290/PEAK/   0.0  38103  Steam   
4  https://store.steampowered.com/app/3716600/Mag...   0.0   3925  Steam   

                                             이미지 URL  \
0  https://shared.fastly.steamstatic.com/store_it...   
1  https://shared.fastly.steamstatic.com/store_it...   
2  https://shared.fastly.steamstatic.com/store_it...   
3  https://shared.fastly.steamstatic.com/store_it...   
4  https://shared.fastly.steamstatic.com