In [4]:
import pandas as pd
import sys
import csv # quoting 옵션을 위해 import


In [5]:
pd.set_option('display.max_columns', None)

In [6]:
# --- 설정 부분 ---

# 원본 CSV 파일 경로
original_csv_path = 'steam_games_dataset.csv' # 사용자의 전체 데이터 파일 이름으로 변경하세요.

# 저장될 파일 이름
structured_output_path = 'steam_games_structured_data.csv'
unstructured_output_path = 'steam_games_unstructured_data.csv'


# --- 데이터 분리 로직 ---

print("스크립트를 시작합니다.")

try:
    
    # 원본 CSV 파일 불러오기
    print(f"'{original_csv_path}' 파일을 읽는 중입니다...")
    df = pd.read_csv(original_csv_path,
        encoding='utf-8',
        index_col=False, # CSV의 첫 번째 열을 인덱스로 사용하지 않도록 명시
        header=0
        )
    print("파일을 성공적으로 불러왔습니다.")
    
    # 1-1. 정형 데이터 컬럼 (주로 숫자형 데이터)
    # MySQL Workbench에서 INT, BIGINT, DOUBLE, TINYINT 등으로 확인된 컬럼들
    # ★★★ Name 컬럼을 추가하여 데이터 가독성 및 편의성 향상 ★★★
    structured_columns = [
        'AppID', 'Name', 'Peak CCU', 'Required age', 'Price', 'DiscountDLC count', 'Windows',
        'Mac', 'Linux', 'Metacritic score', 'User score', 'Positive', 'Negative',
        'Score rank', 'Achievements', 'Recommendations', 'Average playtime forever',
        'Average playtime two weeks', 'Median playtime forever', 'Median playtime two weeks'
    ]
    
    # 1-2. 비정형 데이터 컬럼 (주로 텍스트 데이터)
    # RAG의 검색 대상이 될 의미있는 텍스트를 포함한 컬럼들
    # 2-2. 비정형 데이터 컬럼
    unstructured_columns = [
        'AppID', 'Name', 'Release date', 'Estimated owners', 'About the game',
        'Supported languages', 'Full audio languages', 'Reviews', 'Header image',
        'Website', 'Support url', 'Support email', 'Metacritic url', 'Notes',
        'Developers', 'Publishers', 'Categories', 'Genres', 'Tags', 'Screenshots', 'Movies'
    ]
    

    # 3. 정의된 컬럼 목록을 사용하여 두 개의 DataFrame으로 분리
    # 원본 DataFrame에 해당 컬럼이 없는 경우를 대비하여 에러 핸들링 추가
    
    # 사용 가능한 컬럼만 필터링
    available_structured_cols = [col for col in structured_columns if col in df.columns]
    available_unstructured_cols = [col for col in unstructured_columns if col in df.columns]

    df_structured = df[available_structured_cols]
    df_unstructured = df[available_unstructured_cols]

    print("정형 데이터와 비정형 데이터로 분리를 완료했습니다.")

    # 4. 분리된 DataFrame을 각각의 CSV 파일로 저장
    # encoding='utf-8-sig'는 엑셀에서 한글이 깨지지 않게 하기 위함입니다.
    df_structured.to_csv(structured_output_path, index=False, encoding='utf-8')
    print(f"-> 정형 데이터가 '{structured_output_path}' 파일로 저장되었습니다.")
    
    df_unstructured.to_csv(unstructured_output_path, index=False, encoding='utf-8')
    print(f"-> 비정형 데이터가 '{unstructured_output_path}' 파일로 저장되었습니다.")
    
    print("\n모든 작업이 완료되었습니다!")

except FileNotFoundError:
    print(f"오류: '{original_csv_path}' 파일을 찾을 수 없습니다. 파일 이름을 확인해주세요.")
    sys.exit(1)
except Exception as e:
    print(f"오류가 발생했습니다: {e}")
    sys.exit(1)

스크립트를 시작합니다.
'steam_games_dataset.csv' 파일을 읽는 중입니다...


  df = pd.read_csv(original_csv_path,


파일을 성공적으로 불러왔습니다.
정형 데이터와 비정형 데이터로 분리를 완료했습니다.
-> 정형 데이터가 'steam_games_structured_data.csv' 파일로 저장되었습니다.
-> 비정형 데이터가 'steam_games_unstructured_data.csv' 파일로 저장되었습니다.

모든 작업이 완료되었습니다!


In [10]:
df_structured_simple = df_structured[0:10]
df_unstructured_simple = df_unstructured[0:10]

In [11]:
df_structured_simple

Unnamed: 0,AppID,Name,Peak CCU,Required age,Price,DiscountDLC count,Windows,Mac,Linux,Metacritic score,User score,Positive,Negative,Score rank,Achievements,Recommendations,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks
0,20200,Galactic Bowling,0,0,19.99,0,,True,False,False,,0,6,11,,30,,0,0,0
1,655370,Train Bandit,0,0,0.99,0,support@rustymoyher.com,True,True,False,,0,53,5,,12,,0,0,0
2,1732930,Jolt Project,0,0,4.99,0,ramoncampiaof31@gmail.com,True,False,False,,0,0,0,,0,,0,0,0
3,1355720,Henosis™,0,0,5.99,0,info@henosisgame.com,True,True,True,,0,3,0,,0,,0,0,0
4,1139950,Two Weeks in Painland,0,0,0.0,0,welistentoyou@unusual-games.com,True,True,False,,0,50,8,,17,This Game may contain content not appropriate ...,0,0,0
5,1469160,Wartune Reborn,68,0,0.0,0,https://wartune@7road.com,True,False,False,,0,87,49,,0,,0,0,0
6,1659180,TD Worlds,3,0,10.99,0,td.worlds.official@gmail.com,True,False,False,,0,21,7,,62,,0,0,0
7,1968760,Legend of Rome - The Wrath of Mars,2,0,9.99,0,support@magnussoft.de,True,False,False,,0,0,0,,0,,0,0,0
8,1178150,MazM: Jekyll and Hyde,1,0,14.99,0,cfk@cfk.kr,True,False,False,,0,76,6,,25,,0,0,0
9,320150,Deadlings: Rotten Edition,0,0,3.99,0,support@omlgames.com,True,True,True,,0,225,45,,32,,703,0,782


In [12]:
df_unstructured_simple

Unnamed: 0,AppID,Name,Release date,Estimated owners,About the game,Supported languages,Full audio languages,Reviews,Header image,Website,Support url,Support email,Metacritic url,Notes,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,Galactic Bowling is an exaggerated and stylize...,['English'],[],,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.galacticbowling.net,,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,THE LAW!! Looks to be a showdown atop a train....,"['English', 'French', 'Italian', 'German', 'Sp...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,http://trainbandit.com,,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...
2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,Jolt Project: The army now has a new robotics ...,"['English', 'Portuguese - Brazil']",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",,https://cdn.akamai.steamstatic.com/steam/apps/...
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"['English', 'French', 'Italian', 'German', 'Sp...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,https://henosisgame.com/,https://henosisgame.com/,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,ABOUT THE GAME Play as a hacker who has arrang...,"['English', 'Spanish - Spain']",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.unusual-games.com/home/,https://www.unusual-games.com/contact/,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...
5,1469160,Wartune Reborn,"Feb 26, 2021",50000 - 100000,0,Feel tired of auto-fight? Feel tired of boring...,['English'],[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,https://7.wan.com,0,0,0,7Road,7Road,"Single-player,Multi-player,MMO,PvP,Online PvP,...","Adventure,Casual,Free to Play,Massively Multip...","Turn-Based Combat,Massively Multiplayer,Multip...",https://cdn.akamai.steamstatic.com/steam/apps/...
6,1659180,TD Worlds,"Jan 9, 2022",0 - 20000,1,"TD Worlds is a dynamic, highly strategical gam...","['English', 'Russian', 'Danish']",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,,,0,0,0,MAKSIM VOLKAU,MAKSIM VOLKAU,"Single-player,Steam Achievements,Steam Cloud","Indie,Strategy","Tower Defense,Rogue-lite,RTS,Replay Value,Perm...",https://cdn.akamai.steamstatic.com/steam/apps/...
7,1968760,Legend of Rome - The Wrath of Mars,"May 5, 2022",0 - 20000,0,When the Roman people honored a simple warrior...,"['English', 'German']","['English', 'German']",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://magnussoft.biz/,http://magnussoft.biz/,0,0,0,magnussoft,magnussoft,"Single-player,Steam Cloud",Casual,,https://cdn.akamai.steamstatic.com/steam/apps/...
8,1178150,MazM: Jekyll and Hyde,"Apr 2, 2020",0 - 20000,0,'MazM: Jekyll and Hyde' is a darkly entertaini...,"['English', 'French', 'Italian', 'German', 'Sp...",[],,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.cfk.kr/,http://www.cfk.kr,0,0,0,Growing Seeds,"CFK Co., Ltd.","Single-player,Steam Achievements,Full controll...","Adventure,RPG,Simulation,Strategy","Adventure,Simulation,RPG,Strategy,Singleplayer...",https://cdn.akamai.steamstatic.com/steam/apps/...
9,320150,Deadlings: Rotten Edition,"Nov 11, 2014",50000 - 100000,0,Death is lonely. He has zero friends on his Fa...,"['English', 'Polish', 'French', 'Italian', 'Ge...","['English', 'Japanese']",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.omlgames.com,http://www.omlgames.com,0,0,0,ONE MORE LEVEL,ONE MORE LEVEL,"Single-player,Steam Achievements,Steam Trading...","Action,Adventure,Indie","Action,Indie,Adventure,Puzzle-Platformer,Arcad...",https://cdn.akamai.steamstatic.com/steam/apps/...


In [13]:
df.columns

Index(['AppID', 'Name', 'Release date', 'Estimated owners', 'Peak CCU',
       'Required age', 'Price', 'DiscountDLC count', 'About the game',
       'Supported languages', 'Full audio languages', 'Reviews',
       'Header image', 'Website', 'Support url', 'Support email', 'Windows',
       'Mac', 'Linux', 'Metacritic score', 'Metacritic url', 'User score',
       'Positive', 'Negative', 'Score rank', 'Achievements', 'Recommendations',
       'Notes', 'Average playtime forever', 'Average playtime two weeks',
       'Median playtime forever', 'Median playtime two weeks', 'Developers',
       'Publishers', 'Categories', 'Genres', 'Tags', 'Screenshots', 'Movies'],
      dtype='object')

In [14]:
df_structured_simple.to_csv('simple_steam_games_structured_data.csv', index=False, encoding='utf-8')
print(f"-> 정형 데이터가 'simple_steam_games_structured_data' 파일로 저장되었습니다.")

df_unstructured_simple.to_csv('simple_steam_games_unstructured_data.csv', index=False, encoding='utf-8')
print(f"-> 비정형 데이터가 'simple_steam_games_unstructured_data' 파일로 저장되었습니다.")

-> 정형 데이터가 'simple_steam_games_structured_data' 파일로 저장되었습니다.
-> 비정형 데이터가 'simple_steam_games_unstructured_data' 파일로 저장되었습니다.
