# This notebook pulls the Steam Games dataset from Kaggle, cleans out the irrelevant or missing data, and then imports the result into PostgreSQL

NOTICE! 
- Ensure that your Kaggle API token is in the API folder, and that the token is added to gitignore
- Ensure that you configure PostgreSQL.py with appropriate information
- Disable 'os.remove(gamesCSV)' if you have 'RawData\games.csv' added to gitignore

In [37]:
import pandas as pd
# The Kaggle API client ('kaggle', imported at the bottom of this cell) is one of the libraries
# not covered that we'll use in this project; it allows us to pull data directly from the Kaggle API.
import os
# Point the Kaggle client at the 'API' folder that sits one level above the current working
# directory by exporting its absolute path as KAGGLE_CONFIG_DIR.
# NOTE(review): this is done before 'import kaggle' -- the package presumably looks for the
# token when it is imported, so confirm before reordering these lines.
APIPATH = os.path.abspath(os.path.join(os.getcwd(), '..', 'API'))
os.environ['KAGGLE_CONFIG_DIR'] = APIPATH

# Make sure to run "pip install kaggle" in the terminal of your choice, or simply 'pip install kaggle' in a new Jupyter cell
import kaggle


In [38]:
# NOTICE -- This cell downloads the whole dataset every time it runs, so run it as rarely as possible.
# I recommend adding 'games.csv' to gitignore and keeping the 'os.remove(gamesCSV)' clean-up line
# (in the cell that loads the CSV) disabled, so the file can be reused and this cell skipped.
# Setting up the download directory: the 'RawData' folder one level above the current working directory.
downloadPath = os.path.abspath(os.path.join(os.getcwd(), '..', 'RawData'))
kaggle.api.dataset_download_files('fronkongames/steam-games-dataset', path=downloadPath, unzip=True)

# Deleting games.json (we don't need it). The path is built from downloadPath so it always points
# into the folder that was just downloaded to, and the existence check keeps the cell from
# failing with FileNotFoundError when the archive did not contain a games.json.
bye = os.path.join(downloadPath, 'games.json')
if os.path.exists(bye):
    os.remove(bye)


Dataset URL: https://www.kaggle.com/datasets/fronkongames/steam-games-dataset


In [39]:
# Read the downloaded games.csv (in the 'RawData' folder one level up) into a DataFrame.
gamesCSV = os.path.join('..','RawData','games.csv')
SteamGamesRaw = pd.read_csv(filepath_or_buffer=gamesCSV)
# Optional clean-up: uncomment the next line to delete games.csv once it is loaded.
# Leave it disabled if 'games.csv' is added to gitignore, and skip cell 2 on later runs.
# os.remove(gamesCSV)
# Preview the first five rows of the raw data.
SteamGamesRaw.head(n=5)

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,...,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,['English'],...,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,0,4.99,0,Jolt Project: The army now has a new robotics ...,"['English', 'Portuguese - Brazil']",...,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.0,0,ABOUT THE GAME Play as a hacker who has arrang...,"['English', 'Spanish - Spain']",...,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...


In [40]:
# CLEANING TIME
# Print a summary of the raw frame (column names, non-null counts and dtypes) so we can see
# which columns have missing values before deciding what to drop.
SteamGamesRaw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85103 entries, 0 to 85102
Data columns (total 39 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   AppID                       85103 non-null  int64  
 1   Name                        85097 non-null  object 
 2   Release date                85103 non-null  object 
 3   Estimated owners            85103 non-null  object 
 4   Peak CCU                    85103 non-null  int64  
 5   Required age                85103 non-null  int64  
 6   Price                       85103 non-null  float64
 7   DLC count                   85103 non-null  int64  
 8   About the game              81536 non-null  object 
 9   Supported languages         85103 non-null  object 
 10  Full audio languages        85103 non-null  object 
 11  Reviews                     9743 non-null   object 
 12  Header image                85103 non-null  object 
 13  Website                     394

In [41]:
# Removing columns with either irrelevant or redundant data that will not help our study.
# Each label is listed exactly once. drop() returns a new DataFrame, so SteamGamesRaw
# itself is left unchanged and the trimmed copy is stored in SteamGamesDroppedColumns.
SteamGamesDroppedColumns = SteamGamesRaw.drop(
    columns=[
        'Support url',
        'Header image',
        'About the game',
        'Website',
        'Reviews',
        'Support email',
        'Metacritic url',
        'Notes',
        'Tags',
        'Average playtime forever',
        'Average playtime two weeks',
        'Median playtime forever',
        'Median playtime two weeks',
        'Screenshots',
        'Movies',
    ],
)
# Display the trimmed frame.
SteamGamesDroppedColumns

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,Supported languages,Full audio languages,...,User score,Positive,Negative,Score rank,Achievements,Recommendations,Developers,Publishers,Categories,Genres
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,['English'],[],...,0,6,11,,30,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports"
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,"['English', 'French', 'Italian', 'German', 'Sp...",[],...,0,53,5,,12,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie"
2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,0,4.99,0,"['English', 'Portuguese - Brazil']",[],...,0,0,0,,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy"
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,"['English', 'French', 'Italian', 'German', 'Sp...",[],...,0,3,0,,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie"
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.00,0,"['English', 'Spanish - Spain']",[],...,0,50,8,,17,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85098,2669080,Mannerheim's Saloon Car,"Jan 2, 2024",0 - 0,0,0,0.00,0,"['English', 'Finnish']",['Finnish'],...,0,0,0,,0,0,Xamk Game Studios,"Sodan ja rauhan keskus Muisti, Päämajamuseo","Single-player,Tracked Controller Support,VR Only","Adventure,Simulation"
85099,2736910,Beer Run,"Jan 3, 2024",0 - 0,0,0,0.00,0,['English'],[],...,0,0,0,,0,0,955 Games,955 Games,Single-player,"Casual,Indie"
85100,2743220,My Friend The Spider,"Jan 4, 2024",0 - 0,0,0,0.00,0,['English'],['English'],...,0,0,0,,0,0,MCA,MCA,Single-player,"Adventure,Simulation"
85101,2293130,Path of Survivors,"Jan 8, 2024",0 - 0,0,0,3.99,0,['English'],[],...,0,0,0,,34,0,Limited Input,Limited Input,"Single-player,Steam Achievements,Partial Contr...","Action,Casual,Indie,RPG,Simulation"


In [42]:
# Keep only the games whose peak concurrent-user count ('Peak CCU') is not 0.
SteamGamesDroppedColumns = SteamGamesDroppedColumns.loc[
    lambda frame: frame['Peak CCU'] != 0
]
# Display the filtered frame.
SteamGamesDroppedColumns

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,Supported languages,Full audio languages,...,User score,Positive,Negative,Score rank,Achievements,Recommendations,Developers,Publishers,Categories,Genres
5,1469160,Wartune Reborn,"Feb 26, 2021",50000 - 100000,68,0,0.00,0,['English'],[],...,0,87,49,,0,0,7Road,7Road,"Single-player,Multi-player,MMO,PvP,Online PvP,...","Adventure,Casual,Free to Play,Massively Multip..."
6,1659180,TD Worlds,"Jan 9, 2022",0 - 20000,3,0,10.99,1,"['English', 'Russian', 'Danish']",[],...,0,21,7,,62,0,MAKSIM VOLKAU,MAKSIM VOLKAU,"Single-player,Steam Achievements,Steam Cloud","Indie,Strategy"
7,1968760,Legend of Rome - The Wrath of Mars,"May 5, 2022",0 - 20000,2,0,9.99,0,"['English', 'German']","['English', 'German']",...,0,0,0,,0,0,magnussoft,magnussoft,"Single-player,Steam Cloud",Casual
8,1178150,MazM: Jekyll and Hyde,"Apr 2, 2020",0 - 20000,1,0,14.99,0,"['English', 'French', 'Italian', 'German', 'Sp...",[],...,0,76,6,,25,0,Growing Seeds,"CFK Co., Ltd.","Single-player,Steam Achievements,Full controll...","Adventure,RPG,Simulation,Strategy"
10,1026420,WARSAW,"Oct 2, 2019",20000 - 50000,5,0,23.99,0,"['English', 'French', 'German', 'Polish', 'Rus...",[],...,0,589,212,,34,427,Pixelated Milk,"Pixelated Milk,gaming company","Single-player,Steam Achievements,Steam Trading...","Indie,RPG"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85058,2738420,Backrooms: Eight Levels,"Jan 6, 2024",0 - 20000,3,0,0.74,0,['English'],[],...,0,1,0,,0,0,Performance Artist,Performance Artist,Single-player,"Action,Adventure,Casual,Indie,RPG,Simulation"
85077,2704060,Ant Farm Simulator,"Jan 5, 2024",0 - 20000,3,0,0.99,0,"['English', 'Italian', 'German', 'Spanish - Sp...",[],...,0,1,1,,0,0,Smirna Simulator Games,Smirna Simulator Games,Single-player,"Casual,Indie,Simulation,Early Access"
85084,2508400,Mothers and Daughters,"Jan 2, 2024",0 - 20000,29,0,8.49,0,['English'],[],...,0,0,0,,0,0,Spin256,Spin256,Single-player,"Adventure,Indie,Early Access"
85086,2000770,Ballance,"Jan 5, 2024",0 - 20000,34,0,3.59,0,"['English', 'French', 'Italian', 'German', 'Sp...",[],...,0,0,0,,0,109,Cyparade,Ziggurat,Single-player,Casual


In [45]:
# Keep only the games that list at least one language in 'Full audio languages',
# i.e. discard rows where that column holds the literal string '[]'.
SteamGamesDroppedColumns = SteamGamesDroppedColumns.loc[
    lambda frame: frame['Full audio languages'] != '[]'
]
# Display the filtered frame.
SteamGamesDroppedColumns

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,Supported languages,Full audio languages,...,User score,Positive,Negative,Score rank,Achievements,Recommendations,Developers,Publishers,Categories,Genres
7,1968760,Legend of Rome - The Wrath of Mars,"May 5, 2022",0 - 20000,2,0,9.99,0,"['English', 'German']","['English', 'German']",...,0,0,0,,0,0,magnussoft,magnussoft,"Single-player,Steam Cloud",Casual
22,434030,Aerofly FS 2 Flight Simulator,"Nov 20, 2017",100000 - 200000,19,0,37.49,23,"['English', 'German', 'French']","['English', 'German', 'French']",...,0,1490,408,,0,1831,IPACS,IPACS,"Single-player,Partial Controller Support","Action,Indie,Racing,Simulation"
39,262150,Vanguard Princess,"Mar 3, 2014",200000 - 500000,1,0,2.49,7,['English'],['English'],...,0,2014,841,,0,1477,Tomoaki Sugeno,eigoMANGA,"Single-player,Multi-player,PvP,Shared/Split Sc...","Action,Casual,Indie"
40,1192900,IRON REBELLION,"Nov 9, 2021",50000 - 100000,2,0,11.99,0,['English'],['English'],...,0,168,39,,0,186,Black Beach Studio,Black Beach Studio,"Multi-player,PvP,Online PvP,Co-op,Online Co-op...","Action,Indie,Early Access"
42,231330,Deadfall Adventures,"Nov 15, 2013",100000 - 200000,4,0,19.99,0,"['English', 'German', 'Polish', 'French', 'Rus...","['English', 'German', 'Russian']",...,0,1716,628,,50,1140,The Farm 51,THQ Nordic,"Single-player,Multi-player,Co-op,Steam Achieve...","Action,Adventure"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84971,2707500,ROKO'S BASILISK,"Jan 5, 2024",0 - 20000,1,0,1.79,0,"['English', 'Spanish - Spain', 'Portuguese - B...","['English', 'Simplified Chinese']",...,0,9,0,,3,0,Roko,Roko,"Single-player,Steam Achievements",Indie
84981,2710320,THE DESCENT,"Jan 5, 2024",0 - 20000,4,0,6.79,0,"['English', 'French', 'Italian', 'German', 'Sp...","['English', 'Ukrainian']",...,0,8,1,,12,0,Celeritas Games,Celeritas Games,"Single-player,Steam Achievements","Indie,Simulation"
85015,2719650,Project XSTING,"Jan 5, 2024",0 - 20000,3,0,7.99,0,"['English', 'Spanish - Spain', 'Portuguese - B...","['English', 'Portuguese - Brazil']",...,0,5,0,,42,0,Saucy Melon,Saucy Melon,"Single-player,Steam Achievements,Steam Cloud","Action,Casual,Indie,Early Access"
85034,2234690,The Day Before You Gone,"Jan 5, 2024",20000 - 50000,3,0,7.64,2,"['English', 'Russian']",['Russian'],...,0,11,0,,30,0,Aequalis Studio,Aequalis Studio,"Single-player,Steam Achievements","Adventure,Indie"
