In [41]:
import pandas as pd
import ast
from category_encoders import BinaryEncoder

In [42]:
df = pd.read_csv('Data/PricePrediction.csv')
df

Unnamed: 0,name,steam_appid,is_free,descriptions,supported_languages,developers,publishers,categories,genres,release_date,min_requirements,rec_requirements,total_recommendations,price
0,Clash of Warlords,1430720,False,Join the Epic Battles in this most addicting t...,"['Simplified Chinese', 'Traditional Chinese']",['XINLINE GAMES'],['XINLINE GAMES'],"['Single-player', 'In-App Purchases', 'Family ...",['Strategy'],"Feb 7, 2021",Minimum:OS: Windows7Processor: Intel Core i3 o...,Recommended:OS: Windows10Processor: Intel Core...,0,699.0
1,Mine Crazy: The Korean Grinder,1430740,False,This is the essence of Korean grinder MMO. Thi...,['English'],['Dano Sato'],['RealMono Inc.'],"['Single-player', 'Family Sharing']","['Casual', 'Indie', 'RPG', 'Simulation']","Oct 8, 2020","Minimum:OS *: Windows 7,10Processor: AnyMemory...","Minimum:OS *: Windows 7,10Processor: AnyMemory...",0,199.0
2,Fade,1430100,False,Set in a small town in the Mid-west. You are a...,['English'],['Azimyth Studios'],['Azimyth Studios'],"['Single-player', 'Family Sharing']","['Indie', 'RPG']","Oct 29, 2020",Minimum:OS *: Windows VistaProcessor: Intel Co...,Minimum:OS *: Windows VistaProcessor: Intel Co...,0,199.0
3,Clash: Artifacts of Chaos,1430680,False,ZENO EDITION About the Game You play as Pseudo...,"['English', 'French', 'Italian', 'German', 'Sp...",['ACE Team'],['Nacon'],"['Single-player', 'Steam Achievements', 'Steam...","['Action', 'Adventure', 'Indie']","Mar 9, 2023",Minimum:Requires a 64-bit processor and operat...,Recommended:Requires a 64-bit processor and op...,759,2999.0
4,Astatos,1430970,False,"Astatos, a card-battling adventure filled with...","['English', 'Simplified Chinese', 'Traditional...",['Studio Klondike Australia'],['Studio Klondike'],"['Single-player', 'Multi-player', 'PvP', 'Onli...","['Adventure', 'Indie', 'Strategy', 'Early Acce...","Dec 16, 2021",Minimum:Requires a 64-bit processor and operat...,Recommended:Requires a 64-bit processor and op...,243,1499.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91278,Survive Avalon,1617600,False,The big 1.5 update is coming in next month!We ...,['English'],['Sky Empire Games'],['Sky Empire Games'],"['Single-player', 'Partial Controller Support'...","['Action', 'Adventure']","May 31, 2021",Minimum:Requires a 64-bit processor and operat...,Recommended:Requires a 64-bit processor and op...,0,1299.0
91279,Cam Circle VR,1617770,False,Cam Circle VR is a mixed reality (MR) tool tha...,['English'],['Reality Inside Ltd.'],['Reality Inside Ltd.'],"['Tracked Controller Support', 'VR Only']",['Utilities'],"Nov 16, 2021",Minimum:Requires a 64-bit processor and operat...,Recommended:Requires a 64-bit processor and op...,0,299.0
91280,Neon Light,1617780,False,Neon Light is a 2D platformer with interesting...,['English'],['Dani Gas'],['Louie Inc'],"['Single-player', 'Steam Achievements', 'Famil...","['Adventure', 'Casual', 'Indie']","May 17, 2021",Minimum:OS *: Windows 7/8/10Processor: Intel C...,Minimum:OS *: Windows 7/8/10Processor: Intel C...,0,199.0
91281,G for Gravity,1617790,True,Humankind is in danger! Asteroids hit and dest...,['English'],['OGW G5'],['MasterDroid'],"['Single-player', 'Steam Achievements']","['Casual', 'Free To Play', 'Indie']","May 10, 2021",Minimum:OS: Windows 10Processor: Intel(R) Core...,Recommended:OS: Windows 10Processor: Ryzen 5 1...,0,0.0


### Drop unwanted columns

In [43]:
df.drop(columns=['name', 'min_requirements', 'rec_requirements', 'descriptions'], inplace=True)
df.head()

Unnamed: 0,steam_appid,is_free,supported_languages,developers,publishers,categories,genres,release_date,total_recommendations,price
0,1430720,False,"['Simplified Chinese', 'Traditional Chinese']",['XINLINE GAMES'],['XINLINE GAMES'],"['Single-player', 'In-App Purchases', 'Family ...",['Strategy'],"Feb 7, 2021",0,699.0
1,1430740,False,['English'],['Dano Sato'],['RealMono Inc.'],"['Single-player', 'Family Sharing']","['Casual', 'Indie', 'RPG', 'Simulation']","Oct 8, 2020",0,199.0
2,1430100,False,['English'],['Azimyth Studios'],['Azimyth Studios'],"['Single-player', 'Family Sharing']","['Indie', 'RPG']","Oct 29, 2020",0,199.0
3,1430680,False,"['English', 'French', 'Italian', 'German', 'Sp...",['ACE Team'],['Nacon'],"['Single-player', 'Steam Achievements', 'Steam...","['Action', 'Adventure', 'Indie']","Mar 9, 2023",759,2999.0
4,1430970,False,"['English', 'Simplified Chinese', 'Traditional...",['Studio Klondike Australia'],['Studio Klondike'],"['Single-player', 'Multi-player', 'PvP', 'Onli...","['Adventure', 'Indie', 'Strategy', 'Early Acce...","Dec 16, 2021",243,1499.0


### Convert list-like columns to lists

In [44]:
df['supported_languages'] = df['supported_languages'].apply(ast.literal_eval)
df['genres'] = df['genres'].apply(ast.literal_eval)
df['categories'] = df['categories'].apply(ast.literal_eval)
df['publishers'] = df['publishers'].apply(ast.literal_eval)
df['developers'] = df['developers'].apply(ast.literal_eval)

### Add number of multi-values columns

In [45]:
df['num_languages'] = df['supported_languages'].apply(len)
df['num_developers'] = df['developers'].apply(len)
df['num_publishers'] = df['publishers'].apply(len)
df['num_genres'] = df['genres'].apply(len)
df['num_categories'] = df['categories'].apply(len)

### One hot encoding languages, genres and categories

In [46]:
df_exploded = df.explode('supported_languages').explode('genres').explode('categories')
df_encoded = pd.get_dummies(df_exploded, columns=['supported_languages', 'genres', 'categories'], prefix=['lang', 'gen', 'cat'])
df_encoded

Unnamed: 0,steam_appid,is_free,developers,publishers,release_date,total_recommendations,price,num_languages,num_developers,num_publishers,...,cat_Steam Timeline,cat_Steam Trading Cards,cat_Steam Turn Notifications,cat_Steam Workshop,cat_SteamVR Collectibles,cat_Tracked Controller Support,cat_VR Only,cat_VR Support,cat_VR Supported,cat_Valve Anti-Cheat enabled
0,1430720,False,[XINLINE GAMES],[XINLINE GAMES],"Feb 7, 2021",0,699.0,2,1,1,...,False,False,False,False,False,False,False,False,False,False
0,1430720,False,[XINLINE GAMES],[XINLINE GAMES],"Feb 7, 2021",0,699.0,2,1,1,...,False,False,False,False,False,False,False,False,False,False
0,1430720,False,[XINLINE GAMES],[XINLINE GAMES],"Feb 7, 2021",0,699.0,2,1,1,...,False,False,False,False,False,False,False,False,False,False
0,1430720,False,[XINLINE GAMES],[XINLINE GAMES],"Feb 7, 2021",0,699.0,2,1,1,...,False,False,False,False,False,False,False,False,False,False
0,1430720,False,[XINLINE GAMES],[XINLINE GAMES],"Feb 7, 2021",0,699.0,2,1,1,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91282,1617800,True,[flashdim],[flashdim],"Jul 28, 2021",0,0.0,1,1,1,...,False,False,False,False,False,False,False,False,False,False
91282,1617800,True,[flashdim],[flashdim],"Jul 28, 2021",0,0.0,1,1,1,...,False,False,False,False,False,False,False,False,False,False
91282,1617800,True,[flashdim],[flashdim],"Jul 28, 2021",0,0.0,1,1,1,...,False,False,False,False,False,False,False,False,False,False
91282,1617800,True,[flashdim],[flashdim],"Jul 28, 2021",0,0.0,1,1,1,...,False,False,False,False,False,False,False,False,False,False


In [47]:
df_merged = df_encoded.groupby('steam_appid', as_index=False).max()
df_merged

Unnamed: 0,steam_appid,is_free,developers,publishers,release_date,total_recommendations,price,num_languages,num_developers,num_publishers,...,cat_Steam Timeline,cat_Steam Trading Cards,cat_Steam Turn Notifications,cat_Steam Workshop,cat_SteamVR Collectibles,cat_Tracked Controller Support,cat_VR Only,cat_VR Support,cat_VR Supported,cat_Valve Anti-Cheat enabled
0,10,False,[Valve],[Valve],"Nov 1, 2000",157570,999.0,8,1,1,...,False,False,False,False,False,False,False,False,False,True
1,50,False,[Gearbox Software],[Valve],"Nov 1, 1999",21453,499.0,4,1,1,...,False,False,False,False,False,False,False,False,False,True
2,70,False,[Valve],[Valve],"Nov 19, 1998",102430,999.0,8,1,1,...,False,False,False,False,False,False,False,False,False,True
3,80,False,[Valve],[Valve],"Mar 1, 2004",19364,999.0,8,1,1,...,False,False,False,False,False,False,False,False,False,True
4,130,False,[Gearbox Software],[Valve],"Jun 1, 2001",15826,499.0,3,1,1,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91278,3463470,False,[Parham],[Parham],"Jan 31, 2025",0,99.0,1,1,1,...,False,False,False,False,False,False,False,False,False,False
91279,3464560,True,"[Andrew Wei, Annie Zhang, Brandon Wang, Calex ...",[Gun Jumpers],"Feb 3, 2025",0,0.0,1,9,1,...,False,False,False,False,False,False,False,False,False,False
91280,3467390,False,[Bossa Studios],[CULT Games],"Jan 24, 2025",0,499.0,8,1,1,...,False,False,False,False,False,False,False,False,False,False
91281,3469570,False,[Elephant AR],[Big Fish Games],"Jan 21, 2025",0,1399.0,3,1,1,...,False,False,False,False,False,False,False,False,False,False


In [48]:
print(df[df['steam_appid'] == 583950].iloc[0]['categories'])

['Single-player', 'Multi-player', 'PvP', 'Online PvP', 'Steam Cloud']


In [49]:
df_merged[df_merged['steam_appid'] == 583950][['cat_Single-player', 'cat_Multi-player', 'cat_PvP', 'cat_Online PvP', 'cat_Steam Cloud']]

Unnamed: 0,cat_Single-player,cat_Multi-player,cat_PvP,cat_Online PvP,cat_Steam Cloud
12907,True,True,True,True,True


In [50]:
df = df_merged

### Encoding publishers & developers (Binary Encoding)

In [51]:
df

Unnamed: 0,steam_appid,is_free,developers,publishers,release_date,total_recommendations,price,num_languages,num_developers,num_publishers,...,cat_Steam Timeline,cat_Steam Trading Cards,cat_Steam Turn Notifications,cat_Steam Workshop,cat_SteamVR Collectibles,cat_Tracked Controller Support,cat_VR Only,cat_VR Support,cat_VR Supported,cat_Valve Anti-Cheat enabled
0,10,False,[Valve],[Valve],"Nov 1, 2000",157570,999.0,8,1,1,...,False,False,False,False,False,False,False,False,False,True
1,50,False,[Gearbox Software],[Valve],"Nov 1, 1999",21453,499.0,4,1,1,...,False,False,False,False,False,False,False,False,False,True
2,70,False,[Valve],[Valve],"Nov 19, 1998",102430,999.0,8,1,1,...,False,False,False,False,False,False,False,False,False,True
3,80,False,[Valve],[Valve],"Mar 1, 2004",19364,999.0,8,1,1,...,False,False,False,False,False,False,False,False,False,True
4,130,False,[Gearbox Software],[Valve],"Jun 1, 2001",15826,499.0,3,1,1,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91278,3463470,False,[Parham],[Parham],"Jan 31, 2025",0,99.0,1,1,1,...,False,False,False,False,False,False,False,False,False,False
91279,3464560,True,"[Andrew Wei, Annie Zhang, Brandon Wang, Calex ...",[Gun Jumpers],"Feb 3, 2025",0,0.0,1,9,1,...,False,False,False,False,False,False,False,False,False,False
91280,3467390,False,[Bossa Studios],[CULT Games],"Jan 24, 2025",0,499.0,8,1,1,...,False,False,False,False,False,False,False,False,False,False
91281,3469570,False,[Elephant AR],[Big Fish Games],"Jan 21, 2025",0,1399.0,3,1,1,...,False,False,False,False,False,False,False,False,False,False


In [52]:
df_exploded = df[['steam_appid', 'publishers']].explode('publishers')

encoder = BinaryEncoder(cols=['publishers'])
df_encoded = encoder.fit_transform(df_exploded)
df_encoded = df_encoded.apply(pd.to_numeric, errors='coerce')

df_final = df_encoded.groupby('steam_appid').mean().reset_index()
df = pd.merge(df.drop(columns=['publishers']), df_final, how='left', on='steam_appid')
df

Unnamed: 0,steam_appid,is_free,developers,release_date,total_recommendations,price,num_languages,num_developers,num_publishers,num_genres,...,publishers_6,publishers_7,publishers_8,publishers_9,publishers_10,publishers_11,publishers_12,publishers_13,publishers_14,publishers_15
0,10,False,[Valve],"Nov 1, 2000",157570,999.0,8,1,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,50,False,[Gearbox Software],"Nov 1, 1999",21453,499.0,4,1,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,70,False,[Valve],"Nov 19, 1998",102430,999.0,8,1,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,80,False,[Valve],"Mar 1, 2004",19364,999.0,8,1,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,130,False,[Gearbox Software],"Jun 1, 2001",15826,499.0,3,1,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91278,3463470,False,[Parham],"Jan 31, 2025",0,99.0,1,1,1,2,...,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0
91279,3464560,True,"[Andrew Wei, Annie Zhang, Brandon Wang, Calex ...","Feb 3, 2025",0,0.0,1,9,1,2,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
91280,3467390,False,[Bossa Studios],"Jan 24, 2025",0,499.0,8,1,1,2,...,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
91281,3469570,False,[Elephant AR],"Jan 21, 2025",0,1399.0,3,1,1,2,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0


In [53]:
df_exploded = df[['steam_appid', 'developers']].explode('developers')

encoder = BinaryEncoder(cols=['developers'])
df_encoded = encoder.fit_transform(df_exploded)
df_encoded = df_encoded.apply(pd.to_numeric, errors='coerce')

df_final = df_encoded.groupby('steam_appid').mean().reset_index()
df = pd.merge(df.drop(columns=['developers']), df_final, how='left', on='steam_appid')
df

Unnamed: 0,steam_appid,is_free,release_date,total_recommendations,price,num_languages,num_developers,num_publishers,num_genres,num_categories,...,developers_6,developers_7,developers_8,developers_9,developers_10,developers_11,developers_12,developers_13,developers_14,developers_15
0,10,False,"Nov 1, 2000",157570,999.0,8,1,1,1,6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,1.000000
1,50,False,"Nov 1, 1999",21453,499.0,4,1,1,1,5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,1.000000,0.000000
2,70,False,"Nov 19, 1998",102430,999.0,8,1,1,1,11,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,1.000000
3,80,False,"Mar 1, 2004",19364,999.0,8,1,1,1,4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,1.000000
4,130,False,"Jun 1, 2001",15826,499.0,3,1,1,1,3,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,1.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91278,3463470,False,"Jan 31, 2025",0,99.0,1,1,1,2,3,...,1.0,0.0,0.0,0.0,1.0,1.0,0.000000,1.000000,1.000000,0.000000
91279,3464560,True,"Feb 3, 2025",0,0.0,1,9,1,2,2,...,0.0,0.0,1.0,1.0,1.0,0.0,0.888889,0.555556,0.555556,0.555556
91280,3467390,False,"Jan 24, 2025",0,499.0,8,1,1,2,8,...,1.0,1.0,1.0,0.0,1.0,0.0,1.000000,0.000000,1.000000,0.000000
91281,3469570,False,"Jan 21, 2025",0,1399.0,3,1,1,2,2,...,1.0,1.0,0.0,0.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000


### Encoding released date

#### Extracting Month/Day/Year

In [54]:
df['release_date'] = pd.to_datetime(df['release_date'], format='%b %d, %Y')

df['day'] = df['release_date'].dt.day
df['month'] = df['release_date'].dt.month
df['year'] = df['release_date'].dt.year

df.drop(columns=['release_date'], inplace=True)
df

Unnamed: 0,steam_appid,is_free,total_recommendations,price,num_languages,num_developers,num_publishers,num_genres,num_categories,lang_Afrikaans,...,developers_9,developers_10,developers_11,developers_12,developers_13,developers_14,developers_15,day,month,year
0,10,False,157570,999.0,8,1,1,1,6,False,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,1.000000,1,11,2000
1,50,False,21453,499.0,4,1,1,1,5,False,...,0.0,0.0,0.0,0.000000,0.000000,1.000000,0.000000,1,11,1999
2,70,False,102430,999.0,8,1,1,1,11,False,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,1.000000,19,11,1998
3,80,False,19364,999.0,8,1,1,1,4,False,...,0.0,0.0,0.0,0.000000,0.000000,0.000000,1.000000,1,3,2004
4,130,False,15826,499.0,3,1,1,1,3,False,...,0.0,0.0,0.0,0.000000,0.000000,1.000000,0.000000,1,6,2001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91278,3463470,False,0,99.0,1,1,1,2,3,False,...,0.0,1.0,1.0,0.000000,1.000000,1.000000,0.000000,31,1,2025
91279,3464560,True,0,0.0,1,9,1,2,2,False,...,1.0,1.0,0.0,0.888889,0.555556,0.555556,0.555556,3,2,2025
91280,3467390,False,0,499.0,8,1,1,2,8,False,...,0.0,1.0,0.0,1.000000,0.000000,1.000000,0.000000,24,1,2025
91281,3469570,False,0,1399.0,3,1,1,2,2,False,...,0.0,1.0,1.0,1.000000,1.000000,1.000000,1.000000,21,1,2025


#### Calculating age

In [55]:
df['age'] = 2025 - df['year']
df

Unnamed: 0,steam_appid,is_free,total_recommendations,price,num_languages,num_developers,num_publishers,num_genres,num_categories,lang_Afrikaans,...,developers_10,developers_11,developers_12,developers_13,developers_14,developers_15,day,month,year,age
0,10,False,157570,999.0,8,1,1,1,6,False,...,0.0,0.0,0.000000,0.000000,0.000000,1.000000,1,11,2000,25
1,50,False,21453,499.0,4,1,1,1,5,False,...,0.0,0.0,0.000000,0.000000,1.000000,0.000000,1,11,1999,26
2,70,False,102430,999.0,8,1,1,1,11,False,...,0.0,0.0,0.000000,0.000000,0.000000,1.000000,19,11,1998,27
3,80,False,19364,999.0,8,1,1,1,4,False,...,0.0,0.0,0.000000,0.000000,0.000000,1.000000,1,3,2004,21
4,130,False,15826,499.0,3,1,1,1,3,False,...,0.0,0.0,0.000000,0.000000,1.000000,0.000000,1,6,2001,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91278,3463470,False,0,99.0,1,1,1,2,3,False,...,1.0,1.0,0.000000,1.000000,1.000000,0.000000,31,1,2025,0
91279,3464560,True,0,0.0,1,9,1,2,2,False,...,1.0,0.0,0.888889,0.555556,0.555556,0.555556,3,2,2025,0
91280,3467390,False,0,499.0,8,1,1,2,8,False,...,1.0,0.0,1.000000,0.000000,1.000000,0.000000,24,1,2025,0
91281,3469570,False,0,1399.0,3,1,1,2,2,False,...,1.0,1.0,1.000000,1.000000,1.000000,1.000000,21,1,2025,0


#### Grouping by era

In [56]:
df['era'] = df['year'] // 10 * 10
df['era']

0        2000
1        1990
2        1990
3        2000
4        2000
         ... 
91278    2020
91279    2020
91280    2020
91281    2020
91282    2020
Name: era, Length: 91283, dtype: int32

### Split the data to train/test/real

In [57]:
df_real = df[df['price'].isna()]
df_real

Unnamed: 0,steam_appid,is_free,total_recommendations,price,num_languages,num_developers,num_publishers,num_genres,num_categories,lang_Afrikaans,...,developers_11,developers_12,developers_13,developers_14,developers_15,day,month,year,age,era
8,320,False,9393,,1,1,1,1,4,False,...,0.0,0.0,0.0,0.0,1.0,1,11,2004,21,2000
34,2700,False,4098,,10,2,2,2,2,False,...,0.0,1.0,1.0,1.0,0.5,12,3,2008,17,2000
44,3400,False,0,,1,1,1,1,2,False,...,1.0,1.0,0.0,0.0,0.0,30,8,2006,19,2000
59,4580,False,6998,,5,1,1,1,4,False,...,0.0,0.0,0.0,1.0,0.0,7,8,2007,18,2000
61,4760,False,12461,,1,1,1,1,4,False,...,0.0,0.0,0.0,1.0,1.0,28,8,2007,18,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87108,3136360,False,0,,2,1,1,1,3,False,...,1.0,1.0,0.0,0.0,0.0,22,1,2025,0,2020
87596,3164240,False,0,,8,1,1,3,10,False,...,0.0,1.0,0.0,0.0,0.0,2,9,2024,1,2020
88157,3198850,False,0,,13,1,1,1,5,False,...,1.0,1.0,1.0,1.0,0.0,13,1,2025,0,2020
89567,3293360,False,0,,1,1,1,2,2,False,...,0.0,0.0,1.0,0.0,1.0,9,11,2024,1,2020


In [58]:
df_real

Unnamed: 0,steam_appid,is_free,total_recommendations,price,num_languages,num_developers,num_publishers,num_genres,num_categories,lang_Afrikaans,...,developers_11,developers_12,developers_13,developers_14,developers_15,day,month,year,age,era
8,320,False,9393,,1,1,1,1,4,False,...,0.0,0.0,0.0,0.0,1.0,1,11,2004,21,2000
34,2700,False,4098,,10,2,2,2,2,False,...,0.0,1.0,1.0,1.0,0.5,12,3,2008,17,2000
44,3400,False,0,,1,1,1,1,2,False,...,1.0,1.0,0.0,0.0,0.0,30,8,2006,19,2000
59,4580,False,6998,,5,1,1,1,4,False,...,0.0,0.0,0.0,1.0,0.0,7,8,2007,18,2000
61,4760,False,12461,,1,1,1,1,4,False,...,0.0,0.0,0.0,1.0,1.0,28,8,2007,18,2000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87108,3136360,False,0,,2,1,1,1,3,False,...,1.0,1.0,0.0,0.0,0.0,22,1,2025,0,2020
87596,3164240,False,0,,8,1,1,3,10,False,...,0.0,1.0,0.0,0.0,0.0,2,9,2024,1,2020
88157,3198850,False,0,,13,1,1,1,5,False,...,1.0,1.0,1.0,1.0,0.0,13,1,2025,0,2020
89567,3293360,False,0,,1,1,1,2,2,False,...,0.0,0.0,1.0,0.0,1.0,9,11,2024,1,2020


In [59]:
df.dropna(subset=['price'], inplace=True)
# df.drop(columns=['steam_appid'], inplace=True)
df.isna().sum()

steam_appid              0
is_free                  0
total_recommendations    0
price                    0
num_languages            0
                        ..
day                      0
month                    0
year                     0
age                      0
era                      0
Length: 219, dtype: int64

In [60]:
df.to_csv('Data/features_extracted.csv', index=False)
df_real.to_csv('Data/x_real.csv', index=False)