In [143]:
import time
import numpy as np
import pandas as pd
import requests

import json
from pandas.io.json import json_normalize




In [183]:
from ast import literal_eval

def try_literal_eval(s):
    try:
        return literal_eval(s)
    except ValueError:
        return s


steam_data = pd.read_csv('initial_preprocessed.csv') 
# steam_data = pd.json_normalize(steam_data, errors='ignore')
# all columns that are dicts are being read in as strings - look in to json_normalize as possibly better solution?
steam_data['price_overview'] = steam_data.price_overview.apply(try_literal_eval)
steam_data['platforms'] = steam_data.platforms.apply(try_literal_eval)
steam_data['recommendations'] = steam_data.recommendations.apply(try_literal_eval)
steam_data['screenshots'] = steam_data.screenshots.apply(try_literal_eval)
steam_data['movies'] = steam_data.movies.apply(try_literal_eval)
steam_data['genres'] = steam_data.genres.apply(try_literal_eval)


steam_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 943 entries, 0 to 942
Data columns (total 41 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   index                    943 non-null    int64 
 1   success                  943 non-null    bool  
 2   type                     943 non-null    object
 3   name                     943 non-null    object
 4   steam_appid              943 non-null    int64 
 5   required_age             943 non-null    int64 
 6   is_free                  943 non-null    bool  
 7   detailed_description     745 non-null    object
 8   about_the_game           745 non-null    object
 9   short_description        919 non-null    object
 10  supported_languages      939 non-null    object
 11  header_image             943 non-null    object
 12  website                  527 non-null    object
 13  pc_requirements          943 non-null    object
 14  mac_requirements         943 non-null    o

### General functions for reuse

In [219]:
def flatten_field(df, field, rename_dict, drops_list):
    '''
    takes in a dataframe column that is a dict and seprates it into 
    separate columns per key/value pair.  Can rename cols and drop 
    columns as specified
    
    df: dataframe to alter
    field: column to flatten
    rename_dict: dictionary of current_name: new_name pairs for updating
    drops_list: list of new columns to drop
    '''
    df_clean = pd.concat([df, df[field].apply(pd.Series)], axis=1)
    df_clean.drop(axis=1, columns=drops_list, inplace=True)
    df_clean.rename(columns=rename_dict, inplace=True)
    
    return df_clean


def remove_unused_data(df, column, valid_list):
    '''
    Removes rows that so not match any values in the valid_list 
    for the column
    '''

    contains = [df['type'].str.contains(i) for i in valid_list]
    df_clean = df[np.any(contains, axis=0)]
    return df_clean


def map_to_bool(df, mapping, col):
    '''
    maps values in a column to be just bools
    df: dataframe
    mapping: dict of mappings ex: {np.nan: False, 'full': True}
    col: name of col to convert
    '''
    return df[col].map(mapping)

def replace_with_count(df, col):
    '''
    gets the length of a column that is of type list
    '''
    df = pd.concat([df, df[col].str.len()], axis=1)
    # this results in both old and new columns having the same name 
    # so below code will remove the old screenshots dictionary column
    df = df.loc[:,~df.columns.duplicated(keep='last')]
    return df


def create_unique_bool_cols(df, col, prefix):
    '''
    Takes in a single columns in a dataframe, detmerines all unique values, 
    creates a column for each unique value in the dataframe and fills it 
    with a bool for each row indicating if that values exists for that row
    
    assumes column splits out into ['id','description'] pairs for uniqueness
    
    new column names will all be delimited with underscore
    
    df: Dataframe
    col: column to split out into multiple bool columns
    prefix: prefix of the new column names (genre -> genre_action, genre_adventure...).  
    Will use description to build new column name
    '''
    # first we need to create a table of all possible values then store those so we can access them
    s = df[col].apply(pd.Series)
    # combine everything into single column
    #todo: add logic to know number of cols on the fly
    y = s[0].append([s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9]],ignore_index=True).dropna()
    #split out dict to seprate columns
    z = y.apply(pd.Series)
    z = z.drop_duplicates(subset=['id','description'], keep="first")

    # create a new column for each unique value
    for index, row in z.iterrows():
        new_col = '{0} {1}'.format(prefix, row['description']).replace(" ", "_")
        df[new_col] = False

    # then fill those columns in the Dataframe with bools
    for index, row in df.iterrows():
        if type(row[col]) == float:
            continue
        for item in row[col]:
            new_name = '{0} {1}'.format(prefix, item['description']).replace(" ", "_")
            print(new_name)
#             todo: can't write to a DF as you iterate over it! fix it...
            row[new_name] = True
            print(row[new_name])
    
    return df

### Functions specific to my data

In [220]:
def initial_cleanup(df):
    
    del_cols = ['index',
        'success',
        'header_image',
        'pc_requirements',
        'mac_requirements',
        'linux_requirements',
        'support_info',
        'background',
        'legal_notice',
        'reviews',
        'content_descriptors']

    num_type_list = ['required_age']
    
    # set steam_appid as index
    df.set_index('steam_appid')

    # remove columns we don't care about
    df_clean = df.drop(columns=del_cols, axis=1)
    
    #update types to numeric
    for i in num_type_list:
        df_clean[i] = pd.to_numeric(df_clean[i])
    
    # trim down to just below types
    valid_types = ['game', 'dlc', 'demo']
    df_clean = remove_unused_data(df_clean, 'type', valid_types)

    # flatten cols as possible
    df_clean = flatten_price(df_clean)
    df_clean = flatten_platform(df_clean)
    df_clean = flatten_field(df_clean, 
                             'recommendations', 
                             {'total': 'recommendations'}, 
                             [0, 'recommendations'])
    
    
    # convert cols to bool type
    bool_col = 'controller_support'
    controller_mapping = {np.nan: False, 'full': True}
    df_clean[bool_col] = map_to_bool(df_clean, controller_mapping, bool_col)
    
    # convert cols to just counts
    df_clean = replace_with_count(df_clean, 'screenshots')
    df_clean.rename(columns={'screenshots': 'screenshot_count'}, inplace=True)

    df_clean = replace_with_count(df_clean, 'movies')
    df_clean.rename(columns={'movies': 'movie_count'}, inplace=True)
    
    # convert lists to bools for easy categorization
    #todo: below function doens't work - true never gets assigned to row/columns
    df_clean = create_unique_bool_cols(df_clean, 'genres', 'genre')

    
    return df_clean


def flatten_price(df):
    field_to_rename = {'currency': 'price_currency',
                         'discount_percent': 'price_discount_percent', 
                         'final': 'price_final', 
                         'initial': 'price_initial',
                         'recurring_sub': 'price_recurring_sub',
                         'recurring_sub_desc': 'price_recurring_sub_desc'}
    fields_to_drop = ['price_overview', 0, 'final_formatted', 'initial_formatted']
    
    df_clean = flatten_field(df, 'price_overview', field_to_rename, fields_to_drop)
        
    df_clean['price_final'] = df_clean['price_final'] / 100
    df_clean['price_initial'] = df_clean['price_initial'] / 100

    return df_clean

def flatten_platform(df):
    fields_to_rename = {'windows': 'windows_support', 
                       'mac': 'mac_support', 
                       'linux': 'linux_support'}
    fields_to_drop = ['platforms']
    
    df_clean = flatten_field(df, 'platforms', fields_to_rename, fields_to_drop)

    return df_clean




### Actually run the code!

In [221]:


data1 = initial_cleanup(steam_data)


data1.info()



genre_Design_&_Illustration
True
genre_Photo_Editing
True
genre_Web_Publishing
True
genre_Action
True
genre_Adventure
True
genre_Massively_Multiplayer
True
genre_RPG
True
genre_Strategy
True
genre_Action
True
genre_Casual
True
genre_Indie
True
genre_Adventure
True
genre_RPG
True
genre_Strategy
True
genre_Action
True
genre_Sports
True
genre_Action
True
genre_Casual
True
genre_Indie
True
genre_Simulation
True
genre_Strategy
True
genre_Design_&_Illustration
True
genre_Web_Publishing
True
genre_Design_&_Illustration
True
genre_Web_Publishing
True
genre_Action
True
genre_Casual
True
genre_Indie
True
genre_Simulation
True
genre_Early_Access
True
genre_Adventure
True
genre_Design_&_Illustration
True
genre_Web_Publishing
True
genre_Design_&_Illustration
True
genre_Web_Publishing
True
genre_Indie
True
genre_RPG
True
genre_Strategy
True
genre_Casual
True
genre_Indie
True
genre_Action
True
genre_RPG
True
genre_Action
True
genre_Adventure
True
genre_Indie
True
genre_Adventure
True
genre_Indie
True

genre_Indie
True
genre_RPG
True
genre_Action
True
genre_Adventure
True
genre_Casual
True
genre_Indie
True
genre_Casual
True
genre_Indie
True
genre_Strategy
True
genre_Action
True
genre_Casual
True
genre_Indie
True
genre_Early_Access
True
genre_Action
True
genre_Adventure
True
genre_Indie
True
genre_RPG
True
genre_Adventure
True
genre_Indie
True
genre_Indie
True
genre_RPG
True
genre_Strategy
True
genre_Action
True
genre_Adventure
True
genre_Casual
True
genre_Indie
True
genre_Action
True
genre_Adventure
True
genre_Casual
True
genre_Indie
True
genre_Strategy
True
genre_Casual
True
genre_Strategy
True
genre_Casual
True
genre_Action
True
genre_Casual
True
genre_Indie
True
genre_Indie
True
genre_RPG
True
genre_Casual
True
genre_Indie
True
genre_Strategy
True
genre_Casual
True
genre_Indie
True
genre_Simulation
True
genre_Violent
True
genre_Adventure
True
genre_Casual
True
genre_Indie
True
genre_Simulation
True
genre_Action
True
genre_Adventure
True
genre_Casual
True
genre_Indie
True
genre_Act

True
genre_RPG
True
genre_Strategy
True
genre_Simulation
True
genre_Indie
True
genre_RPG
True
genre_Strategy
True
genre_Action
True
genre_Adventure
True
genre_Indie
True
genre_Adventure
True
genre_Indie
True
genre_Action
True
genre_Adventure
True
genre_Indie
True
genre_RPG
True
genre_Adventure
True
genre_Casual
True
genre_Indie
True
genre_Racing
True
genre_Action
True
genre_Free_to_Play
True
genre_Massively_Multiplayer
True
genre_RPG
True
genre_Gore
True
genre_Action
True
genre_Adventure
True
genre_Casual
True
genre_Indie
True
genre_Action
True
genre_Free_to_Play
True
genre_Massively_Multiplayer
True
genre_RPG
True
genre_Indie
True
genre_Action
True
genre_Free_to_Play
True
genre_Strategy
True
genre_Early_Access
True
genre_Action
True
genre_Indie
True
genre_Indie
True
genre_Early_Access
True
genre_Casual
True
genre_Indie
True
genre_Simulation
True
genre_Strategy
True
genre_Adventure
True
genre_Casual
True
genre_Strategy
True
genre_Casual
True
genre_Indie
True
genre_Simulation
True
genre

In [222]:
pd.set_option('display.max_columns', 999)
data1

Unnamed: 0,type,name,steam_appid,required_age,is_free,detailed_description,about_the_game,short_description,supported_languages,website,developers,publishers,packages,package_groups,genres,release_date,fullgame,categories,dlc,controller_support,achievements,metacritic,demos,drm_notice,ext_user_account_notice,price_currency,price_discount_percent,price_final,price_initial,price_recurring_sub,price_recurring_sub_desc,windows_support,mac_support,linux_support,recommendations,screenshot_count,movie_count,genre_Design_&_Illustration,genre_Action,genre_Strategy,genre_Adventure,genre_Indie,genre_Casual,genre_Simulation,genre_Violent,genre_Education,genre_Video_Production,genre_Nudity,genre_Free_to_Play,genre_RPG,genre_Racing,genre_Gore,genre_Sexual_Content,genre_Animation_&_Modeling,genre_Sports,genre_Photo_Editing,genre_Web_Publishing,genre_Utilities,genre_Massively_Multiplayer,genre_Early_Access,genre_Software_Training,genre_Game_Development
0,game,Swatcher,579920,0,False,Swatcher is a color palette creation tool with...,Swatcher is a color palette creation tool with...,Swatcher is a color palette creation tool with...,English,http://swatcherapp.com,['Frozen Flame'],['Frozen Flame'],[148602],"[{'name': 'default', 'title': 'Buy Swatcher', ...","[{'id': '53', 'description': 'Design & Illustr...","{'coming_soon': False, 'date': 'Jan 13, 2017'}",,,,False,,,,,,USD,0.0,4.99,4.99,,,True,False,False,,6.0,2.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,dlc,Secret World Legends: Supernatural Bundle,579930,0,False,<h1>Secret World Legends</h1><p>Secret World L...,You’ve heeded the call; now explore the possib...,You’ve heeded the call; now explore the possib...,"English<strong>*</strong>, French<strong>*</st...",https://secretworldlegends.com/,['Funcom'],['Funcom'],[148605],"[{'name': 'default', 'title': 'Buy Secret Worl...","[{'id': '1', 'description': 'Action'}, {'id': ...","{'coming_soon': False, 'date': 'Jul 31, 2017'}","{'appid': '215280', 'name': 'Secret World Lege...","[{'id': 1, 'description': 'Multi-player'}, {'i...",,False,,,,,,USD,0.0,59.99,59.99,,,True,False,False,,7.0,1.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,game,Race for the Galaxy,579940,0,False,"Explore, Develop, Settle, Trade, Consume, or P...","Explore, Develop, Settle, Trade, Consume, or P...","Explore, Settle, Develop, Trade, Consume, or P...",English,http://templegatesgames.com,['Temple Gates Games'],['Temple Gates Games'],[148608],"[{'name': 'default', 'title': 'Buy Race for th...","[{'id': '2', 'description': 'Strategy'}]","{'coming_soon': False, 'date': 'Jun 27, 2017'}",,"[{'id': 2, 'description': 'Single-player'}, {'...","[647070, 647071, 762070]",False,,,,,,USD,0.0,6.99,6.99,,,True,False,False,178.0,5.0,1.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,demo,Don't Mess Up Demo,579970,0,True,Don't Mess Up is a collection of VR minigames ...,Don't Mess Up is a collection of VR minigames ...,Don't Mess Up is a collection of VR minigames ...,English<strong>*</strong><br><strong>*</strong...,,['OKatBest'],['OKatBest'],,[],"[{'id': '1', 'description': 'Action'}, {'id': ...","{'coming_soon': False, 'date': 'Jan 13, 2017'}","{'appid': '575170', 'name': ""Don't Mess Up""}","[{'id': 2, 'description': 'Single-player'}, {'...",,False,,,,,,,,,,,,True,False,False,,7.0,,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5,game,Lost Eden,579980,0,False,From the makers of Dune™ and Megarace™<br>Lost...,From the makers of Dune™ and Megarace™<br>Lost...,An adventure-strategy game in a world never be...,"English<strong>*</strong>, Italian, Spanish - ...",http://www.interplay.com,['Cryo Interactive'],['Interplay'],[148620],"[{'name': 'default', 'title': 'Buy Lost Eden',...","[{'id': '25', 'description': 'Adventure'}, {'i...","{'coming_soon': False, 'date': 'Feb 13, 2017'}",,"[{'id': 2, 'description': 'Single-player'}]",,False,,,,,,USD,0.0,9.99,9.99,,,True,False,False,,9.0,1.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
938,dlc,Toukiden 2 - Armor: Ibuki Outfit / Oka Outfit,569685,0,False,Allows you to use the armor &quot;Ibuki costum...,Allows you to use the armor &quot;Ibuki costum...,Allows you to use the armor &quot;Ibuki costum...,"English, Japanese<strong>*</strong><br><strong...",,"['KOEI TECMO GAMES CO., LTD.']","['KOEI TECMO GAMES CO., LTD.']",[144324],"[{'name': 'default', 'title': 'Buy Toukiden 2 ...","[{'id': '1', 'description': 'Action'}]","{'coming_soon': False, 'date': 'Mar 28, 2017'}","{'appid': '551730', 'name': 'Toukiden 2'}","[{'id': 2, 'description': 'Single-player'}, {'...",,False,,,,,,USD,0.0,1.99,1.99,,,True,False,False,,2.0,,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
939,dlc,Toukiden 2 - Armor: Fugaku Outfit / Nagi Outfit,569686,0,False,Allows you to use the armor &quot;Fugaku costu...,Allows you to use the armor &quot;Fugaku costu...,Allows you to use the armor &quot;Fugaku costu...,"English, Japanese<strong>*</strong><br><strong...",,"['KOEI TECMO GAMES CO., LTD.']","['KOEI TECMO GAMES CO., LTD.']",[144327],"[{'name': 'default', 'title': 'Buy Toukiden 2 ...","[{'id': '1', 'description': 'Action'}]","{'coming_soon': False, 'date': 'Mar 28, 2017'}","{'appid': '551730', 'name': 'Toukiden 2'}","[{'id': 2, 'description': 'Single-player'}, {'...",,False,,,,,,USD,0.0,1.99,1.99,,,True,False,False,,2.0,,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
940,dlc,Toukiden 2 - Mission Collection Set 1,569687,0,False,Below 9 missions are included in this set.<br ...,Below 9 missions are included in this set.<br ...,9 missions are included in this set. *You will...,"English, Japanese<strong>*</strong><br><strong...",,"['KOEI TECMO GAMES CO., LTD.']","['KOEI TECMO GAMES CO., LTD.']",[144330],"[{'name': 'default', 'title': 'Buy Toukiden 2 ...","[{'id': '1', 'description': 'Action'}]","{'coming_soon': False, 'date': 'Mar 28, 2017'}","{'appid': '551730', 'name': 'Toukiden 2'}","[{'id': 2, 'description': 'Single-player'}, {'...",,False,,,,,,USD,0.0,4.99,4.99,,,True,False,False,,3.0,,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
941,dlc,Toukiden 2 - Mission Collection Set 2,569688,0,False,Below 9 missions are included in this set.<br ...,Below 9 missions are included in this set.<br ...,9 missions are included in this set. *You will...,"English, Japanese<strong>*</strong><br><strong...",,"['KOEI TECMO GAMES CO., LTD.']","['KOEI TECMO GAMES CO., LTD.']",[144333],"[{'name': 'default', 'title': 'Buy Toukiden 2 ...","[{'id': '1', 'description': 'Action'}]","{'coming_soon': False, 'date': 'Apr 4, 2017'}","{'appid': '551730', 'name': 'Toukiden 2'}","[{'id': 2, 'description': 'Single-player'}, {'...",,False,,,,,,USD,0.0,4.99,4.99,,,True,False,False,,4.0,,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [224]:
data1[data1['genre_Design_&_Illustration']]



Unnamed: 0,type,name,steam_appid,required_age,is_free,detailed_description,about_the_game,short_description,supported_languages,website,developers,publishers,packages,package_groups,genres,release_date,fullgame,categories,dlc,controller_support,achievements,metacritic,demos,drm_notice,ext_user_account_notice,price_currency,price_discount_percent,price_final,price_initial,price_recurring_sub,price_recurring_sub_desc,windows_support,mac_support,linux_support,recommendations,screenshot_count,movie_count,genre_Design_&_Illustration,genre_Action,genre_Strategy,genre_Adventure,genre_Indie,genre_Casual,genre_Simulation,genre_Violent,genre_Education,genre_Video_Production,genre_Nudity,genre_Free_to_Play,genre_RPG,genre_Racing,genre_Gore,genre_Sexual_Content,genre_Animation_&_Modeling,genre_Sports,genre_Photo_Editing,genre_Web_Publishing,genre_Utilities,genre_Massively_Multiplayer,genre_Early_Access,genre_Software_Training,genre_Game_Development
