In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import datetime
import matplotlib.animation as ani
import concurrent.futures
import functools
import os
import pymysql
import bar_chart_race as bcr

from matplotlib.ticker import FuncFormatter
from sqlalchemy import create_engine
from time import time
from IPython.display import HTML

import warnings
warnings.filterwarnings('ignore')

In [2]:
from IPython.display import HTML

# Markup for a "toggle raw code" button: the script hides or shows every
# `div.input` element (selected through the page's `$` function) and runs once
# when the document is ready; the form button calls the same function again.
toggle_markup = '''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>'''

# Last expression of the cell, so the notebook renders it as rich HTML output.
HTML(toggle_markup)

In [16]:
# SQLAlchemy engine for the `publications` MySQL database (pymysql driver).
# The connection URL is read from the PUBLICATIONS_DB_URL environment variable
# so that real credentials never have to be written into the notebook; when
# the variable is not set, the original placeholder URL is used unchanged.
engine = create_engine(
    os.environ.get(
        'PUBLICATIONS_DB_URL',
        'mysql+pymysql://student:xxx@xxxxx:3306/publications',
    )
)

In [4]:
# Load the raw per-game table and preview its first five rows.
raw_games_csv = '../data/data/game_info.csv'
games = pd.read_csv(filepath_or_buffer=raw_games_csv)
games.head(5)

Unnamed: 0,id,slug,name,metacritic,metacritic_platforms,released,tba,updated,website,rating,...,added_status_dropped,added_status_playing,rating_count_exceptional,rating_pct_exceptional,rating_count_recommended,rating_pct_recommended,rating_count_meh,rating_pct_meh,rating_count_skip,rating_pct_skip
0,1,dgeneration-hd,D/Generation HD,,[],2015-10-23,False,2019-09-17T11:58:57,http://dgeneration.net,0.0,...,0,0,1,33.33,0,0.0,0,0.0,2.0,66.67
1,10,g-prime,G Prime Into The Rain,,[],2016-01-06,False,2019-11-06T23:04:19,,0.0,...,0,0,1,33.33,0,0.0,0,0.0,2.0,66.67
2,100,land-sliders,Land Sliders,,[],2015-09-24,False,2019-10-22T13:56:16,http://prettygreat.com,0.0,...,1,0,1,50.0,0,0.0,0,0.0,1.0,50.0
3,10000,gods-and-idols,Gods and Idols,,[],2016-12-12,False,2019-09-17T13:37:13,http://www.godsandidols.com/,0.0,...,0,0,0,0.0,0,0.0,2,40.0,3.0,60.0
4,100006,fading-light-descent,Fading Light: Descent,,[],2017-08-24,False,2019-01-09T12:41:06,,0.0,...,0,0,0,0.0,0,0.0,0,0.0,0.0,0.0


In [5]:
# Show the dtype pandas inferred for every column of `games`.
games.dtypes

id                             int64
slug                          object
name                          object
metacritic                   float64
metacritic_platforms          object
released                      object
tba                             bool
updated                       object
website                       object
rating                       float64
rating_top                    object
ratings                       object
reactions                     object
added                          int64
playtime                       int64
screenshots_count              int64
movies_count                   int64
creators_count                 int64
achievements_count             int64
parent_achievements_count      int64
reddit_count                   int64
twitch_count                   int64
youtube_count                  int64
reviews_text_count             int64
ratings_count                  int64
suggestions_count            float64
parents_count                  int64
a

In [6]:
# Parse the release date string into a proper datetime column.
games['released_dt'] = games['released'].astype('datetime64[ns]')

# Calendar month of the release date.
# NOTE: despite its name, `month_year` holds only the month number.
games['month_year'] = games['released_dt'].dt.month

# Percentage of missing values per column (displayed as the cell output).
games.isnull().sum().div(len(games)).mul(100)

id                            0.000000
slug                          0.000451
name                          0.000451
metacritic                   98.995601
metacritic_platforms          0.000000
released                      5.192104
tba                           0.000000
updated                       0.000000
website                      86.087986
rating                        0.000000
rating_top                    0.000000
ratings                       0.000902
reactions                    97.081331
added                         0.000000
playtime                      0.000000
screenshots_count             0.000000
movies_count                  0.000000
creators_count                0.000000
achievements_count            0.000000
parent_achievements_count     0.000000
reddit_count                  0.000000
twitch_count                  0.000000
youtube_count                 0.000000
reviews_text_count            0.000000
ratings_count                 0.000000
suggestions_count        

In [7]:
# `rating_dummy` is the rating with zeros replaced by 1, so that a rating of 0
# does not wipe out the product below.
games['rating_dummy'] = games['rating'].mask(games['rating'].eq(0), 1)

# Engagement score = `added` count weighted by the (zero-free) rating.
games['engagement_score'] = games['added'].mul(games['rating_dummy'])

In [8]:
# (rows, columns) of `games` after the derived columns above were added.
games.shape

(443250, 54)

## Data clean-up: dropping games without a name and filling NaNs

In [9]:
# Fill missing values with explicit sentinels, column by column, so later
# string and numeric operations on these columns never meet NaN.
games = games.fillna({
    'website': 'unknown',
    'platforms': 'unknown',
    'publishers': 'unknown',
    'developers': 'unknown',
    'genres': 'unknown',
    'tags': 'none',
    'rating_count_skip': 0,
    'rating_pct_skip': 0,
    'suggestions_count': 0,
    'esrb_rating': 'unrated',
})

# Drop games that do not have a name. The explicit .copy() makes the filtered
# result an independent frame, so the column assignment below is a plain write
# to `games` instead of a chained assignment on a slice (which pandas reports
# as SettingWithCopyWarning).
games = games[games['name'].notna()].copy()

# Release year as the first four characters of the `released` string
# (stays NaN where `released` is missing).
games['year_released'] = games['released'].str[:4]


In [10]:
# Label each game 'MMP' when its `genres` value contains the genre
# "Massively Multiplayer", otherwise 'non-MMP'.
# The genre name is anchored between '|' delimiters or the string ends, so it
# is matched whether it is the only genre or sits anywhere in a delimited list.
# Gotcha: `str.find` returns a position (or -1), so a test like
# `x.find(sub) == True` is only true when the match starts at index 1;
# a containment test is what is needed here.
is_mmp = games['genres'].str.contains(
    r'(?:^|\|)Massively Multiplayer(?:\||$)', regex=True, na=False
)
games['multiplayer'] = np.where(is_mmp, 'MMP', 'non-MMP')

In [11]:
# Label each game 'Indie' when its `genres` value contains the genre "Indie",
# otherwise 'Mainstream'.
# The genre name is anchored between '|' delimiters or the string ends, so it
# is matched whether it is the only genre or sits anywhere in a delimited list.
# Gotcha: `str.find` returns a position (or -1), so a test like
# `x.find(sub) == True` is only true when the match starts at index 1;
# a containment test is what is needed here.
is_indie = games['genres'].str.contains(
    r'(?:^|\|)Indie(?:\||$)', regex=True, na=False
)
games['genre type'] = np.where(is_indie, 'Indie', 'Mainstream')

In [12]:
# Renumber the rows 0..n-1; the previous row labels are kept as a regular
# column (drop=False), then the cleaned table is written to disk.
# NOTE(review): this cell is not idempotent. Each re-run of reset_index adds
# another label column, and to_csv also writes the new index as an extra
# unnamed column. Confirm whether downstream readers rely on those columns.
cleaned_games_csv = '../data/data/games_info_cleaned.csv'
games = games.reset_index(drop=False)
games.to_csv(cleaned_games_csv)

In [13]:
# Load the per-platform table.
platforms_csv = '../data/data/platforms_info.csv'
platforms = pd.read_csv(filepath_or_buffer=platforms_csv)

# `rating_dummy` is the rating with zeros replaced by 1, so that a rating of 0
# does not wipe out the product below.
platforms['rating_dummy'] = platforms['rating'].mask(platforms['rating'].eq(0), 1)

# Engagement score = `added` count weighted by the (zero-free) rating.
platforms['engagement_score'] = platforms['added'].mul(platforms['rating_dummy'])

# Preview the five rows with the highest engagement score.
platforms.sort_values(by='engagement_score', ascending=False).head(5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,platform,name,year,rating,ratings_count,added,reddit_count,twitch_count,...,esrb_rating,added_status_yet,added_status_owned,added_status_beaten,added_status_toplay,added_status_dropped,added_status_playing,rating_dummy,engagement_score,parent_platforms
182231,182231,1195,Xbox 360,Grand Theft Auto V,2013.0,4.48,4185,13360,1912,101,...,Mature,318,8129,3442,3442,3442,3442,4.48,59852.8,Xbox
182232,182232,1196,Xbox One,Grand Theft Auto V,2013.0,4.48,4185,13360,1912,101,...,Mature,318,8129,3442,3442,3442,3442,4.48,59852.8,Xbox
182226,182226,1190,PC,Grand Theft Auto V,2013.0,4.48,4185,13360,1912,101,...,Mature,318,8129,3442,3442,3442,3442,4.48,59852.8,PC
182227,182227,1191,Xbox Series S/X,Grand Theft Auto V,2013.0,4.48,4185,13360,1912,101,...,Mature,318,8129,3442,3442,3442,3442,4.48,59852.8,Xbox
182228,182228,1192,PlayStation 5,Grand Theft Auto V,2013.0,4.48,4185,13360,1912,101,...,Mature,318,8129,3442,3442,3442,3442,4.48,59852.8,PlayStation


In [14]:
# Identity mapping {platform: platform} over every distinct platform name, in
# sorted order. It is not used again in this cell.
dc = {platform_name: platform_name for platform_name in sorted(set(platforms.platform))}

# Platforms that are grouped under a shared parent family.
platform_families = {
    'Atari': [
        'Atari 2600', 'Atari 5200', 'Atari 7800', 'Atari 8-bit',
        'Atari Flashback', 'Atari Lynx', 'Atari ST', 'Atari XEGS', 'Jaguar',
    ],
    'Game Boy': ['Game Boy', 'Game Boy Advance', 'Game Boy Color'],
    'Nintendo': [
        'Nintendo 3DS', 'Nintendo 64', 'Nintendo DS', 'Nintendo DSi',
        'Nintendo Switch', 'Wii', 'Wii U',
    ],
    'PlayStation': [
        'PlayStation', 'PlayStation 2', 'PlayStation 3', 'PlayStation 4',
        'PlayStation 5',
    ],
    'SEGA': ['SEGA 32X', 'SEGA CD', 'SEGA Master System', 'SEGA Saturn'],
    'Xbox': ['Xbox', 'Xbox 360', 'Xbox One', 'Xbox Series S/X'],
}

# Platforms that act as their own parent.
standalone_platforms = [
    '3DO', 'Android', 'Apple II', 'Classic Macintosh', 'Commodore / Amiga',
    'Dreamcast', 'Everyone', 'Game Gear', 'GameCube', 'Genesis', 'Linux',
    'NES', 'Neo Geo', 'PC', 'PS Vita', 'PSP', 'SNES', 'Web', 'iOS', 'macOS',
    'unknown',
]

# Flatten both groups into the platform -> parent lookup.
dict_platfrom_parents = {
    **{standalone: standalone for standalone in standalone_platforms},
    **{
        member: family
        for family, members in platform_families.items()
        for member in members
    },
}

# Platform names missing from the lookup become NaN in `parent_platforms`.
platforms['parent_platforms'] = platforms['platform'].map(dict_platfrom_parents)

In [15]:
# Write the enriched table back to disk. This is the same path that
# `pd.read_csv` loads `platforms` from earlier in the notebook, so the row
# index must not be written: otherwise every run adds one more 'Unnamed: 0*'
# column to the file.
platforms.to_csv('../data/data/platforms_info.csv', index=False)