In [None]:
# dataframe manipulation, data sourcing, decorators (optional)

# Potential dataset sources
# https://www.kaggle.com/
# https://archive.ics.uci.edu/
# https://paperswithcode.com/datasets
# https://en.wikipedia.org/wiki/List_of_datasets_for_machine-learning_research

In [1]:
import functools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Loading data, quick overview

In [2]:
# Source (Kaggle): https://www.kaggle.com/datasets/asaniczka/video-game-sales-2024
# The CSV is read from a path relative to the notebook's working directory.
vgchartz = pd.read_csv('./vgchartz-2024.csv')

In [5]:
# Look for column types and null counts (non-null counts far below the
# row count reveal columns with a lot of missing data)
vgchartz.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64016 entries, 0 to 64015
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   img           64016 non-null  object 
 1   title         64016 non-null  object 
 2   console       64016 non-null  object 
 3   genre         64016 non-null  object 
 4   publisher     64016 non-null  object 
 5   developer     63999 non-null  object 
 6   critic_score  6678 non-null   float64
 7   total_sales   18922 non-null  float64
 8   na_sales      12637 non-null  float64
 9   jp_sales      6726 non-null   float64
 10  pal_sales     12824 non-null  float64
 11  other_sales   15128 non-null  float64
 12  release_date  56965 non-null  object 
 13  last_update   17879 non-null  object 
dtypes: float64(6), object(8)
memory usage: 6.8+ MB


In [7]:
# A bare slice on a DataFrame selects rows (start:stop:step),
# so ::2 gets every second item
vgchartz[::2]

Unnamed: 0,img,title,console,genre,publisher,developer,critic_score,total_sales,na_sales,jp_sales,pal_sales,other_sales,release_date,last_update
0,/games/boxart/full_6510540AmericaFrontccc.jpg,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,9.4,20.32,6.37,0.99,9.85,3.12,2013-09-17,
2,/games/boxart/827563ccc.jpg,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,9.6,16.15,8.41,0.47,5.49,1.78,2002-10-28,
4,/games/boxart/full_4990510AmericaFrontccc.jpg,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,8.1,15.09,6.18,0.41,6.05,2.44,2015-11-06,2018-01-14
6,/games/boxart/full_call-of-duty-black-ops_5Ame...,Call of Duty: Black Ops,X360,Shooter,Activision,Treyarch,8.8,14.74,9.76,0.11,3.73,1.14,2010-11-09,
8,/games/boxart/full_1977964AmericaFrontccc.jpg,Call of Duty: Black Ops II,X360,Shooter,Activision,Treyarch,8.4,13.86,8.27,0.07,4.32,1.20,2012-11-13,2018-04-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64006,/games/boxart/default.jpg,Without Within,PC,Visual Novel,InvertMouse,InvertMouse,,,,,,,2015-01-22,2018-12-25
64008,/games/boxart/default.jpg,Without Within 3,PC,Visual Novel,InvertMouse,InvertMouse,,,,,,,2018-05-03,2018-12-25
64010,/games/boxart/full_2294305JapanFrontccc.jpg,World End Syndrome,PS4,Visual Novel,Arc System Works,Arc System Works,,,,,,,2018-04-26,2019-04-03
64012,/games/boxart/full_8031506AmericaFrontccc.jpg,"Yoru, Tomosu",PS4,Visual Novel,Nippon Ichi Software,Nippon Ichi Software,,,,,,,2020-07-30,2020-05-09


In [13]:
vgchartz.iloc[::5, -3:] # using NumPy-style comma-separated slices: every 5th row
                        # of only the last three columns (-3: keeps the final 3)

Unnamed: 0,other_sales,release_date,last_update
0,3.12,2013-09-17,
5,1.33,2011-11-08,
10,1.28,2009-11-10,
15,1.73,2017-09-29,2018-04-02
20,0.98,2013-11-05,2018-03-21
...,...,...,...
63995,,2020-09-10,2020-07-31
64000,,2017-06-06,2018-12-17
64005,,2022-12-08,2023-07-05
64010,,2018-04-26,2019-04-03


In [15]:
vgchartz.loc[::5, ['other_sales', 'release_date']] # using loc - the row slice is label-based,
                                                   # the second argument is a list of column names

Unnamed: 0,other_sales,release_date
0,3.12,2013-09-17
5,1.33,2011-11-08
10,1.28,2009-11-10
15,1.73,2017-09-29
20,0.98,2013-11-05
...,...,...
63995,,2020-09-10
64000,,2017-06-06
64005,,2022-12-08
64010,,2018-04-26


In [17]:
vgchartz.columns # Review column names (returned as an Index of labels)

Index(['img', 'title', 'console', 'genre', 'publisher', 'developer',
       'critic_score', 'total_sales', 'na_sales', 'jp_sales', 'pal_sales',
       'other_sales', 'release_date', 'last_update'],
      dtype='object')

---

## Finding total sales by title, augmenting original data

In [24]:
# A list of column names inside [] selects just those columns, as a DataFrame
vgchartz[['title', 'total_sales']]

Unnamed: 0,title,total_sales
0,Grand Theft Auto V,20.32
1,Grand Theft Auto V,19.39
2,Grand Theft Auto: Vice City,16.15
3,Grand Theft Auto V,15.86
4,Call of Duty: Black Ops 3,15.09
...,...,...
64011,XBlaze Lost: Memories,
64012,"Yoru, Tomosu",
64013,"Yoru, Tomosu",
64014,Yunohana SpRING! ~Mellow Times~,


In [28]:
# groupby() on its own is lazy: this expression is a SeriesGroupBy, and
# nothing is aggregated until a reduction such as .sum() is called on it.
# NOTE(review): the saved output below is an aggregated Series, so it was
# presumably produced by an earlier revision of this cell - confirm and re-run.
vgchartz.groupby('title').total_sales

title
"Nuke It"                        NaN
#DRIVE Rally                     NaN
#IDARB                           NaN
#killallzombies                  NaN
'70s Robot Anime: Geppy-X        NaN
                                ... 
yOm                              NaN
yOm_fury                         NaN
¡Shin Chan Flipa en colores!    0.14
じんるいのみなさまへ                       NaN
レミロア~少女と異世界と魔導書                  NaN
Name: total_sales, Length: 39798, dtype: float64

In [33]:
# Rows ordered from highest to lowest total_sales
sales_sorted = vgchartz.sort_values('total_sales', ascending=False)

In [41]:
# One row per title: total_sales summed over every row sharing that title,
# largest first. sum() skips missing values, so a title with no sales data
# ends up with 0 rather than NaN.
sales_grouped = (
    vgchartz.groupby('title')['total_sales']
    .sum()
    .sort_values(ascending=False)
    .to_frame()
)

In [42]:
# Turn the `title` index back into an ordinary column with a fresh 0..n-1 index.
# NOTE: this reassigns sales_grouped from itself, so running the cell a second
# time would add a stray `index` column.
sales_grouped = sales_grouped.reset_index()

In [56]:
# Rename the summed column so it cannot be confused with the per-row
# `total_sales` column of the original frame. rename() is used instead of
# copy-then-drop because it is safe to re-run: a label that is no longer
# present is simply ignored, whereas the old form raised on a second run.
sales_grouped = sales_grouped.rename(columns={'total_sales': 'combined_total_sales'})
sales_grouped

Unnamed: 0,title,combined_total_sales
0,Grand Theft Auto V,64.29
1,Call of Duty: Black Ops,30.99
2,Call of Duty: Modern Warfare 3,30.71
3,Call of Duty: Black Ops II,29.59
4,Call of Duty: Ghosts,28.80
...,...,...
39793,Hisshou Pachinko*Pachi-Slot Kouryaku Series Vo...,0.00
39794,Hisshou Pachinko*Pachi-Slot Kouryaku Series Vo...,0.00
39795,Hisshou Pachinko*Pachi-Slot Kouryaku Series Vo...,0.00
39796,Hisshou Pachinko*Pachi-Slot Kouryaku Series Vo...,0.00


In [45]:
# Deliberate mistake, kept for illustration: assigning a Series aligns on the
# row index, not on `title`, and sales_grouped is shorter and ordered differently.
# The summed column is called `combined_total_sales` by this point in the
# notebook, so that is the name that must be read here.
vgchartz['total_sales'] = sales_grouped['combined_total_sales'] # The size and order do not match

In [50]:
# Rows whose index has no counterpart in sales_grouped were filled with NaN
vgchartz['total_sales'].isna().sum() # Now we have tons of null values!

24218

In [57]:
# Reset the original dataframe by re-reading the CSV, which discards the
# overwritten total_sales column from the cells above
vgchartz = pd.read_csv('./vgchartz-2024.csv')

In [58]:
# Display the reloaded frame to confirm total_sales holds the per-row figures again
vgchartz

Unnamed: 0,img,title,console,genre,publisher,developer,critic_score,total_sales,na_sales,jp_sales,pal_sales,other_sales,release_date,last_update
0,/games/boxart/full_6510540AmericaFrontccc.jpg,Grand Theft Auto V,PS3,Action,Rockstar Games,Rockstar North,9.4,20.32,6.37,0.99,9.85,3.12,2013-09-17,
1,/games/boxart/full_5563178AmericaFrontccc.jpg,Grand Theft Auto V,PS4,Action,Rockstar Games,Rockstar North,9.7,19.39,6.06,0.60,9.71,3.02,2014-11-18,2018-01-03
2,/games/boxart/827563ccc.jpg,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,9.6,16.15,8.41,0.47,5.49,1.78,2002-10-28,
3,/games/boxart/full_9218923AmericaFrontccc.jpg,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,,15.86,9.06,0.06,5.33,1.42,2013-09-17,
4,/games/boxart/full_4990510AmericaFrontccc.jpg,Call of Duty: Black Ops 3,PS4,Shooter,Activision,Treyarch,8.1,15.09,6.18,0.41,6.05,2.44,2015-11-06,2018-01-14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64011,/games/boxart/full_2779838AmericaFrontccc.jpg,XBlaze Lost: Memories,PC,Visual Novel,Aksys Games,Arc System Works,,,,,,,2016-08-11,2019-01-28
64012,/games/boxart/full_8031506AmericaFrontccc.jpg,"Yoru, Tomosu",PS4,Visual Novel,Nippon Ichi Software,Nippon Ichi Software,,,,,,,2020-07-30,2020-05-09
64013,/games/boxart/full_6553045AmericaFrontccc.jpg,"Yoru, Tomosu",NS,Visual Novel,Nippon Ichi Software,Nippon Ichi Software,,,,,,,2020-07-30,2020-05-09
64014,/games/boxart/full_6012940JapanFrontccc.png,Yunohana SpRING! ~Mellow Times~,NS,Visual Novel,Idea Factory,Otomate,,,,,,,2019-02-28,2019-02-24


In [60]:
# Attach each title's combined total to every row carrying that title.
# The join key is `title`; the default join type (inner) is used.
df = vgchartz.merge(sales_grouped, on='title')

---

## Looking at the ratio of NA to JP sales, sorting/filtering

In [69]:
# Per-row ratio of North American to Japanese sales. A positive NA figure
# divided by zero JP sales comes out as inf, which is filtered later.
df['NA_JP_ratio'] = df['na_sales'].div(df['jp_sales'])

In [74]:
# Games that sold at least 10x more in NA than in JP, strongest skew first.
# The upper bound drops the inf ratios produced by zero JP sales; rows with a
# missing ratio fail both comparisons and are dropped too.
america = (
    df.loc[lambda frame: frame['NA_JP_ratio'].ge(10) & frame['NA_JP_ratio'].lt(np.inf)]
    .sort_values('NA_JP_ratio', ascending=False)
)

In [77]:
# Keep only the rows whose title contains 'Grand Theft Auto'
america[america['title'].str.contains('Grand Theft Auto')]

Unnamed: 0,img,title,console,genre,publisher,developer,critic_score,total_sales,na_sales,jp_sales,pal_sales,other_sales,release_date,last_update,combined_total_sales,NA_JP_ratio
3,/games/boxart/full_8047513AmericaFrontccc.jpg,Grand Theft Auto V,XOne,Action,Rockstar Games,Rockstar North,9.0,8.72,4.7,0.01,3.25,0.76,2014-11-18,2018-04-11,64.29,470.0
2,/games/boxart/full_9218923AmericaFrontccc.jpg,Grand Theft Auto V,X360,Action,Rockstar Games,Rockstar North,,15.86,9.06,0.06,5.33,1.42,2013-09-17,,64.29,151.0
58,/games/boxart/full_1182151AmericaFrontccc.jpg,Grand Theft Auto IV,X360,Action,Rockstar Games,Rockstar North,10.0,11.09,6.8,0.14,3.11,1.04,2008-04-29,,22.53,48.571429
1188,/games/boxart/full_7346149AmericaFrontccc.jpg,Grand Theft Auto: Episodes from Liberty City,X360,Adventure,Rockstar Games,Rockstar North,9.0,2.6,1.08,0.03,1.22,0.27,2009-10-29,,4.78,36.0
315,/games/boxart/1161187ccc.jpg,Grand Theft Auto: Vice City Stories,PS2,Action,Rockstar Games,Rockstar Leeds,,0.97,0.78,0.03,0.03,0.13,2007-03-05,,6.05,26.0
47,/games/boxart/3570928ccc.jpg,Grand Theft Auto III,PS2,Action,Rockstar Games,DMA Design,9.5,13.1,6.99,0.3,4.51,1.3,2001-10-23,,13.11,23.3
136,/games/boxart/full_1027141AmericaFrontccc.jpg,Grand Theft Auto: Liberty City Stories,PS2,Action,Rockstar Games,Rockstar Leeds,8.3,3.54,1.56,0.07,1.4,0.5,2006-06-06,,11.26,22.285714
1439,/games/boxart/6725603ccc.jpg,Grand Theft Auto,PS,Action,Take-Two Interactive,DMA Design,6.0,2.32,0.79,0.04,1.35,0.14,1998-06-30,,2.56,19.75
8,/games/boxart/827563ccc.jpg,Grand Theft Auto: Vice City,PS2,Action,Rockstar Games,Rockstar North,9.6,16.15,8.41,0.47,5.49,1.78,2002-10-28,,16.19,17.893617
135,/games/boxart/1340019ccc.jpg,Grand Theft Auto: Liberty City Stories,PSP,Action,Rockstar Games,Rockstar Leeds,8.8,7.72,2.9,0.24,2.83,1.74,2005-10-25,,11.26,12.083333


---

## Fun with decorators

Terminology: decorators take advantage of [closures](https://en.wikipedia.org/wiki/Closure_(computer_programming)). Accessing special attributes like `__closure__` and `__name__` while the program is running (i.e. "at runtime") is a form of [reflection](https://en.wikipedia.org/wiki/Reflective_programming).

In [190]:
# Making a simple execution counter decorator 
# V1, no global variables, count stored in the closure.


# Uncomment the print statements to see when each code block runs
def exc_counter(func):
    """Decorate ``func`` so that every call prints a running call count.

    The counter is a local variable of this function that ``wrapper``
    captures in its closure, so each decorated function gets its own
    independent count and no global state is needed.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that prints ``Called <name> <count> times`` and then
        returns whatever ``func`` returns.
    """
    count = 0  # initialised once, when the decorator is applied
#     print('ran exc_counter')

    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        nonlocal count
#         print('called the wrapper')
        count += 1
        print('Called', func.__name__, count, 'times')
        # Hand the result back to the caller; without this `return` every
        # decorated function would silently evaluate to None.
        return func(*args, **kwargs)
    return wrapper


@exc_counter
def hello(name):
    """Build the greeting ``'hello <name>'``."""
    greeting = 'hello ' + name
    return greeting
    
    
@exc_counter
def add(x, y):
    """Add ``x`` and ``y`` and return the result."""
    total = x + y
    return total

# Simulate calling the functions a bunch of times:
# 10 calls to hello, then 25 calls to add. Each call prints its running count.
for _ in range(10):
    hello('_')
for _ in range(25):
    add(1,3)

Called hello 1 times
Called hello 2 times
Called hello 3 times
Called hello 4 times
Called hello 5 times
Called hello 6 times
Called hello 7 times
Called hello 8 times
Called hello 9 times
Called hello 10 times
Called add 1 times
Called add 2 times
Called add 3 times
Called add 4 times
Called add 5 times
Called add 6 times
Called add 7 times
Called add 8 times
Called add 9 times
Called add 10 times
Called add 11 times
Called add 12 times
Called add 13 times
Called add 14 times
Called add 15 times
Called add 16 times
Called add 17 times
Called add 18 times
Called add 19 times
Called add 20 times
Called add 21 times
Called add 22 times
Called add 23 times
Called add 24 times
Called add 25 times


In [191]:
# The counters carry on from the previous cell because each one lives in
# its wrapper's closure, not in this cell
hello('name') # 11 times
add(1,2) # 26 times

Called hello 11 times
Called add 26 times


In [192]:
# We can access their values directly, but it's not recommended!
# __closure__ is a tuple of cells holding the variables captured by the
# wrapper; here cell 0 is `count` (the printed numbers match the call counts).
print(add.__closure__[0].cell_contents)
print(hello.__closure__[0].cell_contents)

26
11


In [180]:
# NOTE(review): `register` is only created in the V2 cell further down, so on
# a fresh top-to-bottom run this cell raises NameError. The output shown was
# saved from an earlier, out-of-order execution.
register

{'hello': 4, 'add': 2}

In [195]:
# V2, better option if we want to maintain access to the values 
# used by our decorators: register in a global container

register = {}


def exc_counter(func):
    """Decorate ``func`` so that its calls are tallied in ``register``.

    Counts are stored in the module-level ``register`` dict under
    ``func.__name__``, which makes them easy to inspect from anywhere.
    Two functions sharing a name would share (and reset) the same entry.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that increments ``register[func.__name__]`` and then
        returns whatever ``func`` returns.
    """
    # Create an entry in the dictionary and set it to 0
    register[func.__name__] = 0

    @functools.wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # Access that entry and increment it each call
        register[func.__name__] += 1
        # Pass the result through; without this `return` every decorated
        # function would silently evaluate to None.
        return func(*args, **kwargs)
    return wrapper


@exc_counter
def hello(name):
    """Return the greeting ``'hello <name>'``."""
    # Trailing space added so the word and the name are separated, matching
    # the V1 definition of hello earlier in the notebook.
    return 'hello ' + name
    
    
@exc_counter
def add(x, y):
    """Add ``x`` and ``y`` and return the result."""
    total = x + y
    return total
    
# Simulate calling the functions a bunch of times:
# 10 calls to hello, then 25 calls to add. Nothing is printed here; the
# tallies accumulate in `register`.
for _ in range(10):
    hello('_')
for _ in range(25):
    add(1,3)

In [196]:
# Show the per-function tallies accumulated by the loops in the previous cell
print(register) # {'hello': 10, 'add': 25}

{'hello': 10, 'add': 25}
