In [None]:
import os
import pandas as pd
import numpy as np

from config_file import config

In [198]:
def sanitize_filename(name):
    special = "\\/:*?\"<>|+"
    new_name = "".join(c if c not in special else "_" for c in name)

    if new_name[-1] in " .":
        new_name = new_name[:-1]

    return new_name

In [236]:
dataset_path = config.IMAGES_PATH / "retro-games-gameplay-frames-30k-512p" / "dataset"
games = os.listdir(dataset_path)

videos = pd.read_csv(config.DATA_PATH / "videos.csv", index_col='game')
core_games_info = pd.read_csv(config.DATA_PATH / "core_games_info.csv", index_col='name')
core_games_info = core_games_info[["genres", "player_perspectives"]]
core_games_info = core_games_info.drop_duplicates()

In [237]:
downloaded_games = pd.Series(core_games_info.index).apply(lambda game: game in videos.index).tolist()
downloaded_games_info = core_games_info.loc[downloaded_games]

In [238]:
downloaded_games_info.info()

<class 'pandas.core.frame.DataFrame'>
Index: 28 entries, Silent Hill 2 to Guild Wars
Data columns (total 2 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   genres               28 non-null     object
 1   player_perspectives  28 non-null     object
dtypes: object(2)
memory usage: 672.0+ bytes


In [239]:
def get_frames_count(game_name: str) -> int:
    frames = os.listdir(dataset_path / sanitize_filename(game_name))
    return len(frames)

downloaded_games_info["frames_count"] = pd.Series(downloaded_games_info.index).apply(get_frames_count).to_list()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downloaded_games_info["frames_count"] = pd.Series(downloaded_games_info.index).apply(get_frames_count).to_list()


In [240]:
downloaded_games_info

Unnamed: 0_level_0,genres,player_perspectives,frames_count
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Silent Hill 2,"Puzzle, Adventure",Third person,73
Star Wars: Knights of the Old Republic,"Role-playing (RPG), Adventure",Third person,112
Half-Life 2,Shooter,First person,1644
Grand Theft Auto: San Andreas,"Shooter, Racing, Adventure","First person, Third person",792
System Shock 2,"Shooter, Role-playing (RPG)",First person,17
Resident Evil 4,"Shooter, Adventure",Third person,184
Half-Life,"Shooter, Puzzle, Adventure",First person,19
Warcraft III: Reign of Chaos,"Real Time Strategy (RTS), Strategy",Bird view / Isometric,169
Grand Theft Auto: Vice City,"Shooter, Racing, Adventure, Arcade",Third person,238
Thief II: The Metal Age,"Shooter, Simulator, Adventure",First person,30


In [241]:
game_genres_distribution = {}

for game_name, genres_string in downloaded_games_info['genres'].drop_duplicates().items():
    genres = genres_string.split(', ')

    for genre in genres:
        if genre not in game_genres_distribution:
            game_genres_distribution[genre] = []
        game_genres_distribution[genre].append(game_name)

In [242]:
for genre in game_genres_distribution:
    print(genre, len(game_genres_distribution[genre]))

Puzzle 8
Adventure 14
Role-playing (RPG) 5
Shooter 12
Racing 3
Real Time Strategy (RTS) 2
Strategy 3
Arcade 1
Simulator 4
Platform 3
Hack and slash/Beat 'em up 2
Turn-based strategy (TBS) 1
Tactical 1


In [291]:
frame_genres_distribution = {}
total_frames_count = np.sum(downloaded_games_info['frames_count'])

for genre in game_genres_distribution:
    total_genre_frames_count = 0
    games = game_genres_distribution[genre]
    
    for game in games:
        total_genre_frames_count += int(downloaded_games_info.loc[game]['frames_count'])
    
    frame_genres_distribution[genre] = total_genre_frames_count

In [295]:
frame_genres_distribution.items()

dict_items([('Puzzle', 946), ('Adventure', 2866), ('Role-playing (RPG)', 734), ('Shooter', 4914), ('Racing', 1572), ('Real Time Strategy (RTS)', 567), ('Strategy', 582), ('Arcade', 238), ('Simulator', 1913), ('Platform', 516), ("Hack and slash/Beat 'em up", 819), ('Turn-based strategy (TBS)', 398), ('Tactical', 398)])

In [311]:
frames_counts = [value for value in frame_genres_distribution.values()]

df = pd.DataFrame(data=frames_counts, index=list(frame_genres_distribution.keys()), columns=['frames_count'])
df['percent'] = np.array(df['frames_count']) / total_frames_count * 100

df.sort_values(by='percent', ascending=False)

Unnamed: 0,frames_count,percent
Shooter,4914,48.866348
Adventure,2866,28.500398
Simulator,1913,19.023469
Racing,1572,15.632458
Puzzle,946,9.407319
Hack and slash/Beat 'em up,819,8.144391
Role-playing (RPG),734,7.299125
Strategy,582,5.787589
Real Time Strategy (RTS),567,5.638425
Platform,516,5.131265
