In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import re 
import json

In [2]:
game_dataset = pd.read_csv('dataset/games_dropped_column.csv')

In [3]:
print(game_dataset.keys())

Index(['id', 'name', 'release_date', 'Estimated owners', 'user_score',
       'average_playtime_forever', 'median_playtime_forever', 'Publishers',
       'genres'],
      dtype='object')


## Process Date format

In [4]:
game_dataset['release_date']

0        Oct 21, 2008
1        Oct 12, 2017
2        Nov 17, 2021
3        Jul 23, 2020
4         Feb 3, 2020
             ...     
97405    Aug 22, 2024
97406    Aug 30, 2024
97407    Aug 30, 2024
97408    Aug 29, 2024
97409    Aug 30, 2024
Name: release_date, Length: 97410, dtype: object

In [26]:
game_dataset["release_date"] = game_dataset["release_date"].apply(lambda x: pd.to_datetime(x).strftime('%Y-%m'))

def processOwners(value):
    if value == '0':
        return 0
    else:
        v = value.split('-')
        v1 = int(v[0].strip())
        v2 = int(v[1].strip())
        return int((v1 + v2) / 2)

game_dataset['Estimated owners'] = game_dataset['Estimated owners'].apply(lambda x: processOwners(x))
print(game_dataset)

            id                    name release_date  Estimated owners  \
0        20200        Galactic Bowling      2008-10             10000   
1       655370            Train Bandit      2017-10             10000   
2      1732930            Jolt Project      2021-11             10000   
3      1355720                Henosis™      2020-07             10000   
4      1139950   Two Weeks in Painland      2020-02             10000   
...        ...                     ...          ...               ...   
97405  3080940  Femdom Game World: Mom      2024-08             10000   
97406  2593970             Blocky Farm      2024-08                 0   
97407  3137150    Infiltrate & Extract      2024-08                 0   
97408  3124670       Escape The Garage      2024-08                 0   
97409  3054200              Lober Lobe      2024-08                 0   

       user_score  average_playtime_forever  median_playtime_forever  \
0               0                         0        

In [27]:
print(min(game_dataset["release_date"]))
print(max(game_dataset["release_date"]))

1997-06
2025-04


## Get all genres

In [28]:
def process_genres(value):
    if isinstance(value, float):
        return []
    genres = value.split(",")
    genres = [genre.strip() for genre in genres]
    return genres

game_dataset["genres"] = game_dataset["genres"].apply(process_genres)

AttributeError: 'list' object has no attribute 'split'

In [29]:
all_genres = []
for genres in game_dataset["genres"]:
    for elem in genres:
        all_genres.append(elem)

all_genres_distributions = pd.Series(all_genres).value_counts()
all_genres_distributions_20 = all_genres_distributions.head(20)

In [30]:
all_genres_distributions_20

Indie                    64501
Casual                   39308
Action                   38004
Adventure                35864
Simulation               18608
Strategy                 17760
RPG                      16657
Early Access             12097
Free to Play              7920
Sports                    4262
Racing                    3482
Massively Multiplayer     2422
Utilities                  925
Design & Illustration      538
Violent                    527
Animation & Modeling       442
Education                  419
Free To Play               323
Video Production           317
Gore                       311
Name: count, dtype: int64

In [67]:
all_genres_renamed = {'Indie':'Indie', 'Casual':'Casual', 'Action':'Action', 'Adventure':'Adventure', 'Simulation':'Simulation', 'Strategy':'Strategy', 
                      'RPG':'RPG', 'Early Access':'Early_Accsess', 'Free To Play':'Free_to_Play','Free to Play':'Free_to_Play', 'Sports':'Sports','Racing':'Racing', 
                      'Massively Multiplayer': 'Massively_Multiplayer', 'Utilities': 'Utilities', 'Design & Illustration': 'Design_and_Illustration', 'Violent':'Violent', 
                      'Animation & Modeling': 'Animation_and_Modeling', 'Education':'Education', 'Video Production': 'Video_Production', 
                      'Gore':'Gore'}


In [68]:
all_genres_distributions_20
genres_20 = all_genres_distributions_20.keys().to_list()

print(genres_20)

['Indie', 'Casual', 'Action', 'Adventure', 'Simulation', 'Strategy', 'RPG', 'Early Access', 'Free to Play', 'Sports', 'Racing', 'Massively Multiplayer', 'Utilities', 'Design & Illustration', 'Violent', 'Animation & Modeling', 'Education', 'Free To Play', 'Video Production', 'Gore']


In [32]:
game_dataset.loc[0]['genres']

['Casual', 'Indie', 'Sports']

## Keeping only 100 publisers

In [46]:
publishers = game_dataset['Publishers'].value_counts()

publishers_100 = publishers[:100].keys().to_list()
publishers_100_games = {pub: [] for pub in publishers_100}
print(publishers_100_games)

{'Big Fish Games': [], '8floor': [], 'SEGA': [], 'HH-Games': [], 'Laush Studio': [], 'Strategy First': [], 'Square Enix': [], 'Sekai Project': [], 'Choice of Games': [], 'EroticGamesClub': [], 'Ubisoft': [], 'Kagura Games': [], 'THQ Nordic': [], 'Electronic Arts': [], 'Hede': [], 'Boogygames Studios': [], 'Slitherine Ltd.': [], 'Atomic Fabrik,Cristian Manolachi': [], 'Ziggurat': [], 'Alawar Entertainment': [], 'Devolver Digital': [], 'DigiPen Institute of Technology': [], 'Fulqrum Publishing': [], 'Plug In Digital': [], 'Piece Of Voxel': [], 'Pixel Games UK': [], 'Hosted Games': [], 'Microids': [], 'Sokpop Collective': [], 'Komodo': [], 'PLAYISM': [], 'KOEI TECMO GAMES CO., LTD.': [], 'Tero Lunkka': [], 'Reforged Group': [], 'Nacon': [], 'tinyBuild': [], 'Daedalic Entertainment': [], 'IR Studio': [], 'Artifex Mundi': [], '2K': [], 'SA Industry': [], 'MangaGamer': [], 'Paradox Interactive': [], 'Blender Games': [], 'BANDAI NAMCO Entertainment': [], 'Cyber Keks': [], 'KEMCO': [], 'Cherry

## Gathering all games from the chosen publisher if their genres are include in the top 20 most popular genres

In [52]:
def all_genres_in(lst1, lst2):
    return all(elem in lst2 for elem in lst1)

lst1 = ['Action', 'Adventure']
lst2 = ['Action', 'Adventure', 'Indie']
print(all_genres_in(lst1, lst2)) # True

True


In [69]:
def intersection(lst1, lst2):
    return list(set(lst1) & set(lst2))

def all_genres_in(lst1, lst2):
    return all(elem in lst2 for elem in lst1)

games = []
games_index = []

for i in range(len(game_dataset)):
    game = game_dataset.loc[i]
    #if len(intersection(game['genres'], genres_20)) > 0 and game['Publishers'] in publishers_100:
    if all_genres_in(game['genres'],genres_20) and game['Publishers'] in publishers_100:
        games.append(game['name'])
        games_index.append(i)
        publishers_100_games[game['Publishers']].append(game['name'])

In [54]:
len(games_index)

8961

## Remove duplicates

In [55]:
for i in range(len(game)):
    for j in range(i, len(game)): 
        if i != j:
            if games[i] == games[j]:
                print(games[i])
print(len(games))

8961


In [70]:
game_dataset_subset = game_dataset.loc[games_index]
print(len(game_dataset_subset))

display(game_dataset_subset)

for i in range(len(game_dataset_subset)):
    game = game_dataset_subset.iloc[i]
    print(game)
    for i,g in enumerate(game["genres"]):
        game["genres"][i] = all_genres_renamed[g]

game_dataset_subset
    

8541


Unnamed: 0,id,name,release_date,Estimated owners,user_score,average_playtime_forever,median_playtime_forever,Publishers,genres
7,1968760,Legend of Rome - The Wrath of Mars,2022-05,10000,0,0,0,magnussoft,[Casual]
16,1330820,Hunting Unlimited 3,2020-11,10000,0,0,0,Ziggurat,"[Action, Casual, Simulation, Sports]"
41,866150,Treasures of the Ancients: Egypt,2018-06,75000,0,0,0,Garage Games,"[Adventure, Casual, Indie]"
42,231330,Deadfall Adventures,2013-11,150000,0,324,431,THQ Nordic,"[Action, Adventure]"
44,897820,Reigns: Game of Thrones,2018-10,75000,0,83,76,Devolver Digital,"[Adventure, Indie, RPG]"
...,...,...,...,...,...,...,...,...,...
97286,3105140,Cybernated,2024-08,10000,0,0,0,Ghost_RUS Games,"[Action, Adventure, Indie]"
97289,3095570,Hentai Tales: Paradise After Death,2024-09,0,0,0,0,Hentai works,"[Casual, Indie, Strategy]"
97330,3079440,3D PUZZLE - Bedroom,2024-08,0,0,0,0,Hede,"[Action, Casual, Indie]"
97361,3120300,3D PUZZLE - Pizza Shop 1,2024-08,0,0,0,0,Hede,"[Action, Adventure, Casual, Indie, Racing, RPG..."


id                                                     1968760
name                        Legend of Rome - The Wrath of Mars
release_date                                           2022-05
Estimated owners                                         10000
user_score                                                   0
average_playtime_forever                                     0
median_playtime_forever                                      0
Publishers                                          magnussoft
genres                                                [Casual]
Name: 7, dtype: object
id                                                       1330820
name                                         Hunting Unlimited 3
release_date                                             2020-11
Estimated owners                                           10000
user_score                                                     0
average_playtime_forever                                       0
median_playtime_fore

Unnamed: 0,id,name,release_date,Estimated owners,user_score,average_playtime_forever,median_playtime_forever,Publishers,genres
7,1968760,Legend of Rome - The Wrath of Mars,2022-05,10000,0,0,0,magnussoft,[Casual]
16,1330820,Hunting Unlimited 3,2020-11,10000,0,0,0,Ziggurat,"[Action, Casual, Simulation, Sports]"
41,866150,Treasures of the Ancients: Egypt,2018-06,75000,0,0,0,Garage Games,"[Adventure, Casual, Indie]"
42,231330,Deadfall Adventures,2013-11,150000,0,324,431,THQ Nordic,"[Action, Adventure]"
44,897820,Reigns: Game of Thrones,2018-10,75000,0,83,76,Devolver Digital,"[Adventure, Indie, RPG]"
...,...,...,...,...,...,...,...,...,...
97286,3105140,Cybernated,2024-08,10000,0,0,0,Ghost_RUS Games,"[Action, Adventure, Indie]"
97289,3095570,Hentai Tales: Paradise After Death,2024-09,0,0,0,0,Hentai works,"[Casual, Indie, Strategy]"
97330,3079440,3D PUZZLE - Bedroom,2024-08,0,0,0,0,Hede,"[Action, Casual, Indie]"
97361,3120300,3D PUZZLE - Pizza Shop 1,2024-08,0,0,0,0,Hede,"[Action, Adventure, Casual, Indie, Racing, RPG..."


## Create pairs

In [71]:
publishers_100_games

{'Big Fish Games': ["Grim Tales: The Heir Collector's Edition",
  "Surface: Return to Another World Collector's Edition",
  'Mystery Case Files: Ravenhearst®',
  "The Agency of Anomalies: Mystic Hospital Collector's Edition",
  "Dark Romance: Sleepy Hollow Collector's Edition",
  "Fatal Evidence: Cursed Island Collector's Edition",
  "Mystery Case Files®: 13th Skull™ Collector's Edition",
  "Grim Tales: Crimson Hollow Collector's Edition",
  "Witch Hunters: Full Moon Ceremony Collector's Edition",
  "Reflections of Life: Meridiem Collector's Edition",
  "Haunted Hotel: Room 18 Collector's Edition",
  "Haunted Manor: Painted Beauties Collector's Edition",
  "Haunted Hotel: A Past Redeemed Collector's Edition",
  "Mystery Case Files: Crossfade Collector's Edition",
  "Final Cut: Death on the Silver Screen Collector's Edition",
  "Fairy Godmother Stories: Puss in Boots Collector's Edition",
  "Mystery Case Files: Moths to a Flame Collector's Edition",
  "Haunted Legends: The Stone Guest C

In [73]:
p = 0.001

def getAllPairs(publisher_games):
    all_pairs = []
    for key in publisher_games:
        if len(publisher_games[key]) >= 2:
            game_list = publisher_games[key]
            for i in range(len(game_list)):
                for j in range(i+1, len(game_list)):
                    if np.random.rand() < p:
                        all_pairs.append((game_list[i], game_list[j]))
            
    return all_pairs

all_pairs = getAllPairs(publishers_100_games)
print(len(all_pairs))

5097


In [74]:
print(all_pairs)
print(len(all_pairs))

[("The Agency of Anomalies: Mystic Hospital Collector's Edition", "Detectives United: Origins Collector's Edition"), ("Dark Romance: Sleepy Hollow Collector's Edition", "Mystery Trackers: Fall of Iron Rock Collector's Edition"), ("Dark Romance: Sleepy Hollow Collector's Edition", "Grim Tales: Graywitch Collector's Edition"), ("Fatal Evidence: Cursed Island Collector's Edition", "Crossroads: On a Just Path Collector's Edition"), ("Mystery Case Files®: 13th Skull™ Collector's Edition", 'Mahjong Masters: Temple of the Ten Gods'), ("Grim Tales: Crimson Hollow Collector's Edition", "Haunted Hotel: The X Collector's Edition"), ("Grim Tales: Crimson Hollow Collector's Edition", "Grim Tales: The Vengeance Collector's Edition"), ("Grim Tales: Crimson Hollow Collector's Edition", "Living Legends: Voice of the Sea Collector's Edition"), ("Witch Hunters: Full Moon Ceremony Collector's Edition", "Mayan Prophecies: Ship of Spirits Collector's Edition"), ("Witch Hunters: Full Moon Ceremony Collector'

In [75]:
test = {"nodes": [{"id": name, "release_date": release_date,
                   "estimated_owners": estimated_owners, "user_score": user_score, 
                   "average_playtime_forever": average_playtime_forever, 
                   "median_playtime_forever": median_playtime_forever, 
                   "publisher": publisher, "genres": genres} for name, release_date, 
                   estimated_owners, user_score, average_playtime_forever, 
                   median_playtime_forever, publisher, genres in 
                   zip(game_dataset_subset["name"], game_dataset_subset["release_date"], 
                       game_dataset_subset["Estimated owners"], 
                       game_dataset_subset["user_score"], 
                       game_dataset_subset["average_playtime_forever"], 
                       game_dataset_subset["median_playtime_forever"], 
                       game_dataset_subset["Publishers"], 
                       game_dataset_subset["genres"])]}
#test["links"] = [{"source": name1, "target": name2, "source_group": [],"target_group": [], "value": 1} for name1, name2 in all_pairs]
test["links"] = [{"source": name1, "target": name2, "value": 1} for name1, name2 in all_pairs]

In [76]:
test

{'nodes': [{'id': 'Legend of Rome - The Wrath of Mars',
   'release_date': '2022-05',
   'estimated_owners': 10000,
   'user_score': 0,
   'average_playtime_forever': 0,
   'median_playtime_forever': 0,
   'publisher': 'magnussoft',
   'genres': ['Casual']},
  {'id': 'Hunting Unlimited 3',
   'release_date': '2020-11',
   'estimated_owners': 10000,
   'user_score': 0,
   'average_playtime_forever': 0,
   'median_playtime_forever': 0,
   'publisher': 'Ziggurat',
   'genres': ['Action', 'Casual', 'Simulation', 'Sports']},
  {'id': 'Treasures of the Ancients: Egypt',
   'release_date': '2018-06',
   'estimated_owners': 75000,
   'user_score': 0,
   'average_playtime_forever': 0,
   'median_playtime_forever': 0,
   'publisher': 'Garage Games',
   'genres': ['Adventure', 'Casual', 'Indie']},
  {'id': 'Deadfall Adventures',
   'release_date': '2013-11',
   'estimated_owners': 150000,
   'user_score': 0,
   'average_playtime_forever': 324,
   'median_playtime_forever': 431,
   'publisher': 'T

In [77]:
test["release_date"] = test["release_date"].apply(lambda x: pd.to_datetime(x).strftime('%Y-%m'))
test

KeyError: 'release_date'

In [78]:
with open(f"my-app/src/assets/test5.json", "w") as outfile: 
    json.dump(test, outfile)