# Dataset Exploration

In [1]:
import pandas as pd
import ast 

In [2]:
def calculate_purchases(df_purchased_games):
    """Count how many player libraries contain each game.

    Args:
        df_purchased_games: DataFrame with a ``library`` column. Each cell is
            either the string form of a Python list of game ids (as read from
            CSV), an already parsed list, or a missing value.

    Returns:
        DataFrame with columns ``game_id`` and ``sales``, one row per game,
        ordered from most to least owned.

    The input frame is left untouched, so the function can safely be called
    more than once on the same frame.
    """
    def _parse_library(value):
        # Only strings need parsing; lists (already parsed) and missing
        # values (players without a library) pass through unchanged.
        if isinstance(value, str):
            return ast.literal_eval(value)
        return value

    # Work on a derived Series instead of overwriting the caller's column.
    libraries = df_purchased_games['library'].apply(_parse_library)

    # One row per (player, game); empty or missing libraries become NaN,
    # which value_counts() drops by default.
    game_counts = libraries.explode().value_counts().reset_index()
    game_counts.columns = ['game_id', 'sales']
    return game_counts

def attach_game_information(df_purchases, df_games):
    """Join per-game purchase counts with the game catalogue.

    Rows are matched on ``df_purchases.game_id == df_games.gameid`` with an
    inner join, so games missing from either frame are dropped. The
    redundant ``gameid`` key column is removed from the result.
    """
    joined = df_purchases.merge(
        df_games,
        how='inner',
        left_on='game_id',
        right_on='gameid',
    )
    return joined.drop(columns=['gameid'])

def attach_game_price(df_games, df_prices, latest_only=False):
    """Join game rows with their price records.

    Args:
        df_games: DataFrame keyed by a ``game_id`` column.
        df_prices: DataFrame keyed by a ``gameid`` column. It may hold
            several price snapshots per game, told apart by a
            ``date_acquired`` column.
        latest_only: When True, keep only the most recent snapshot (by
            ``date_acquired``) for each game before joining, so every game
            appears at most once in the result. Defaults to False, which
            keeps every snapshot and therefore repeats a game row once per
            snapshot.

    Returns:
        Inner join of the two frames on ``game_id == gameid`` with the
        redundant ``gameid`` column removed.
    """
    if latest_only:
        # ISO-formatted dates sort chronologically as plain strings; the
        # stable sort keeps file order for snapshots sharing a date.
        df_prices = (
            df_prices
            .sort_values('date_acquired', kind='mergesort')
            .drop_duplicates(subset='gameid', keep='last')
        )

    merged_df = df_games.merge(
        df_prices,
        left_on='game_id',
        right_on='gameid',
        how='inner',
    )
    return merged_df.drop(columns=['gameid'])

def merge_dataset(folder):
    """Build the combined sales / catalogue / price table for one platform.

    Reads ``games.csv``, ``prices.csv`` and ``purchased_games.csv`` from
    ``../gaming/<folder>/``, counts purchases per game, then joins the
    catalogue information and the prices onto those counts.
    """
    base = '../gaming/' + folder

    df_games = pd.read_csv(base + '/games.csv')
    df_prices = pd.read_csv(base + '/prices.csv')
    df_purchased_games = pd.read_csv(base + '/purchased_games.csv')

    # Pipeline: purchase counts -> + game metadata -> + prices.
    sales = calculate_purchases(df_purchased_games)
    sales_with_info = attach_game_information(sales, df_games)
    return attach_game_price(sales_with_info, df_prices)

# Preview the first rows of the merged PlayStation dataset.
merge_dataset('playstation').head()

Unnamed: 0,game_id,sales,title,platform,developers,publishers,genres,supported_languages,release_date,usd,eur,gbp,jpy,rub,date_acquired
0,7779,30721,Grand Theft Auto V,PS4,['rockstar north'],['Rockstar Games'],['Action'],"['Japanese', 'French', 'Spanish', 'German', 'I...",2022-03-15,19.79,19.79,16.49,,2849.0,2025-02-22
1,7779,30721,Grand Theft Auto V,PS4,['rockstar north'],['Rockstar Games'],['Action'],"['Japanese', 'French', 'Spanish', 'German', 'I...",2022-03-15,19.79,19.79,16.49,,2849.0,2025-02-24
2,16508,26571,Horizon Zero Dawn,PS4,['Guerrilla Games'],['Sony Interactive Entertainment'],"['Open World', 'Action-RPG', 'Role Playing']","['Japanese', 'French', 'Spanish', 'German', 'I...",2017-02-28,,,,,,2025-02-22
3,16508,26571,Horizon Zero Dawn,PS4,['Guerrilla Games'],['Sony Interactive Entertainment'],"['Open World', 'Action-RPG', 'Role Playing']","['Japanese', 'French', 'Spanish', 'German', 'I...",2017-02-28,,,,,,2025-02-24
4,333860,26142,Call of Duty Modern Warfare,PS4,['Infinity Ward'],['Activision'],"['Battle Royale', 'First Person Shooter']","['Japanese', 'French', 'Spanish', 'German', 'I...",2019-10-25,59.99,59.99,49.99,,,2025-02-22


In [3]:
# Rebuild the purchases/catalogue join step by step, this time keeping the
# 'gameid' key column so both join keys can be inspected side by side.
df_purchased_games = pd.read_csv('../gaming/' + 'playstation' + '/purchased_games.csv')
df_sum_purchased = calculate_purchases(df_purchased_games)

df_games = pd.read_csv('../gaming/' + 'playstation' + '/games.csv')

merged_df = df_sum_purchased.merge(
    df_games,
    how='inner',
    left_on='game_id',
    right_on='gameid',
)

merged_df.head()


Unnamed: 0,game_id,sales,gameid,title,platform,developers,publishers,genres,supported_languages,release_date
0,7779,30721,7779,Grand Theft Auto V,PS4,['rockstar north'],['Rockstar Games'],['Action'],"['Japanese', 'French', 'Spanish', 'German', 'I...",2022-03-15
1,16508,26571,16508,Horizon Zero Dawn,PS4,['Guerrilla Games'],['Sony Interactive Entertainment'],"['Open World', 'Action-RPG', 'Role Playing']","['Japanese', 'French', 'Spanish', 'German', 'I...",2017-02-28
2,333860,26142,333860,Call of Duty Modern Warfare,PS4,['Infinity Ward'],['Activision'],"['Battle Royale', 'First Person Shooter']","['Japanese', 'French', 'Spanish', 'German', 'I...",2019-10-25
3,167482,25707,167482,Red Dead Redemption 2,PS4,['Rockstar Games'],['Rockstar Games'],['Adventure'],"['Japanese', 'French', 'Spanish', 'German', 'I...",2018-10-26
4,6988,25620,6988,The Last of Us Remastered,PS4,['Naughty Dog'],['Sony Computer Entertainment America'],"['Action', 'Adventure']","['Japanese', 'French', 'Spanish', 'German', 'I...",2014-07-29


In [4]:
# Show every catalogue row whose gameid is 7779.
df_games.loc[df_games['gameid'] == 7779]

Unnamed: 0,gameid,title,platform,developers,publishers,genres,supported_languages,release_date
11870,7779,Grand Theft Auto V,PS4,['rockstar north'],['Rockstar Games'],['Action'],"['Japanese', 'French', 'Spanish', 'German', 'I...",2022-03-15
