# Heroes of Pymoli Data Analysis

In [184]:
# Dependencies
import pandas as pd
import numpy as np

# Read JSON
path_to_jason = 'Desktop/pandas-challenge/HeroesOfPymoli/purchase_data.json'
hop_data      = pd.read_json(path_to_jason)
hop_data.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [185]:
# Total Number of Players
player_count = hop_data['SN'].value_counts().count()
player_count = pd.DataFrame(data=[{'Total Players': player_count}])
player_count

Unnamed: 0,Total Players
0,573


## Purchasing Analysis (Total)

In [186]:
# Number of Unique Items
unique_count = hop_data['Item Name'].value_counts().count()
# Number of Purchases
purchase_count = hop_data['Price'].count()
# Average Purchase Price
price_average = hop_data['Price'].mean()
price_average = f"${round(price_average, 2)}"
# Total Revenue
revneue_total = hop_data['Price'].sum()
revneue_total = f"${round(revneue_total, 2)}"

purchase_analysis = pd.DataFrame(data=[{'Number of Unique Items': unique_count,
                                        'Number of Purchases':    purchase_count,
                                        'Average Purchase Price': price_average,
                                        'Total Revenue':          revneue_total}])
purchase_analysis

Unnamed: 0,Average Purchase Price,Number of Purchases,Number of Unique Items,Total Revenue
0,$2.93,780,179,$2286.33


## Gender Demographics

In [187]:
# Count, Total, and List
gender_count    = hop_data['Gender'].value_counts()
gender_total    = gender_count.sum()
gp_category = {gender: gender for gender in set(hop_data['Gender'])}
# Count of Players
gender_count = {gender: gender_count[gender] for gender in gender_category}
# Percentage of Players
gender_percentages  = {gender: f"{round((count / gender_total) * 100, 2)}%" for gender, count in gender_count.items()}

gender_data = {'Gender':      gp_category,
               'Total Count': gender_count,
               'Percentage':  gender_percentages}

gender_demographics = pd.DataFrame(gender_data).set_index('Gender')
gender_demographics

Unnamed: 0_level_0,Percentage,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,17.44%,136
Male,81.15%,633
Other / Non-Disclosed,1.41%,11


## Purchasing Analysis (Gender)

In [188]:
# The below each broken by gender
gp_data     = hop_data.groupby('Gender')
gp_data     = {key: value['Price'] for key, value in gp_data}
# Purchase Count
purchase_count = {gender: gp_data[gender].count() for gender in gender_category} # gender_category - from Gender Demographics
# Total Purchase Value
purchase_total           = {gender: gp_data[gender].sum() for gender in gender_category}
formatted_purchase_total = {gender: f"${round(total, 2)}" for gender, total in purchase_total.items()}
# Average Purchase Price
purchase_average = {gender: f"${round(total / purchase_count[gender],2)}" for gender, total in purchase_total.items()}
# Normalized Totals

gp_data = {'Gender':                 gp_category,
           'Purchase Count':         purchase_count,
           'Average Purchase Price': purchase_average,
           'Total Purchase Value':   formatted_purchase_total}

purchase_analysis = pd.DataFrame(gp_data).set_index('Gender')
purchase_analysis

Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,$2.82,136,$382.91
Male,$2.95,633,$1867.68
Other / Non-Disclosed,$3.25,11,$35.74


## Age Demographics

In [229]:
# The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)
count   = hop_data.groupby(pd.cut(hop_data['Age'], np.arange(4, 50, 4))).count()
average = hop_data.groupby(pd.cut(hop_data['Age'], np.arange(4, 50, 4))).mean()
total   = hop_data.groupby(pd.cut(hop_data['Age'], np.arange(4, 50, 4))).sum()

age_data = {'Purchase Count':         count['Price'],
            'Average Purchase Price': average['Price'],
            'Total Purchase Value':   total['Price']} # format into currency

purchase_analysis = pd.DataFrame(age_data)
purchase_analysis 

Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Purchase Value
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(4, 8]",2.788182,22,61.34
"(8, 12]",3.385417,24,81.25
"(12, 16]",2.745862,87,238.89
"(16, 20]",2.907019,161,468.03
"(20, 24]",2.924748,238,696.09
"(24, 28]",2.974712,104,309.37
"(28, 32]",3.06197,66,202.09
"(32, 36]",2.981053,38,113.28
"(36, 40]",2.901351,37,107.35
"(40, 44]",2.96,2,5.92


## Top Spenders

In [10]:
# Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
count   = hop_data.groupby('SN').count()
average = hop_data.groupby('SN').mean()
total   = hop_data.groupby('SN').sum()

sn_data = {'Purchase Count':         count['Price'],
           'Average Purchase Price': average['Price'],
           'Total Purchase Value':   total['Price']} # format into currency

purchase_analysis = pd.DataFrame(sn_data).sort_values('Total Purchase Value', ascending=False).head(5)
purchase_analysis

Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,3.412,5,17.06
Saedue76,3.39,4,13.56
Mindimnya67,3.185,4,12.74
Haellysu29,4.243333,3,12.73
Eoda93,3.86,3,11.58


## Most Popular Items

In [11]:
# Identify the 5 most popular items by purchase count, then list (in a table):
count   = hop_data.groupby('Item Name').count()
average = hop_data.groupby('Item Name').mean()
total   = hop_data.groupby('Item Name').sum()

sn_data = {'Purchase Count':         count['Price'],
           'Average Purchase Price': average['Price'],
           'Total Purchase Value':   total['Price']} # format into currency

purchase_analysis = pd.DataFrame(sn_data).sort_values('Purchase Count', ascending=False).head(5)
purchase_analysis

Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Purchase Value
Item Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Final Critic,2.757143,14,38.6
Arcane Gem,2.23,11,24.53
"Betrayal, Whisper of Grieving Widows",2.35,11,25.85
Stormcaller,3.465,10,34.65
Woeful Adamantite Claymore,1.24,9,11.16


## Most Profitable Items

In [9]:
# Identify the 5 most profitable items by total purchase value, then list (in a table):
purchase_analysis = pd.DataFrame(item_data)
purchase_analysis = purchase_analysis.sort_values('Total Purchase Value', ascending=False).head(5)
purchase_analysis

Unnamed: 0,Average Purchase Price,Purchase Count,Total Purchase Value
Final Critic,$2.76,14,38.6
Retribution Axe,$4.14,9,37.26
Stormcaller,$3.46,10,34.65
Spectral Diamond Doomblade,$4.25,7,29.75
Orenmir,$4.95,6,29.7
