# Heroes of Pymoli Data Analysis

In [1]:
# Dependencies
import pandas as pd

In [2]:
# Read JSON
path_to_jason = 'Desktop/pandas-challenge/HeroesOfPymoli/purchase_data.json'
hop_data      = pd.read_json(path_to_jason)
hop_data.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [3]:
# Total Number of Players
player_count = hop_data['SN'].value_counts().count()
player_count = pd.DataFrame(data=[{'Total Players': player_count}])
player_count

Unnamed: 0,Total Players
0,573


## Purchasing Analysis (Total)

In [4]:
# Number of Unique Items
unique_count = hop_data['Item Name'].value_counts().count()
# Number of Purchases
purchase_count = hop_data['Price'].count()
# Average Purchase Price
price_average = hop_data['Price'].mean()
price_average = f"${round(price_average, 2)}"
# Total Revenue
revneue_total = hop_data['Price'].sum()
revneue_total = f"${round(revneue_total, 2)}"

purchase_analysis = pd.DataFrame(data=[{'Number of Unique Items': unique_count,
                                        'Number of Purchases':    purchase_count,
                                        'Average Purchase Price': price_average,
                                        'Total Revenue':          revneue_total}])
purchase_analysis

Unnamed: 0,Average Purchase Price,Number of Purchases,Number of Unique Items,Total Revenue
0,$2.93,780,179,$2286.33


## Gender Demographics

In [5]:
# Count, Total, and List
gender_count    = hop_data['Gender'].value_counts()
gender_total    = gender_count.sum()
gender_category = set(hop_data['Gender'])
# Count of Players
gender_count = {gender: gender_count[gender] for gender in gender_category}
# Percentage of Players
gender_percentages  = {gender: f"{round((count / gender_total) * 100, 2)}%" for gender, count in gender_count.items()}

gender_data = {'Total Count': gender_count,
               'Percentage':  gender_percentages}

gender_demographics = pd.DataFrame(gender_data)
gender_demographics

Unnamed: 0,Percentage,Total Count
Female,17.44%,136
Male,81.15%,633
Other / Non-Disclosed,1.41%,11


## Purchasing Analysis (Gender)

In [7]:
# The below each broken by gender
gp_data = hop_data.groupby('Gender')
gp_data = {key: value['Price'] for key, value in gp_data}
# Purchase Count
purchase_count = {gender: gp_data[gender].count() for gender in gender_category} # gender_category - from Gender Demographics
# Total Purchase Value
purchase_total           = {gender: gp_data[gender].sum() for gender in gender_category}
formatted_purchase_total = {gender: f"${round(total, 2)}" for gender, total in purchase_total.items()}
# Average Purchase Price
purchase_average = {gender: f"${round(total / purchase_count[gender],2)}" for gender, total in purchase_total.items()}
# Normalized Totals

gp_data = {'Purchase Count': purchase_count,
           'Average Purchase Price': purchase_average,
           'Total Purchase Value': formatted_purchase_total}

purchase_analysis = pd.DataFrame(gp_data)
purchase_analysis

Unnamed: 0,Average Purchase Price,Purchase Count,Total Purchase Value
Female,$2.82,136,$382.91
Male,$2.95,633,$1867.68
Other / Non-Disclosed,$3.25,11,$35.74


## Age Demographics

In [8]:
# The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)
age_category = set(hop_data['Age'])
age_data     = hop_data.groupby('Age')
age_data     = {key: value['Price'] for key, value in age_data}
# Purchase Count
purchase_count = {age: age_data[age].count() for age in age_category}
# Total Purchase Value
purchase_total           = {age: age_data[age].sum() for age in age_category}
formatted_purchase_total = {age: f"${round(total, 2)}" for age, total in purchase_total.items()}
# Average Purchase Price
purchase_average = {age: total / purchase_count[age] for age, total in purchase_total.items()}
formatted_purchase_average = {age: f"${round(total / purchase_count[age],2)}" for age, total in purchase_total.items()}
# Normalized Totals

age_data = {'Purchase Count':         purchase_count,
            'Average Purchase Price': formatted_purchase_total,
            'Total Purchase Value':   formatted_purchase_average}

purchase_analysis = pd.DataFrame(age_data)
purchase_analysis

Unnamed: 0,Average Purchase Price,Purchase Count,Total Purchase Value
7,$55.47,19,$2.92
8,$5.87,3,$1.96
9,$22.12,6,$3.69
10,$13.16,4,$3.29
11,$26.76,9,$2.97
12,$19.21,5,$3.84
13,$23.52,11,$2.14
14,$14.3,6,$2.38
15,$140.36,47,$2.99
16,$60.71,23,$2.64
