# Heroes of Pymoli Analysis

In [122]:
import json
import pandas as pd
import numpy as np


In [123]:
df = pd.read_json('../Resources/purchase_data.json') 


df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [124]:
player_count = df["SN"].nunique()
print("Player Count:  " + str(player_count))


Player Count:  573


## Purchasing Analysis (Total)

In [125]:
# Number of Unique Items
items = df["Item ID"].nunique()

# Average Purchase Price
avg_pp = "{:.2f}".format(df["Price"].mean())

# Total Number of Purchases
total_purchases = df["SN"].count()

# Total Revenue
total_revenue = df["Price"].sum()


In [126]:

print("Unique Items:  " + str(items))
print("Average Purchase Price:  " + str(avg_pp))
print("Total Purchases:  " + str(total_purchases))
print("Total Revenue:  " + str(total_revenue))


Unique Items:  183
Average Purchase Price:  2.93
Total Purchases:  780
Total Revenue:  2286.33


## Gender Demographics

In [17]:
gender_count = df.groupby('Gender', as_index = True)['SN'].nunique()

# Percentage and Count of Male Players
m = gender_count.loc["Male"]
pm ="{:.2%}".format(m/player_count)

# Percentage and Count of Female Players
f = gender_count.loc["Female"]
fm ="{:.2%}".format(f/player_count)

# Percentage and Count of Other / Non-Disclosed
o = gender_count.loc['Other / Non-Disclosed']
om ="{:.2%}".format(o/player_count)



In [18]:
print(str(m) + " male players made up " + str(pm) + " of all players")
print(str(f) + " female players made up " + str(fm) + " of all players")
print(str(o) + " other/non-disclosed players made up " + str(om) + " of all players")

465 male players made up 81.15% of all players
100 female players made up 17.45% of all players
8 other/non-disclosed players made up 1.40% of all players


## Purchasing Analysis (Gender)


In [129]:
# Purchase Count

pc = df.groupby('Gender', as_index = True)['Item ID'].nunique()
pc = pc.to_frame()

# Average Purchase Price
avp = df.groupby('Gender', as_index = True)['Price'].mean()
avp = avp.to_frame()

# Total Purchase Value
tvp = df.groupby('Gender', as_index = True)['Price'].sum()
tvp = tvp.to_frame()


gender_df = pc.merge(avp, left_index =True,right_index=True)
gender_df = gender_df.merge(tvp, left_index =True,right_index=True)
gender_df = gender_df.rename(index=str, columns={"Item ID": "Total Purchased", "Price_x": "Average Price", "Price_y": "Total Price"})
gender_df.head()

Unnamed: 0_level_0,Total Purchased,Average Price,Total Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,96,2.815515,382.91
Male,180,2.950521,1867.68
Other / Non-Disclosed,11,3.249091,35.74


## Age Demographics


The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.) 


Purchase Count
Average Purchase Price
Total Purchase Value
Normalized Totals

In [130]:
bins = [0, 9, 14, 19, 24, 29, 34, 39, 44]

group_names = ['<10', '10-14', '15-19', '20-24','25-29' , '30-34', '35-39','>40' ] 


df["Age Range"]=pd.cut(df["Age"], bins, labels=group_names)


In [131]:
age_pc = df.groupby('Age Range', as_index = True)['Item ID'].nunique()
age_app = df.groupby('Age Range', as_index = True)['Price'].mean()
age_tpv = df.groupby('Age Range', as_index = True)['Price'].sum()

age_pc = age_pc.to_frame()
age_app = age_app.to_frame()
age_tpv = age_tpv.to_frame()



In [19]:
age_df = age_pc.merge(age_app, left_index =True,right_index=True)
age_df = age_df.merge(age_tpv, left_index =True,right_index=True)
age_df = age_df.rename(index=str, columns={"Item ID": "Total Purchased", "Price_x": "Average Price", "Price_y": "Total Price"})
age_df.head(10)

Unnamed: 0_level_0,Total Purchased,Average Price,Total Price
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<10,24,2.980714,83.46
10-14,32,2.77,96.95
15-19,91,2.905414,386.42
20-24,158,2.913006,978.77
25-29,89,2.96264,370.33
30-34,57,3.082031,197.25
35-39,37,2.842857,119.4
>40,16,3.189375,51.03


## Top Spenders

In [139]:
spender_df = df.groupby('SN')['Price'].sum()
spender_df = spender_df.to_frame()
spender_df["Purchase Count"] = df.groupby('SN')["Item ID"].count()
spender_df["Average Price"] = df.groupby('SN')["Price"].mean()
spender_df["Total Price"] = df.groupby('SN')["Price"].sum()


### Top Spenders

In [141]:
spender_df.nlargest(5, "Price")


Unnamed: 0_level_0,Price,Purchase Count,Average Price,Total Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Undirrala66,17.06,5,3.412,17.06
Saedue76,13.56,4,3.39,13.56
Mindimnya67,12.74,4,3.185,12.74
Haellysu29,12.73,3,4.243333,12.73
Eoda93,11.58,3,3.86,11.58


## Most Popular Items

### Most Popular by Purchase Count

In [142]:
popular_df = df.groupby('Item ID')['Price'].max()
popular_df = popular_df.to_frame()
popular_df["Item Name"] = df["Item Name"]
popular_df["Purchase Count"] = df.groupby('Item ID')["Item ID"].count()
popular_df["Total Price"] = df.groupby('Item ID')["Price"].sum()

popular_df.nlargest(5, "Purchase Count")


Unnamed: 0_level_0,Price,Item Name,Purchase Count,Total Price
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,2.35,Stormfury Mace,11,25.85
84,2.23,"Thorn, Satchel of Dark Souls",11,24.53
13,1.49,"Piety, Guardian of Riddles",9,13.41
31,2.07,"Shadow Strike, Glory of Ending Hope",9,18.63
34,4.14,"Alpha, Reach of Ending Hope",9,37.26


### Most Popular by Total Price

In [143]:
popular_df.nlargest(5, "Total Price")

Unnamed: 0_level_0,Price,Item Name,Purchase Count,Total Price
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,4.14,"Alpha, Reach of Ending Hope",9,37.26
115,4.25,"Thorn, Conqueror of the Corrupted",7,29.75
32,4.95,"Rage, Legacy of the Lone Victor",6,29.7
103,4.87,"Mercy, Katana of Dismay",6,29.22
107,3.61,Spectral Diamond Doomblade,8,28.88


## Incites from Analysis

1 - More men buy items in Pymoli than women.
2 - However this could be skewed by the fact that there are more male players.  
3 - The largest slice in the age demographic by total purchases and price are people between 20-24