# Heroes Of Pymoli Data Analysis
* Of the 1163 active players, the vast majority are male (82%). There also exists, a smaller, but notable proportion of female players (16%).

* Our peak age demographic falls between 20-24 (42%) with secondary groups falling between 15-19 (17.80%) and 25-29 (15.48%).

* Our players are putting in significant cash during the lifetime of their gameplay. Across all major age and gender demographics, the average purchase for a user is roughly $491.   
-----

In [22]:
import pandas as pd
import numpy as np
HOP_file = "purchase_data.json"
purchase_data=pd.read_json(HOP_file)
purchase_data.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [36]:
#numbers_players=len(purchase_data['SN'].unique())

pd.DataFrame({"Total Players":[numbers_players]})

Unnamed: 0,Total Players
0,573


## Purchasing Analysis (Total)

In [24]:
item_count =len(purchase_data["Item ID"].unique())
aver_item_price = purchase_data["Price"].mean()
purchase_count = purchase_data["Price"].count()
total_purchase_value = purchase_data["Price"].sum()

summary_table= pd.DataFrame({"Number of Unique Items": item_count,
                             "Total Revenue": [total_purchase_value],
                             "Number of Purchases": [purchase_count],
                             "Average Purchase Price": [aver_item_price]})

summary_table = summary_table.round(2)
summary_table ["Average Price"] = summary_table["Average Purchase Price"].map("${:,.2f}".format)
summary_table ["Total Revenue"] = summary_table["Total Revenue"].map("${:,.2f}".format)
summary_table = summary_table.loc[:, ["Number of Unique Items", "Average Purchase Price",
                                      "Number of Purchases", "Total Revenue"]]
summary_table

Unnamed: 0,Number of Unique Items,Average Purchase Price,Number of Purchases,Total Revenue
0,183,2.93,780,"$2,286.33"


## Gender Demographics

In [29]:
# Calculations
Gender_demographics_totals = purchase_data["Gender"].value_counts()
Gender_demographics_percents = Gender_demographics_totals / numbers_players * 100

#Data frame for Gender Demographics
Gender_demographics = pd.DataFrame({"Total Count": Gender_demographics_totals,
                                    "Percentage of Players": Gender_demographics_percents})

Gender_demographics= Gender_demographics.round(2)

# Display table of Gender Demographics

Gender_demographics

Unnamed: 0,Percentage of Players,Total Count
Male,110.47,633
Female,23.73,136
Other / Non-Disclosed,1.92,11



## Purchasing Analysis (Gender)

In [30]:
Gender_Total_Purchase= purchase_data.groupby(["Gender"]).sum()["Price"].rename("Total Purchase Value")
Gender_ave=purchase_data.groupby(["Gender"]).mean()["Price"].rename("Average Purchase Price")
Gender_purchase_count=purchase_data.groupby(["Gender"]).count()["Price"].rename("Purchase Count")
#Normalized 
Normalized_total=Gender_Total_Purchase/Gender_demographics["Total Count"]
#Data Frame for Purchasing Analysis by Gender
Gender_table=pd.DataFrame({"Purchase Count":Gender_purchase_count,"Average Purchase Price":Gender_ave,"Total Purchase Value":Gender_Total_Purchase,"Normalized Total":Normalized_total})
df=pd.DataFrame({"Purchase Count":Gender_purchase_count,"Average Purchase Price":Gender_ave,"Total Purchase Value":Gender_Total_Purchase,"Normalized Total":Normalized_total})
Gender_table_df=df.loc[:,["Purchase Count","Average Purchase Price","Total Purchase Value","Normalized Total"]]
Gender_table_df.round(2)
Gender_table_df["Average Purchase Price"]=Gender_table_df["Average Purchase Price"].map("${:,.2f}".format)
Gender_table_df["Total Purchase Value"]=Gender_table_df["Total Purchase Value"].map("${:,.2f}".format)
Gender_table_df["Normalized Total"]=Gender_table_df["Normalized Total"].map("${:,.2f}".format)
Gender_table_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Total
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$2.82
Male,633,$2.95,"$1,867.68",$2.95
Other / Non-Disclosed,11,$3.25,$35.74,$3.25


## Age Demographics

In [31]:
#Create Age bins and groupby
age_bins = [0, 9.90, 14.90, 19.90, 24.9, 29.9, 34.90, 39.90, 9999999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", ">40"]
# Create the category of Players use / use pd.cut with apply
#pd.cut(HOP_purchase_data["Age"],age_bins,labels=group_names)
purchase_data["Age Range"]=pd.cut(purchase_data["Age"],age_bins,labels=group_names)
age_demographics_totals=purchase_data["Age Range"].value_counts()
age_demographics_totals
age_demographics_percent=age_demographics_totals/numbers_players *100
#Data Frame for Age demographcis summary
age_summary=pd.DataFrame({"Percent of Players":age_demographics_percent,"Total Count":age_demographics_totals})
df=pd.DataFrame({"Percent of Players":age_demographics_percent,"Total Count":age_demographics_totals})
age_summary_df=df.loc[:,["Percent of Players","Total Count"]]

age_summary_df=age_summary_df.sort_index()
age_summary_df.round(2)

Unnamed: 0,Percent of Players,Total Count
<10,4.89,28
10-14,6.11,35
15-19,23.21,133
20-24,58.64,336
25-29,21.82,125
30-34,11.17,64
35-39,7.33,42
>40,2.97,17


## Purchasing Analysis (Age)

In [32]:
Age_Total_Purchase=purchase_data.groupby(["Age Range"]).sum()["Price"].rename("Total Purchase Value")
Age_ave=purchase_data.groupby(["Age Range"]).mean()["Price"].rename("Average Purchase Price")
Age_purchase_count=purchase_data.groupby(["Age Range"]).count()["Price"].rename("Purchase Count")
#Normalized 
Age_Normalized_total=Age_Total_Purchase/age_summary_df["Total Count"]
#Data Frame for Purchasing Analysis by Age
Age_table=pd.DataFrame({"Purchase Count":Age_purchase_count,"Average Purchase Price":Age_ave,"Total Purchase Value":Age_Total_Purchase,"Normalized Total":Age_Normalized_total})
df=pd.DataFrame({"Purchase Count":Age_purchase_count,"Average Purchase Price":Age_ave,"Total Purchase Value":Age_Total_Purchase,"Normalized Total":Age_Normalized_total})
Age_table_df=df.loc[:,["Purchase Count","Average Purchase Price","Total Purchase Value","Normalized Total"]]
Age_table_df.round(2)
Age_table_df["Average Purchase Price"]=Age_table_df["Average Purchase Price"].map("${:,.2f}".format)
Age_table_df["Total Purchase Value"]=Age_table_df["Total Purchase Value"].map("${:,.2f}".format)
Age_table_df["Normalized Total"]=Age_table_df["Normalized Total"].map("${:,.2f}".format)
Age_table_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Total
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,28,$2.98,$83.46,$2.98
10-14,35,$2.77,$96.95,$2.77
15-19,133,$2.91,$386.42,$2.91
20-24,336,$2.91,$978.77,$2.91
25-29,125,$2.96,$370.33,$2.96
30-34,64,$3.08,$197.25,$3.08
35-39,42,$2.84,$119.40,$2.84
>40,17,$3.16,$53.75,$3.16


## Top Spenders

In [33]:
Players_purchase_count=purchase_data.groupby(["SN"]).count()["Price"].rename("Purchase Count")
Players_ave= purchase_data.groupby(["SN"]).mean()["Price"].rename("Average Purchase Price")
Players_Total= purchase_data.groupby(["SN"]).sum()["Price"].rename("Total Purchase Value")
#Data Frame total user
players_data=pd.DataFrame({"Purchase Count":Players_purchase_count,"Average Purchase Price":Players_ave,"Total Purchase Value":Players_Total})
# Top 5 spenders
df=pd.DataFrame({"Purchase Count":Players_purchase_count,"Average Purchase Price":Players_ave,"Total Purchase Value":Players_Total})
players_data_df=df.loc[:,["Purchase Count","Average Purchase Price","Total Purchase Value"]]
players_data_df["Average Purchase Price"]=players_data_df["Average Purchase Price"].map("${:,.2f}".format)
players_data_df["Total Purchase Value"]=players_data_df["Total Purchase Value"].map("${:,.2f}".format)

players_data_df.sort_values("Total Purchase Value",ascending=False).head(5).round(2)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Qarwen67,4,$2.49,$9.97
Sondim43,3,$3.13,$9.38
Tillyrin30,3,$3.06,$9.19
Lisistaya47,3,$3.06,$9.19
Tyisriphos58,2,$4.59,$9.18


## Most Popular Items

In [34]:
Players_purchase_count_p=purchase_data.groupby(["Item ID","Item Name"]).count()["Price"].rename("Purchase Count")
Players_ave_p= purchase_data.groupby(["Item ID","Item Name"]).mean()["Price"].rename("Items Price")
Players_Total_p= purchase_data.groupby(["Item ID","Item Name"]).sum()["Price"].rename("Total Purchase Value")
Players_data_popular=pd.DataFrame({"Purchase Count":Players_purchase_count_p,"Items Price":Players_ave_p,"Total Purchase Value":Players_Total_p})
df=pd.DataFrame({"Purchase Count":Players_purchase_count_p,"Items Price":Players_ave_p,"Total Purchase Value":Players_Total_p})
Players_data_popular_df=df.loc[:,["Purchase Count","Items Price","Total Purchase Value"]]
Players_data_popular_df["Items Price"]=Players_data_popular_df["Items Price"].map("${:,.2f}".format)
Players_data_popular_df["Total Purchase Value"]=Players_data_popular_df["Total Purchase Value"].map("${:,.2f}".format)
Players_data_popular_df.sort_values("Purchase Count",ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Items Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


## Most Profitable Items

In [35]:
Players_purchase_count_p=purchase_data.groupby(["Item ID","Item Name"]).count()["Price"].rename("Purchase Count")
Players_ave_p= purchase_data.groupby(["Item ID","Item Name"]).mean()["Price"].rename("Items Price")
Players_Total_p= purchase_data.groupby(["Item ID","Item Name"]).sum()["Price"].rename("Total Purchase Value")
Players_data_popular=pd.DataFrame({"Purchase Count":Players_purchase_count_p,"Items Price":Players_ave_p,"Total Purchase Value":Players_Total_p})
df=pd.DataFrame({"Purchase Count":Players_purchase_count_p,"Items Price":Players_ave_p,"Total Purchase Value":Players_Total_p})
Players_data_popular_df=df.loc[:,["Purchase Count","Items Price","Total Purchase Value"]]
Players_data_popular_df["Items Price"]=Players_data_popular_df["Items Price"].map("${:,.2f}".format)
Players_data_popular_df.sort_values("Total Purchase Value",ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Items Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,37.26
115,Spectral Diamond Doomblade,7,$4.25,29.75
32,Orenmir,6,$4.95,29.7
103,Singed Scalpel,6,$4.87,29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,28.88
