# Heroes Of Pymoli Data Analysis
* The 16-25 age demographic accounts for nearly two-thirds (65.33%) of all players.
* Although accounting for a small subset of total number of players, those who decline to state their gender outspend both the male and female groups.
* Our oldest demographic (36+) makes the largest purchases on average.  

In [158]:
import pandas as pd
import numpy as np
import json

In [159]:
purchase_data_1 = "purchase_data.json"
purchase_data_2 = "purchase_data2.json"

purchase_data_1_df = pd.read_json(purchase_data_1)
purchase_data_2_df = pd.read_json(purchase_data_2)

In [160]:
combined_purchases = pd.concat([purchase_data_1_df, purchase_data_2_df])

## Player Count

In [161]:
total_players = combined_purchases.loc[:, ["Gender", "SN", "Age"]]
total_players = total_players.drop_duplicates()
count = total_players.count()[0]

In [162]:
pd.DataFrame({"Total Players": [count]})

Unnamed: 0,Total Players
0,646


In [163]:
average_spent = combined_purchases["Price"].mean()
total_revenue = combined_purchases["Price"].sum()
total_purchases = combined_purchases["Price"].count()
item_count = len(combined_purchases["Item ID"].unique())

summary_table = pd.DataFrame({"Unique Items": item_count,
                              "Total Revenue": [total_revenue],
                              "Total Purchases": [total_purchases],
                              "Average Spent": [average_price]})

summary_table ["Average Price"] = summary_table["Average Spent"].map("${:,.2f}".format)
summary_table ["Total Revenue"] = summary_table["Total Revenue"].map("${:,.2f}".format)
summary_table = summary_table.loc[:,["Unique Items", "Average Spent", "Total Purchases", "Total Revenue"]]

summary_table

Unnamed: 0,Unique Items,Average Spent,Total Purchases,Total Revenue
0,184,2.930571,858,"$2,514.43"


## Gender Demographics

In [164]:
gender_totals = player_demographics["Gender"].value_counts()
gender_percentage = (gender_totals / count * 100).round(2)
gender_totals = pd.DataFrame({"Total Count": gender_totals, "Percentage of Total": gender_percentage})

gender_totals

Unnamed: 0,Percentage of Total,Total Count
Male,81.11,524
Female,17.49,113
Other / Non-Disclosed,1.39,9


## Purchases by Gender

In [165]:
gender_purchase_total = combined_purchases.groupby(["Gender"]).sum()["Price"].rename("Total Spent").map("${:,.2f}".format)
gender_average = combined_purchases.groupby(["Gender"]).mean()["Price"].rename("Average Purchase").map("${:,.2f}".format)
gender_counts = combined_purchases.groupby(["Gender"]).count()["Price"].rename("Total Purchases")
gender_data = pd.DataFrame({"Total Purchases": gender_counts, "Average Purchase": gender_average, "Total Spent": gender_purchase_total})

gender_data

Unnamed: 0_level_0,Average Purchase,Total Purchases,Total Spent
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,$2.85,149,$424.29
Male,$2.94,697,"$2,052.28"
Other / Non-Disclosed,$3.15,12,$37.86


## Age Demographics

In [166]:
age_bins = [0, 9.90, 15.90, 25.90, 35.90, 999]
age_groups = ["<10", "10-15", "16-25", "26-35", "36+"]

player_demographics["Age Ranges"] = pd.cut(player_demographics["Age"], age_bins, labels=group_names)

age_demographics_totals = player_demographics["Age Ranges"].value_counts()
age_demographics_percents = (age_demographics_totals / count * 100).round(2)
age_demographics = pd.DataFrame({"Percentage of Total": age_demographics_percents, "Group Total": age_demographics_totals})

age_demographics.sort_index()

Unnamed: 0,Group Total,Percentage of Total
<10,24,3.72
10-15,61,9.44
16-25,422,65.33
26-35,106,16.41
36+,33,5.11


## Purchases by Age

In [167]:
combined_purchases["Age Ranges"] = pd.cut(combined_purchases["Age"], age_bins, labels=group_names)

age_purchase_total = combined_purchases.groupby(["Age Ranges"]).sum()["Price"].rename("Total Spent").map("${:,.2f}".format)
age_average = combined_purchases.groupby(["Age Ranges"]).mean()["Price"].rename("Average Spent").map("${:,.2f}".format)
age_counts = combined_purchases.groupby(["Age Ranges"]).count()["Price"].rename("Total Purchases")
age_data = pd.DataFrame({"Total Purchases": age_counts, "Average Spent": age_average, "Total Spent": age_purchase_total})
age_data = age_data.loc[:, ["Total Purchases", "Average Spent", "Total Spent"]]

age_data

Unnamed: 0_level_0,Total Purchases,Average Spent,Total Spent
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<10,33,$2.95,$97.28
10-15,86,$2.90,$249.52
16-25,542,$2.93,"$1,586.46"
26-35,145,$2.93,$425.32
36+,52,$3.00,$155.85


## Biggest Spenders

In [168]:
user_spend = combined_purchases.groupby(["SN"]).sum()["Price"].rename("Total Spent").map("${:,.2f}".format)
user_average = combined_purchases.groupby(["SN"]).mean()["Price"].rename("Average Spent").map("${:,.2f}".format)
user_purchases = combined_purchases.groupby(["SN"]).count()["Price"].rename("Total Purchases")

user_data = pd.DataFrame({"Total Spent": user_total, "Average Spent": user_average, "Total Purchases": user_count})
user_data = user_data.loc[:,["Total Purchases", "Average Spent", "Total Spent"]]

user_data.sort_values("Total Spent", ascending=False).head(5)

Unnamed: 0_level_0,Total Purchases,Average Spent,Total Spent
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Qarwen67,4,$2.49,$9.97
Ila44,4,$2.42,$9.68
Airi27,3,$3.16,$9.48
Lisistaya47,3,$3.06,$9.19
Tillyrin30,3,$3.06,$9.19


## Most Popular Items

In [169]:
item_popular = combined_purchases.loc[:,["Item ID", "Item Name", "Price"]]

item_revenue = item_popular.groupby(["Item ID", "Item Name"]).sum()["Price"].rename("Total Spent")
item_average = item_popular.groupby(["Item ID", "Item Name"]).mean()["Price"]
item_total = item_popular.groupby(["Item ID", "Item Name"]).count()["Price"].rename("Number of Purchases")
item_popular_pd = item_popular_pd.loc[:,["Number of Purchases", "Item Price", "Total Spent"]]

item_popular_pd.sort_values("Number of Purchases", ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Number of Purchases,Item Price,Total Spent
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
107,"Splitter, Foe Of Subtlety",9,$3.67,$33.03
108,"Extraction, Quickblade Of Trembling Hands",9,$3.14,$28.25
175,Woeful Adamantite Claymore,9,$1.24,$11.16
44,Bonecarvin Battle Axe,9,$2.67,$24.04
34,Retribution Axe,9,$4.14,$37.26


## Most Profitable Items

In [170]:
item_popular_pd.sort_values("Total Spent", ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Number of Purchases,Item Price,Total Spent
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
21,Souleater,3,$3.27,$9.81
37,"Shadow Strike, Glory of Ending Hope",5,$1.93,$9.65
120,Agatha,5,$1.91,$9.55
96,Blood-Forged Skeletal Spine,2,$4.77,$9.54
47,"Alpha, Reach of Ending Hope",6,$1.55,$9.30
