In [2]:
import pandas as pd
import numpy as np

In [3]:
file = "purchase_data.json"


In [4]:
df = pd.read_json(file, orient="records")
df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 6 columns):
Age          780 non-null int64
Gender       780 non-null object
Item ID      780 non-null int64
Item Name    780 non-null object
Price        780 non-null float64
SN           780 non-null object
dtypes: float64(1), int64(2), object(3)
memory usage: 36.6+ KB


In [6]:
df.columns

Index(['Age', 'Gender', 'Item ID', 'Item Name', 'Price', 'SN'], dtype='object')

In [22]:
total_players = df.loc[:,["Gender","SN","Age"]]
total_players = total_players.drop_duplicates()
tp = total_players.count()[0]



print("There are " + str(tp) + " total players.")

There are 573 total players.


In [23]:
unique_items = df["Item Name"].unique()

print("There are a total of " + str(len(unique_items)) + " unique items.")

There are a total of 179 unique items.


In [9]:
average_purchase = df["Price"].mean()
avg = round(average_purchase,2)

print("The average purchase price of an item in Heroes of Pymoli was $" + str(avg))

The average purchase price of an item in Heroes of Pymoli was $2.93


In [24]:
total_purchases = df["Price"].count()

print("The total number of purchases was " + str(total_purchases) + ".")

The total number of purchases was 780.


In [25]:
revenue = df["Price"].sum()

print("The total revenue of purchases was $" + str(revenue) + ".")

The total revenue of purchases was $2286.33.


In [12]:
total_players["Gender"].value_counts()

Male                     465
Female                   100
Other / Non-Disclosed      8
Name: Gender, dtype: int64

In [13]:
gender_info = total_players["Gender"].value_counts()
gender_percents = gender_info / tp *100

gender = pd.DataFrame({"Total Count": gender_info, "Percentage of Players": gender_percents})
                                
gender = gender.round(2)
gender

Unnamed: 0,Total Count,Percentage of Players
Male,465,81.15
Female,100,17.45
Other / Non-Disclosed,8,1.4


In [14]:
avg_gender_purchase = df.groupby("Gender")

gender_info_v2 =df["Gender"].value_counts()

avg_g = avg_gender_purchase["Price"].mean()
avg_g = avg_g.round(2)

total_gender_p = avg_gender_purchase["Price"].sum()
total_gender_p = total_gender_p.round(2)

normalized_t = total_gender_p / gender["Total Count"]
normalized_t = normalized_t.round(2)

gender_purchase = pd.DataFrame({"Purchase Count": gender_info_v2,"Average Purchase Price": avg_g, 
                                "Total Purchase Price":total_gender_p,"Normalized Total":normalized_t})

gender_purchase

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Price,Normalized Total
Female,136,2.82,382.91,3.83
Male,633,2.95,1867.68,4.02
Other / Non-Disclosed,11,3.25,35.74,4.47


In [15]:
print(df["Age"].min())
print(df["Age"].max())

bins = [0,10,15,20,25,30,35,40,100]

group_labels = ["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]

total_players["Age Range"] = pd.cut(df["Age"],bins, labels=group_labels)

age_totals = total_players["Age Range"].value_counts()
age_percents = age_totals / tp *100
age = pd.DataFrame({"Total Counts":age_totals,"Percentage of Players":age_percents})
age = age.round(2)

age.sort_index()

7
45


Unnamed: 0,Total Counts,Percentage of Players
<10,22,3.84
10-14,54,9.42
15-19,139,24.26
20-24,234,40.84
25-29,52,9.08
30-34,44,7.68
35-39,25,4.36
40+,3,0.52


In [16]:
df["Age Range"] = pd.cut(df["Age"],bins,labels=group_labels)

age_total_purchase = df.groupby(["Age Range"]).sum()["Price"]
age_average = df.groupby(["Age Range"]).mean()["Price"]
age_average = age_average.round(2)
age_count = df.groupby(["Age Range"]).count()["Price"]

normalized_total = age_total_purchase / age["Total Counts"]
normalized_total = normalized_total.round(2)

age_range_data = pd.DataFrame({"Purchase Count": age_count, "Average Purchase Price": age_average, 
                               "Total Purchase Value": age_total_purchase, "Normalized Totals": normalized_total})
age_range_data.sort_index()

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
10-14,78,2.87,224.15,4.15
15-19,184,2.87,528.74,3.8
20-24,305,2.96,902.61,3.86
25-29,76,2.89,219.82,4.23
30-34,58,3.07,178.26,4.05
35-39,44,2.9,127.49,5.1
40+,3,2.88,8.64,2.88
<10,32,3.02,96.62,4.39


In [17]:
purchase_count = df.groupby(["SN"]).count()["Price"]
purchase_average = df.groupby(["SN"]).mean()["Price"]
purchase_average = purchase_average.round(2)
purchase_value = df.groupby(["SN"]).sum()["Price"]


spender_data = pd.DataFrame({"Purchase Count": purchase_count, "Average Purchase Price": purchase_average, 
                               "Total Purchase Value": purchase_value})

In [18]:
spender_data.sort_values("Total Purchase Value", ascending = False).head(5)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,3.41,17.06
Saedue76,4,3.39,13.56
Mindimnya67,4,3.18,12.74
Haellysu29,3,4.24,12.73
Eoda93,3,3.86,11.58


In [19]:
item_data = df.loc[:,["Item ID","Item Name","Price"]]

total_item_purchase = item_data.groupby(["Item ID","Item Name"]).sum()["Price"]
average_item = item_data.groupby(["Item ID","Item Name"]).mean()["Price"]
item = item_data.groupby(["Item ID","Item Name"]).count()["Price"]

items = pd.DataFrame({"Purchase Count" : item,"Item Price" : average_item,"Total Purchase Value" : total_item_purchase})


In [20]:
items.sort_values("Purchase Count", ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,2.35,25.85
84,Arcane Gem,11,2.23,24.53
31,Trickster,9,2.07,18.63
175,Woeful Adamantite Claymore,9,1.24,11.16
13,Serenity,9,1.49,13.41


In [21]:
items.sort_values("Total Purchase Value", ascending=False).head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,4.14,37.26
115,Spectral Diamond Doomblade,7,4.25,29.75
32,Orenmir,6,4.95,29.7
103,Singed Scalpel,6,4.87,29.22
107,"Splitter, Foe Of Subtlety",8,3.61,28.88


In [None]:
There are four times more players than females which could have effected the purchase types. 
When the purchases are extrapolated by gender, they spend equally. 
The bulk of the players are between the ages of 15-24.
The items that sold the most did not have a great quantity; 11 is not a large quantity, considering the total number of purchases made.  

This information can provide insight on the type of items to produce to increase/maintain revenue for the game. 
