# Heroes of Pymoli

In [8]:
# Import dependencies
import pandas as pd
import numpy as np

# Path to the raw purchase log, resolved relative to the notebook's working directory
data_json = "purchase_data.json"
# Parse the file as a list of record objects, giving one DataFrame row per purchase
purchase_df = pd.read_json(path_or_buf=data_json, orient="records")

## Player Count

In [9]:
# Build the player roster: one row per distinct (Gender, SN, Age) combination,
# so repeat purchases by the same player collapse into a single row.
player_demo = purchase_df.loc[:, ["Gender", "SN", "Age"]].drop_duplicates()
# Total players = number of rows in the de-duplicated roster.
# len() replaces `count()[0]`, which looked up a label-indexed Series by
# integer position (deprecated in recent pandas) and only counted rows whose
# "Gender" value was non-null.
num_players = len(player_demo)
# One-row summary table for display
total_players = pd.DataFrame({"Total Player": [num_players]})
total_players

Unnamed: 0,Total Player
0,573


## Purchasing Analysis (Total)

In [10]:
# Headline purchasing metrics computed over every row of the purchase log.
# Distinct item names that appear in the log
unique_items = len(purchase_df["Item Name"].unique())
# Mean of the "Price" column
avg_price = pd.to_numeric(purchase_df["Price"].mean())
# Number of rows with a non-null price
num_purchases = purchase_df["Price"].count()
# Sum of the "Price" column
total_rev = pd.to_numeric(purchase_df["Price"].sum())

# Assemble the one-row summary table
purchase_analysis = pd.DataFrame(
    {
        "Number of Unique Items": [unique_items],
        "Average Price": [avg_price],
        "Number of Purchases": [num_purchases],
        "Total Revenue": [total_rev],
    }
)
# Render the money columns as display strings (revenue gets a thousands separator)
purchase_analysis["Average Price"] = purchase_analysis["Average Price"].map(
    lambda value: "${:.2f}".format(value)
)
purchase_analysis["Total Revenue"] = purchase_analysis["Total Revenue"].map(
    lambda value: "${:,.2f}".format(value)
)
# Pin the column order explicitly rather than relying on dict ordering
purchase_analysis = purchase_analysis[
    ["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"]
]
purchase_analysis

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$2.93,780,"$2,286.33"


## Gender Demographics

In [11]:
# Players per gender, taken from the de-duplicated roster (one row per player)
gender_counts = player_demo["Gender"].value_counts()
# Name the column directly instead of renaming the value_counts() output:
# pandas >= 2.0 calls that column "count" rather than "Gender", so the old
# rename(columns={"Gender": ...}) silently did nothing and the later
# "Total Count" lookups raised KeyError.
gender = pd.DataFrame({"Total Count": gender_counts})
gender_df = gender.copy()
# Share of all players per gender. The division is label-aligned, so each
# percentage stays attached to its own gender regardless of the order in
# which value_counts() returns the categories (the previous positional tuple
# assumed the order Male, Female, Other / Non-Disclosed).
gender_percents = gender_counts / num_players * 100

# Per-gender scalars kept for any later cell that reads them; .get() returns
# 0 instead of raising when a category is absent from the data.
male_count = gender_counts.get("Male", 0)
male_percent = gender_percents.get("Male", 0.0)
female_count = gender_counts.get("Female", 0)
female_percent = gender_percents.get("Female", 0.0)
other_count = gender_counts.get("Other / Non-Disclosed", 0)
other_percent = gender_percents.get("Other / Non-Disclosed", 0.0)

# Gender demographics table: percentage rendered as a 2-decimal string
gender_df["Percentage of Players"] = gender_percents.map("{:.2f}".format)
gender_df = gender_df[["Percentage of Players", "Total Count"]]
gender_df

Unnamed: 0,Percentage of Players,Total Count
Male,81.15,465
Female,17.45,100
Other / Non-Disclosed,1.4,8


## Purchase Analysis (Gender)

In [None]:
# Select "Price" BEFORE aggregating. Calling .mean()/.sum() on the whole
# grouped frame and picking "Price" afterwards aggregates every column, which
# raises TypeError on the string columns in pandas >= 2.0 and is wasted work
# on older versions.
price_by_gender = purchase_df.groupby("Gender")["Price"]
# Number of purchases per gender
gender_purchase_count = price_by_gender.count()
# Average purchase price per gender
gender_avg_purchase = price_by_gender.mean()
# Total purchase value per gender
gender_total_purchase = price_by_gender.sum()
# Normalized totals: total spend divided by the number of players of that
# gender (label-aligned division against the gender demographics table)
gender_norm_totals = gender_total_purchase / gender_df["Total Count"]
# Create the gender purchasing analysis table
gender_pa_results = pd.DataFrame({"Purchase Count": gender_purchase_count,
                                  "Average Purchase Price": gender_avg_purchase,
                                  "Total Purchase Value": gender_total_purchase,
                                  "Normalized Totals": gender_norm_totals})
gender_pa_results["Average Purchase Price"] = gender_pa_results["Average Purchase Price"].map("${:.2f}".format)
# Thousands separator added so totals are formatted like the other summary tables
gender_pa_results["Total Purchase Value"] = gender_pa_results["Total Purchase Value"].map("${:,.2f}".format)
gender_pa_results["Normalized Totals"] = gender_pa_results["Normalized Totals"].map("${:.2f}".format)
gender_pa_results = gender_pa_results[["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]
gender_pa_results

## Age Demographics

In [21]:
# Age bin edges and one label per bin. The bins are 10 years wide except the
# last, which spans 15. With pd.cut's default right-closed intervals an age
# equal to an upper edge falls in the lower-labelled group (e.g. 15 is in
# "05 to 15"), and ages outside (5, 50] get no group.
age_bins = [5,15,25,35,50]
age_group_labels = ["05 to 15", "15 to 25", "25 to 35", "35 to 50"]
# Tag every player in the roster with an age group
player_demo["Age Group"] = pd.cut(player_demo["Age"], bins=age_bins, labels=age_group_labels)
# Players per age group
age_demo_totals = player_demo["Age Group"].value_counts()
# Each group's share of all players, in percent
age_demo_percents = age_demo_totals / num_players * 100
# Combine counts and percentages into one table, rounded to 2 decimals
age_demo = pd.DataFrame(
    {"Total Count": age_demo_totals, "Percentage of Players": age_demo_percents}
).round(2)
# Display in age-group order (age_demo itself is left unsorted)
age_demo.sort_index()

Unnamed: 0,Percentage of Players,Total Count
05 to 15,13.26,76
15 to 25,65.1,373
25 to 35,16.75,96
35 to 50,4.89,28


## Purchase Analysis (Age)

In [22]:
# Tag every purchase row with the buyer's age group (same bins and labels as
# the age demographics table)
purchase_df["Age Group"] = pd.cut(purchase_df["Age"], age_bins, labels = age_group_labels)
# Select "Price" BEFORE aggregating. Aggregating the whole grouped frame and
# picking "Price" afterwards runs mean()/sum() over the string columns too,
# which raises TypeError in pandas >= 2.0.
price_by_age = purchase_df.groupby("Age Group")["Price"]
# Number of purchases per age group
age_price_count = price_by_age.count()
# Average purchase price per age group
age_avg_price = price_by_age.mean()
# Total purchase value per age group
age_total_price = price_by_age.sum()
# Normalized totals: total spend divided by the number of players in the group
# (label-aligned division against the age demographics table)
age_norm_total = age_total_price / age_demo["Total Count"]
# Create the age purchasing analysis table
age_demo_pa = pd.DataFrame({"Purchase Count": age_price_count, "Average Purchase Price": age_avg_price,
                            "Total Purchase Value": age_total_price, "Normalized Totals": age_norm_total})
age_demo_pa["Average Purchase Price"] = age_demo_pa["Average Purchase Price"].map("${:.2f}".format)
age_demo_pa["Total Purchase Value"] = age_demo_pa["Total Purchase Value"].map("${:,.2f}".format)
age_demo_pa["Normalized Totals"] = age_demo_pa["Normalized Totals"].map("${:.2f}".format)
age_demo_pa = age_demo_pa[["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Normalized Totals"]]
age_demo_pa

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
05 to 15,110,$2.92,$320.77,$4.22
15 to 25,489,$2.93,"$1,431.35",$3.84
25 to 35,134,$2.97,$398.08,$4.15
35 to 50,47,$2.90,$136.13,$4.86


## Top Spenders

In [27]:
## Identify the top 5 spenders by total purchase value and list them in a table ##
# Select "Price" BEFORE aggregating: mean()/sum() over the whole grouped frame
# also hits the string columns and raises TypeError in pandas >= 2.0.
price_by_player = purchase_df.groupby("SN")["Price"]
# Number of purchases per player
top_spender_count = price_by_player.count()
# Average purchase price per player
top_spender_avg = price_by_player.mean()
# Total purchase value per player
top_spender_total = price_by_player.sum()
# Unsorted, numeric per-player table
top_spender_results = pd.DataFrame({"Purchase Count": top_spender_count, "Average Purchase Price": top_spender_avg,
                                    "Total Purchase Value": top_spender_total})
# Sort while the totals are still numeric; formatting to strings comes last so
# it cannot affect the ordering
top_spenders_results = (
    top_spender_results[["Purchase Count", "Average Purchase Price", "Total Purchase Value"]]
    .sort_values("Total Purchase Value", ascending = False)
)
top_spenders_results["Average Purchase Price"] = top_spenders_results["Average Purchase Price"].map("${:.2f}".format)
top_spenders_results["Total Purchase Value"] = top_spenders_results["Total Purchase Value"].map("${:.2f}".format)
top_spenders_results.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


## Most Popular Items

In [28]:
## Most Popular Items: identify the 5 most popular items by purchase count and list them in a table ##
# Select "Price" BEFORE aggregating: mean()/sum() over the whole grouped frame
# also hits the string/categorical columns and raises TypeError in pandas >= 2.0.
price_by_item = purchase_df.groupby(["Item ID", "Item Name"])["Price"]
# Total purchase value per item
pop_items_total = price_by_item.sum()
# Item price (mean of the prices recorded for the item)
pop_items_price = price_by_item.mean()
# Number of purchases per item
pop_items_count = price_by_item.count()
# Create the most popular items table
pop_items_results = pd.DataFrame({"Purchase Count": pop_items_count, "Item Price": pop_items_price,
                                  "Total Purchase Value": pop_items_total})
pop_items_results = pop_items_results[["Purchase Count", "Item Price", "Total Purchase Value"]]
# NOTE: several items share the same purchase count, and this sort has no
# tie-breaker, so which of the tied items reach the top 5 is arbitrary.
pop_items_results = pop_items_results.sort_values("Purchase Count", ascending = False)
# Format money columns last so the sort above runs on numeric values
pop_items_results["Item Price"] = pop_items_results["Item Price"].map("${:.2f}".format)
pop_items_results["Total Purchase Value"] = pop_items_results["Total Purchase Value"].map("${:.2f}".format)
pop_items_results.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


## Most Profitable Items

In [29]:
## Most Profitable Items: identify the 5 most profitable items by total purchase value and list them in a table ##
# Select "Price" BEFORE aggregating: mean()/sum() over the whole grouped frame
# also hits the string/categorical columns and raises TypeError in pandas >= 2.0.
item_price_groups = purchase_df.groupby(["Item ID", "Item Name"])["Price"]
# Total purchase value per item
profitable_items_total = item_price_groups.sum()
# Item price (mean of the prices recorded for the item)
profitable_items_price = item_price_groups.mean()
# Number of purchases per item
profitable_items_count = item_price_groups.count()
# Create the most profitable items table
profitable_items_results = pd.DataFrame({"Purchase Count": profitable_items_count, "Item Price": profitable_items_price,
                                         "Total Purchase Value": profitable_items_total})
profitable_items_results = profitable_items_results[["Purchase Count", "Item Price", "Total Purchase Value"]]
# Rank by revenue while the totals are still numeric
profitable_items_results = profitable_items_results.sort_values("Total Purchase Value", ascending = False)
# Format money columns last so the sort above runs on numeric values
profitable_items_results["Item Price"] = profitable_items_results["Item Price"].map("${:.2f}".format)
profitable_items_results["Total Purchase Value"] = profitable_items_results["Total Purchase Value"].map("${:.2f}".format)
profitable_items_results.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
