# Heroes of Pymoli 
### Andrew Anastasiades | @andrew-ana
---

## LOAD DATA
---

In [1]:
# DEPENDENCIES
import pandas as pd #File IO and Data Manipulation
import os #OS agnostic file structure
# FILE PATHS
# os.path.join assembles the relative path with the separator of the host OS.
resources_dir = "Resources"
purchase_filename = os.path.join(resources_dir, "purchase_data.csv")
# INITIALIZE DATAFRAME FROM FILE
# One row per purchase; every later cell reads from this frame.
df = pd.read_csv(purchase_filename)

## INSPECT DATA
---

In [2]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns: Purchase ID, Age, Item ID and Price.
df.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,91.755128,3.050987
std,225.310896,6.659444,52.697702,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,47.75,1.98
50%,389.5,22.0,92.0,3.15
75%,584.25,25.0,138.0,4.08
max,779.0,45.0,183.0,4.99


In [3]:
# Preview the first five purchase rows (five is the default for DataFrame.head).
df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## ANALYZE DATA
---

### PLAYER ANALYSIS

In [4]:
# A player is identified by screen name (SN); the same SN can appear on
# several purchase rows, so count distinct values rather than rows.
screen_names = df['SN']
num_players = screen_names.nunique()
print(f"There are {num_players} unique players in the game")

There are 576 unique players in the game


### PURCHASING ANALYSIS (TOTAL)

In [13]:
# Distinct Item IDs that appear in the purchase log.
item_ids = df['Item ID']
num_items = item_ids.nunique()
print(f"There are {num_items} unique items")

# Price statistics are taken over purchase rows, not over distinct items.
prices = df['Price']
avg_price = prices.mean()
print(f"The average purchase price is ${avg_price:.2f}")

# Every row of the frame is one purchase.
num_purchases = df.shape[0]
print(f"There are {num_purchases} purchases")

rev_total = prices.sum()
print(f"Total Purchases were ${rev_total:.2f}")

# Purchases per unique player; num_players comes from the player analysis cell.
avg_purchases = num_purchases / num_players
print(f"The average number of purchases is {avg_purchases:.2f}")

There are 179 unique items
The average purchase price is $3.05
There are 780 purchases
Total Purchases were $2379.77
The average number of purchases is 1.35


### GENDER DEMOGRAPHICS

In [6]:
# Split the purchase log by the gender listed on each row.
gender_group = df.groupby(by=['Gender'])

# Distinct screen names per gender, computed once and reused for both columns.
players_by_gender = gender_group['SN'].nunique()
gender_df = pd.DataFrame({
    "Players": players_by_gender,
    "Percentage": players_by_gender / num_players,  # share of all unique players
})

# Show the share as a percentage with two decimals.
gender_df.style.format({'Percentage': '{:,.2%}'.format})

Unnamed: 0_level_0,Players,Percentage
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06%
Male,484,84.03%
Other / Non-Disclosed,11,1.91%


### PURCHASING ANALYSIS (GENDER)

In [7]:
# Per-gender purchase statistics, built from the grouping made in the
# gender demographics cell.
price_by_gender = gender_group['Price']
gender_purchases_df = pd.DataFrame({
    "Purchases": gender_group['SN'].count(),   # purchase rows in each group
    "Average Price": price_by_gender.mean(),
    "Total Value": price_by_gender.sum(),      # revenue subtotal for the group
})
# Revenue per unique player: group revenue divided by the group's player count
# (gender_df["Players"] holds distinct screen names, not purchase rows).
gender_purchases_df["Value / Player"] = (
    gender_purchases_df["Total Value"] / gender_df["Players"]
)

# All money columns share one currency formatter.
usd = '${:,.2f}'.format
gender_purchases_df.style.format(
    {column: usd for column in ('Average Price', 'Total Value', 'Value / Player')}
)

Unnamed: 0_level_0,Purchases,Average Price,Total Value,Value / Player
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


### AGE DEMOGRAPHICS

In [8]:
# First prepare the bins and group.
# Bins are [0, 10) followed by 4-year-wide bins [10, 14), [14, 18), ...
# pd.cut(..., right=False) closes each bin on the left and opens it on the
# right, so the last edge must be STRICTLY greater than the oldest age;
# otherwise those players land in no bin (NaN) and vanish from the table.
max_age = df['Age'].max()
# floor((max_age - 10) / 4) + 1 is the index of the first 4-year edge above
# max_age; range() needs an exclusive bound, hence + 2 in total.
# Rounding instead of flooring only works when the fraction happens to round
# up (e.g. max age 45); it leaves a maximum age such as 43 or 46 unbinned.
age_bin_max = int((max_age - 10) // 4) + 2
age_bins = [0, 10] + [10 + i * 4 for i in range(1, age_bin_max)]
df['Age Group'] = pd.cut(df['Age'], age_bins, right=False)  # adds 'Age Group' column
# observed=False is spelled out so empty age bins stay in the table as rows
# regardless of the installed pandas version's default for categorical keys.
age_groups = df.groupby(['Age Group'], observed=False)

# Analyze: one row per age bin.
age_df = pd.DataFrame()
age_df["Players"] = age_groups['SN'].nunique()      # distinct players in the bin
age_df["Purchases"] = age_groups['SN'].count()      # purchase rows in the bin
age_df["Average Price"] = age_groups['Price'].mean()
age_df["Total Value"] = age_groups['Price'].sum()   # revenue subtotal for the bin
# Revenue per unique player in the bin (group revenue / group size).
age_df["Value / Player"] = age_df["Total Value"] / age_df["Players"]
age_df.style.format({
    'Average Price': '${:,.2f}'.format,
    'Total Value': '${:,.2f}'.format,
    'Value / Player': '${:,.2f}'.format,
})

Unnamed: 0_level_0,Players,Purchases,Average Price,Total Value,Value / Player
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"[0, 10)",17,23,$3.35,$77.13,$4.54
"[10, 14)",20,26,$2.92,$75.87,$3.79
"[14, 18)",71,89,$3.01,$267.60,$3.77
"[18, 22)",150,210,$3.08,$647.26,$4.32
"[22, 26)",189,263,$3.05,$800.90,$4.24
"[26, 30)",34,42,$2.65,$111.10,$3.27
"[30, 34)",45,64,$3.00,$191.87,$4.26
"[34, 38)",27,35,$3.21,$112.33,$4.16
"[38, 42)",18,23,$3.51,$80.72,$4.48
"[42, 46)",5,5,$3.00,$14.99,$3.00


### TOP SPENDERS

In [9]:
# One group per screen name, so each row of the result is a unique player.
player_group = df.groupby('SN')
spend_by_player = player_group['Price']

top_spenders = pd.DataFrame({
    'Total Purchases': spend_by_player.sum(),    # total amount spent
    'Purchase Count': spend_by_player.count(),   # number of purchases made
})
top_spenders['Average Purchase Price'] = (
    top_spenders['Total Purchases'] / top_spenders['Purchase Count']
)

# Biggest spenders first, then keep the five at the top.
top_spenders = top_spenders.sort_values('Total Purchases', ascending=False)
top_5_spenders = top_spenders.head(5)

usd = '${:,.2f}'.format
top_5_spenders.style.format(
    {column: usd for column in ('Total Purchases', 'Average Purchase Price')}
)

Unnamed: 0_level_0,Total Purchases,Purchase Count,Average Purchase Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,$18.96,5,$3.79
Idastidru52,$15.45,4,$3.86
Chamjask73,$13.83,3,$4.61
Iral74,$13.62,4,$3.40
Iskadarya95,$13.10,3,$4.37


### MOST POPULAR ITEMS

In [10]:
# Group on name and ID together so each row of the result is one distinct item.
item_group = df.groupby(['Item Name', 'Item ID'])
item_prices = item_group['Price']

pop_items = pd.DataFrame({
    'Purchase Count': item_prices.count(),
    'Total Purchase Value': item_prices.sum(),
    # Mean rather than a single price: some items were sold at several prices.
    'Average Price': item_prices.mean(),
})

# Most frequently bought items first, then keep the five at the top.
pop_items = pop_items.sort_values('Purchase Count', ascending=False)
most_popular_items = pop_items.head(5)

usd = '${:,.2f}'.format
most_popular_items.style.format(
    {column: usd for column in ('Total Purchase Value', 'Average Price')}
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Total Purchase Value,Average Price
Item Name,Item ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Final Critic,92,13,$59.99,$4.61
"Oathbreaker, Last Hope of the Breaking Storm",178,12,$50.76,$4.23
Persuasion,132,9,$28.99,$3.22
Nirvana,82,9,$44.10,$4.90
"Extraction, Quickblade Of Trembling Hands",108,9,$31.77,$3.53


### MOST PROFITABLE ITEMS

In [11]:
# Same per-item table as the popularity cell, re-ranked by revenue instead of
# by purchase count.
prof_items = pop_items.sort_values('Total Purchase Value', ascending=False)
most_profitable_items = prof_items.head(5)  # five highest-grossing items

usd = '${:,.2f}'.format
most_profitable_items.style.format(
    {column: usd for column in ('Total Purchase Value', 'Average Price')}
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Total Purchase Value,Average Price
Item Name,Item ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Final Critic,92,13,$59.99,$4.61
"Oathbreaker, Last Hope of the Breaking Storm",178,12,$50.76,$4.23
Nirvana,82,9,$44.10,$4.90
Fiery Glass Crusader,145,9,$41.22,$4.58
Singed Scalpel,103,8,$34.80,$4.35
