# Heroes of Pymoli 
### Andrew Anastasiades | @andrew-ana
---

## LOAD DATA
---

In [1]:
## DEPENDENCIES
import pandas as pd #File IO and Data Manipulation
import os #OS agnostic file structure

## FILE PATHS
# The CSV lives in a folder next to the notebook; os.path.join picks the right separator per OS
resources_dir = "Resources"
purchase_filename = os.path.join(resources_dir, "purchase_data.csv")

## INITIALIZE DATAFRAME FROM FILE
# Every row of the CSV becomes one row of df
df = pd.read_csv(filepath_or_buffer=purchase_filename)

## INSPECT DATA
---

In [2]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,91.755128,3.050987
std,225.310896,6.659444,52.697702,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,47.75,1.98
50%,389.5,22.0,92.0,3.15
75%,584.25,25.0,138.0,4.08
max,779.0,45.0,183.0,4.99


In [3]:
# Peek at the first five rows to see the column layout and typical values
df.iloc[:5]

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## ANALYZE DATA
---

In [4]:
# PLAYER ANALYSIS
# A player is identified by screen name (SN), so distinct SNs = distinct players
screen_names = df['SN']
num_players = screen_names.nunique()
print(f"There are {num_players} unique players in the game")

There are 576 unique players in the game


In [6]:
# PURCHASING ANALYSIS (TOTAL)
# Compute all the totals first, then report the two that are printed here
purchase_prices = df['Price'] # One price per purchase row
num_items = df['Item ID'].nunique() # Distinct Item IDs
avg_price = purchase_prices.mean() # Mean price over all purchases
num_purchases = len(df) # Each row is a purchase
rev_total = purchase_prices.sum() # Total revenue over all purchases
print(f"There are {num_items} unique items")
print(f"The average purchase price is ${avg_price:.2f}")


There are 179 unique items
The average purchase price is $3.05


In [None]:
# GENDER DEMOGRAPHICS
gender_group = df.groupby(['Gender']) # One group per gender value
gender_names = gender_group['SN'] # Screen names within each gender
gender_prices = gender_group['Price'] # Purchase prices within each gender
gender_num = gender_names.nunique() # Distinct players per gender
gender_percent = gender_num.div(num_players) # Fraction of all unique players

# PURCHASING ANALYSIS (GENDER)
gender_purchases = gender_names.count() # Purchase rows per gender
gender_average_price = gender_prices.mean() # Mean purchase price per gender
gender_revenue_total = gender_prices.sum() # Revenue subtotal per gender
gender_player_LTV = gender_revenue_total.div(gender_num) # LTV = group revenue / players in group

# AGE DEMOGRAPHICS
# Bin layout: one catch-all bin [0, 10) followed by 4-year-wide bins
# [10, 14), [14, 18), ... Bins are closed on the left (right=False), so the final
# edge must be strictly greater than the oldest age or those rows get NaN and
# drop out of every statistic below.
# Floor division guarantees that; the previous round() call rounded down for
# some maxima (e.g. 42, 43, 44 or 46) and left the oldest players without a bin.
oldest_age = int(df['Age'].max())
age_bin_max = (oldest_age - 10) // 4 + 2 # Exclusive stop for the range() below
age_bins = [0,10] + [10+i*4 for i in range(1,age_bin_max)] # Edges: 0, 10, 14, 18, ...
df['Age Group'] = pd.cut(df['Age'], age_bins, right=False) # Adds (or overwrites) the 'Age Group' column
# observed=False keeps empty bins in the results and states the long-standing default explicitly
age_groups = df.groupby(['Age Group'], observed=False)
#Analyze
age_num = age_groups['SN'].nunique() # Distinct players in each age group
age_purchases = age_groups['SN'].count() # Purchase rows in each age group
age_average_price = age_groups['Price'].mean() # Mean purchase price per age group
age_revenue_total = age_groups['Price'].sum() # Revenue subtotal per age group
age_player_LTV = age_revenue_total / age_num # LTV = group revenue / players in group

# TOP SPENDERS
player_group = df.groupby('SN') # One group per screen name, i.e. per player
player_prices = player_group['Price']
# Per-player spend total and purchase count, built side by side in one frame
top_spenders = pd.DataFrame({
    'Total Purchases': player_prices.sum(),
    'Purchase Count': player_prices.count(),
})
top_spenders['Average Purchase Price'] = top_spenders['Total Purchases'].div(top_spenders['Purchase Count'])
top_spenders = top_spenders.sort_values(by='Total Purchases', ascending=False) # Biggest spenders first
top_5_spenders = top_spenders.head(5) # Keep only the five biggest spenders

# MOST POPULAR ITEMS
item_group = df.groupby(['Item Name','Item ID']) # One group per (name, id) pair
item_prices = item_group['Price']
# Per-item statistics; the mean is needed because some items sold at more than one price
pop_items = pd.DataFrame({
    'Purchase Count': item_prices.count(),
    'Total Purchase Value': item_prices.sum(),
    'Average Price': item_prices.mean(),
})
pop_items = pop_items.sort_values(by='Purchase Count', ascending=False) # Most purchased first
most_popular_items = pop_items.head(5) # Keep only the five most purchased items

# MOST PROFITABLE ITEMS
# Same per-item table as pop_items, re-ranked by revenue instead of purchase count
prof_items = pop_items.sort_values(by='Total Purchase Value', ascending=False)
most_profitable_items = prof_items.head(5) # Keep only the five highest-revenue items

In [None]:
## HIGHLIGHTED STATISTICS
# A notebook cell only renders the value of its final expression, so listing the
# variables one per line showed nothing but the last table. Each statistic is
# therefore printed or passed to display() explicitly.
# display() is supplied by the IPython/Jupyter kernel, so no import is needed here.
#Total
print(f"Unique players: {num_players}")
print(f"Unique items: {num_items}")
print(f"Average purchase price: ${avg_price:.2f}")
print(f"Number of purchases: {num_purchases}")
print(f"Total revenue: ${rev_total:,.2f}")

# Caption/table pairs, kept in the same order as the original listing
highlighted_tables = [
    #Gender
    ("Players by gender", gender_num),
    ("Fraction of players by gender", gender_percent),
    ("Purchases by gender", gender_purchases),
    ("Average purchase price by gender", gender_average_price),
    ("Revenue by gender", gender_revenue_total),
    ("Revenue per player by gender", gender_player_LTV),
    #Age
    ("Players by age group", age_num),
    ("Purchases by age group", age_purchases),
    ("Average purchase price by age group", age_average_price),
    ("Revenue by age group", age_revenue_total),
    ("Revenue per player by age group", age_player_LTV),
    #Players
    ("Top 5 spenders", top_5_spenders),
    #Items
    ("Most popular items", most_popular_items),
    ("Most profitable items", most_profitable_items),
]
for caption, table in highlighted_tables:
    print(caption) # Plain-text caption so each table can be identified in the output
    display(table) # Rich (HTML) rendering of the Series/DataFrame

In [None]:
# Echo the total purchase count (set to len(df) in the purchasing analysis cell) as this cell's output
num_purchases