# Heroes of Pymoli 
### Andrew Anastasiades | @andrew-ana
---

## LOAD DATA
---

In [None]:
# DEPENDENCIES
import pandas as pd #File IO and Data Manipulation
import os #OS agnostic file structure
# INPUT LOCATION
# os.path.join inserts the correct path separator for the host OS.
purchase_filename = os.path.join("Resources", "purchase_data.csv")
# READ THE PURCHASE CSV INTO THE WORKING DATAFRAME
df = pd.read_csv(filepath_or_buffer=purchase_filename)

## INSPECT DATA
---

In [None]:
# Summary statistics for the loaded purchase data (the notebook renders the returned table)
df.describe()

In [None]:
# Preview the first five rows to check the column names and value formats
df.head(n=5)

## ANALYZE DATA
---

### PLAYER ANALYSIS

In [None]:
# Players are identified by the 'SN' column; a player with several purchases
# appears on several rows, so count distinct values rather than rows.
num_players = df.loc[:, 'SN'].nunique()
print(f"There are {num_players} unique players in the game")

### PURCHASING ANALYSIS (TOTAL)

In [None]:
# Dataset-wide purchase totals: distinct items, mean price, purchase count, revenue.
num_items = df['Item ID'].nunique() #Unique Item IDs
print(f"There are {num_items} unique items")
avg_price = df['Price'].mean()
print(f"The average purchase price is ${avg_price:.2f}")
num_purchases = len(df) #Each row is a purchase
print(f"There are {num_purchases} purchases")
rev_total = df['Price'].sum()
# Total revenue is the sum of all prices -- report rev_total here, not avg_price
print(f"Total Purchases were ${rev_total:.2f}")

### GENDER DEMOGRAPHICS

In [None]:
# Split the purchase rows by the 'Gender' column; the grouping is reused by later cells
gender_group = df.groupby(['Gender'])
# Start from an empty frame and add one statistic per column
gender_df = pd.DataFrame()
# Distinct 'SN' values per gender (a player can have several purchase rows)
gender_df["Players"] = gender_group['SN'].nunique()
# Share of all unique players that each gender represents
gender_df["Percentage"] = gender_df["Players"].div(num_players)
# Render the share as a percentage with two decimals
gender_df.style.format(dict.fromkeys(['Percentage'], '{:,.2%}'.format))

### PURCHASING ANALYSIS (GENDER)

In [None]:
# Per-gender purchasing statistics, one column per metric
gender_purchases_df = pd.DataFrame()
# Number of purchase rows in each gender group
gender_purchases_df["Purchases"] = gender_group['SN'].count()
# Mean price paid per purchase
gender_purchases_df["Average Price"] = gender_group['Price'].mean()
# Revenue subtotal for the group
gender_purchases_df["Total Value"] = gender_group['Price'].sum()
# Revenue per unique player = group revenue / number of unique players in the group
gender_purchases_df["Value / Player"] = gender_purchases_df["Total Value"].div(gender_df["Players"])
# Show all monetary columns as dollars with two decimals
gender_purchases_df.style.format(
    dict.fromkeys(
        ['Average Price', 'Total Value', 'Value / Player'],
        '${:,.2f}'.format,
    )
)

### AGE DEMOGRAPHICS

In [None]:
# Prepare the age bins and group the purchases by them.
# Layout: one catch-all bin [0, 10) followed by 4-year-wide bins [10, 14), [14, 18), ...
# pd.cut(..., right=False) makes every bin left-closed / right-open, so the
# last edge must be strictly greater than the oldest age; otherwise those rows
# get a NaN 'Age Group' and drop out of the grouped statistics.
# Floor division guarantees that (round() can stop at or below the max age,
# e.g. for a max age of 42, 43, 44 or 46).
age_bin_max = int((df['Age'].max() - 10) // 4) + 2 #Exclusive stop for range() below
age_bins = [0,10] + [10+i*4 for i in range(1,age_bin_max)] #Make my bins
df['Age Group'] = pd.cut(df['Age'], age_bins, right=False) #Add 'Age Group' Column
age_groups = df.groupby(['Age Group']) #Groupby 'Age Group'

#Analyze
age_df = pd.DataFrame()#I'm going to make a DataFrame to house my statistics
age_df["Players"] = age_groups['SN'].nunique() #How many people in each group?
age_df["Purchases"] = age_groups['SN'].count() #How many purchases
age_df["Average Price"] = age_groups['Price'].mean() #Average purchase price
age_df["Total Value"] = age_groups['Price'].sum() #Sum of Prices
age_df["Value / Player"] = age_df["Total Value"] / age_df["Players"] #LTV = group rev / group size
#Show the monetary columns as dollars with two decimals
age_df.style.format({
    'Average Price': '${:,.2f}'.format,
    'Total Value': '${:,.2f}'.format,
    'Value / Player': '${:,.2f}'.format,
})

### TOP SPENDERS

In [None]:
# One group per player ('SN'), so each statistic below is per player
player_group = df.groupby('SN')
top_spenders = pd.DataFrame()
# Total amount spent by the player
top_spenders['Total Purchases'] = player_group['Price'].sum()
# Number of purchase rows for the player
top_spenders['Purchase Count'] = player_group['Price'].count()
# Mean spend per purchase = total spent / number of purchases
top_spenders['Average Purchase Price'] = top_spenders['Total Purchases'].div(top_spenders['Purchase Count'])
# Biggest spenders first
top_spenders = top_spenders.sort_values(by='Total Purchases', ascending=False)
# Keep only the first five rows of the ranking
top_5_spenders = top_spenders.iloc[:5]
# Show the monetary columns as dollars with two decimals
top_5_spenders.style.format(
    dict.fromkeys(
        ['Total Purchases', 'Average Purchase Price'],
        '${:,.2f}'.format,
    )
)

### MOST POPULAR ITEMS

In [None]:
# Group on both name and ID so each (Item Name, Item ID) pair is its own row
item_group = df.groupby(['Item Name', 'Item ID'])
pop_items = pd.DataFrame()
# How many times the item was bought
pop_items['Purchase Count'] = item_group['Price'].count()
# Revenue generated by the item
pop_items['Total Purchase Value'] = item_group['Price'].sum()
# Mean sale price (an item can be sold at more than one price)
pop_items['Average Price'] = item_group['Price'].mean()
# Most frequently purchased items first
pop_items = pop_items.sort_values(by='Purchase Count', ascending=False)
# Keep only the first five rows of the ranking
most_popular_items = pop_items.iloc[:5]
# Show the monetary columns as dollars with two decimals
most_popular_items.style.format(
    dict.fromkeys(
        ['Total Purchase Value', 'Average Price'],
        '${:,.2f}'.format,
    )
)

### MOST PROFITABLE ITEMS

In [None]:
# Same per-item table as the popularity ranking, re-sorted by revenue instead of count
prof_items = pop_items.sort_values(by='Total Purchase Value', ascending=False)
# Keep only the first five rows of the ranking
most_profitable_items = prof_items.iloc[:5]
# Show the monetary columns as dollars with two decimals
most_profitable_items.style.format(
    dict.fromkeys(
        ['Total Purchase Value', 'Average Price'],
        '${:,.2f}'.format,
    )
)