# Heroes Of Pymoli Data Analysis
* Of the 576 active players, the vast majority are male (84%). There is also a smaller but notable proportion of female players (14%).

* Our peak age demographic falls between 20-24 (44.8%), with secondary groups falling between 15-19 (18.6%) and 25-29 (13.4%).  
-----

In [2]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Relative path of the purchase data CSV
file_to_load = "Resources/purchase_data.csv"

# Parse the CSV into the DataFrame that the analysis cells below read from
purchase_data_df = pd.read_csv(filepath_or_buffer=file_to_load)

## Player Count
#### Key-Takeaways: 
* There are 576 unique players that have made purchases within Heroes of Pymoli

In [3]:
# Players are identified by screen name ("SN"); count the distinct ones
player_total = purchase_data_df["SN"].nunique()

# Present the count as a one-row table
Total_Unique_Players_df = pd.DataFrame({"Total # of Players": [player_total]})
Total_Unique_Players_df

Unnamed: 0,Total # of Players
0,576


## Purchasing Analysis
#### Key-Takeaways:

* There were 183 unique items sold
* The average sale was 3.05 dollars
* There were 780 in-game sales
* Total revenues/sales were 2,379.77 dollars

In [42]:
# Headline figures computed over every purchase row
price_column = purchase_data_df["Price"]
total_unique = purchase_data_df["Item ID"].nunique()   # distinct item IDs sold
avg_price = price_column.mean()                        # mean price of a purchase
total_pur = purchase_data_df["Purchase ID"].count()    # non-null purchase IDs
total_rev = price_column.sum()                         # sum of all purchase prices

# Display order of the one-row summary table
summary_columns = [
    "Number of Unique Items",
    "Average Purchase Price",
    "Number of Purchases",
    "Total Revenue",
]

# Build the summary directly in display order
pur_analysis = pd.DataFrame(
    {
        "Number of Unique Items": [total_unique],
        "Average Purchase Price": [avg_price],
        "Number of Purchases": [total_pur],
        "Total Revenue": [total_rev],
    },
    columns=summary_columns,
)

# Render the money columns as dollars (thousands separator on revenue)
pur_analysis.style.format({
    "Average Purchase Price": '${:.2f}',
    "Total Revenue": '${:,.2f}',
})

Unnamed: 0,Number of Unique Items,Average Purchase Price,Number of Purchases,Total Revenue
0,183,$3.05,780,"$2,379.77"


## Gender Demographics
#### Key-Takeaways:
* Males = 84% (484)
* Females = 14% (81)
* Other = 2% (11)

In [56]:
# One row per player: keep the first purchase row of each screen name ("SN").
# The explicit .copy() makes this an independent frame, so cells that later
# add columns to it do not raise pandas' SettingWithCopyWarning.
no_dup_sn = purchase_data_df.drop_duplicates(["SN"]).copy()

# Number of players of each gender, and each gender's share of all players
plyr_count_by_gen = no_dup_sn.groupby("Gender")["SN"].count()
pct_of_plyrs_by_gen = plyr_count_by_gen / no_dup_sn["Gender"].count()

# Combine the two series (both indexed by gender) into one table
gender_demos_df = pd.DataFrame({
    "Total Count" : plyr_count_by_gen,
    "Percent of Players" : pct_of_plyrs_by_gen,})

# Fix the column order for display
gender_demos_df = gender_demos_df[["Total Count", "Percent of Players"]]

# Largest group first
sorted_gender_demos_df = gender_demos_df.sort_values("Total Count", ascending = False)

# Show the share as a percentage with two decimals
formatted_gender_demos_df = sorted_gender_demos_df.style.format({"Percent of Players" : "{:.02%}"})
formatted_gender_demos_df

Unnamed: 0_level_0,Total Count,Percent of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)
#### Key-Takeaways:
* Males drove the most sales (1,967.64)
* Males made roughly 5.8 times as many purchases as Females (652 vs. 113)


In [55]:
# Group every purchase row by the buyer's gender once and reuse the grouping
gender_groups = purchase_data_df.groupby("Gender")

# Purchase count, average purchase price and total purchase value per gender
pc_grpby = gender_groups["Purchase ID"].count()
app_grpby = gender_groups["Price"].mean()
pur_val_grpby = gender_groups["Price"].sum()

# Average total spend per player: divide each gender's total by its number of
# distinct screen names. Computed from purchase_data_df directly so this cell
# does not depend on a de-duplicated frame built in another cell.
# NOTE(review): assumes each screen name maps to a single gender - confirm.
players_per_gender = gender_groups["SN"].nunique()
avg_pur_person_grpby = pur_val_grpby / players_per_gender

# Combine the per-gender series into the Purchasing Analysis (Gender) table
pur_analysis_gen = pd.DataFrame({
    "Purchase Count" : pc_grpby,
    "Average Purchase Price" : app_grpby,
    "Total Purchase Value" : pur_val_grpby,
    "Avg Purchase Total per Person" : avg_pur_person_grpby})

# Fix the column order for display
pur_analysis_gen = pur_analysis_gen[["Purchase Count", "Average Purchase Price","Total Purchase Value", "Avg Purchase Total per Person"]]

# Gender with the most purchases first
sorted_pur_analysis = pur_analysis_gen.sort_values("Purchase Count", ascending = False)

# Render the money columns as dollars
sorted_pur_analysis.style.format({
    "Average Purchase Price" : "${:.2f}",
    "Total Purchase Value" : "${:.2f}",
    "Avg Purchase Total per Person" : "${:.2f}"})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Purchase Total per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,652,$3.02,$1967.64,$4.07
Female,113,$3.20,$361.94,$4.47
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


## Age Demographics (AD)
#### Key Takeaways:
* Most players are in the 20-24 age range (44.79%) with 15-19 falling right behind it (18.58%)

In [54]:
# Bin edges and their labels. Bins are left-closed (right=False), so
# [0, 10) -> "<10", [10, 15) -> "10-14", ..., [40, 100) -> "40+".
# Ages of 100 or more fall outside the last bin and get no label.
age_bins = [0, 10, 15, 20, 25, 30, 35, 40, 100]
age_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]


def add_age_range(frame):
    """Return a copy of `frame` with an "Age Range" column binned from "Age".

    Uses DataFrame.assign, which builds a new frame instead of writing into
    `frame`; this avoids SettingWithCopyWarning when `frame` was itself
    produced by filtering another DataFrame.
    """
    age_range = pd.cut(frame["Age"], age_bins, right=False, labels=age_labels, include_lowest=True)
    return frame.assign(**{"Age Range": age_range})


# Tag both the one-row-per-player frame and the full purchase log
no_dup_sn = add_age_range(no_dup_sn)
purchase_data_df = add_age_range(purchase_data_df)

# Players per age range, and each range's share of all players
age_groups_AD = no_dup_sn.groupby("Age Range")["Age"].count() # counts
age_groups_pct_AD = age_groups_AD / len(no_dup_sn) # shares

# Combine the two series (both indexed by age range) into one table
age_demo_analysis = pd.DataFrame({
    "Total Count" : age_groups_AD,
    "Percentage of Players" : age_groups_pct_AD,})

# Fix the column order for display
age_demo_analysis = age_demo_analysis[["Total Count", "Percentage of Players"]]

# Show the share as a percentage with two decimals
age_demo_analysis.style.format({"Percentage of Players" : "{:.02%}"})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,Total Count,Percentage of Players
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


## Purchasing Analysis (Age)
#### Key Takeaways
* 20-24 yr olds made the most purchases (365) with a total purchase value of 1,114.06
* 15-19 yr olds made the second most purchases (136) with a total purchase value of 412.89

In [45]:
# Group every purchase row by the buyer's age range once and reuse the grouping
# (the "Age Range" column is added to purchase_data_df by the Age Demographics cell)
age_groups_PAA = purchase_data_df.groupby("Age Range")

# Purchase count, average purchase price and total purchase value per age range
pur_count_PAA = age_groups_PAA["Purchase ID"].count()
avg_pur_price_PAA = age_groups_PAA["Price"].mean()
ttl_pur_value_PAA = age_groups_PAA["Price"].sum()

# Average total spend per player: divide each range's total by its number of
# distinct screen names, rather than by a count of non-null prices in a
# separately maintained de-duplicated frame.
# NOTE(review): assumes each screen name maps to a single age - confirm.
players_per_age_PAA = age_groups_PAA["SN"].nunique()
avg_ttl_pur_value_prsn_PAA = ttl_pur_value_PAA / players_per_age_PAA

# Combine the per-age-range series into one table
purchase_analysis_age_PAA = pd.DataFrame({
    "Purchase Count" : pur_count_PAA,
    "Average Purchase Price" : avg_pur_price_PAA,
    "Total Purchase Value" : ttl_pur_value_PAA,
    "Avg Total Purchase per Person" : avg_ttl_pur_value_prsn_PAA})

# Fix the column order for display
purchase_analysis_age_PAA = purchase_analysis_age_PAA[["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Avg Total Purchase per Person"]]

# Render the money columns as dollars
purchase_analysis_age_PAA.style.format({
    "Average Purchase Price" : "${:.2f}",
    "Total Purchase Value" : "${:.2f}",
    "Avg Total Purchase per Person" : "${:.2f}"})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,$1114.06,$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


## Top Spenders
#### Key Takeaways:
* "Lisosia93" is the biggest spender, buying 5 items worth a total of 18.96

In [47]:
# Prices of every purchase, grouped by the buyer's screen name
prices_by_player = purchase_data_df.groupby("SN")["Price"]

# Per player: how many purchases, their mean price, and their summed price
pur_count_TP = prices_by_player.count()
avg_pur_price_TP = prices_by_player.mean()
ttl_pur_price_TP = prices_by_player.sum()

# Assemble the per-player table directly in display order
spender_columns = ["Purchase Count", "Average Purchase Price", "Total Purchase Value"]
top_spender_analysis_TP = pd.DataFrame(
    {
        "Purchase Count": pur_count_TP,
        "Average Purchase Price": avg_pur_price_TP,
        "Total Purchase Value": ttl_pur_price_TP,
    },
    columns=spender_columns,
)

# Keep the five players with the highest total spend
sorted_TP = top_spender_analysis_TP.sort_values("Total Purchase Value", ascending=False).head(5)

# Render the money columns as dollars
as_dollars = '${:,.2f}'.format
sorted_TP.style.format({
    "Average Purchase Price": as_dollars,
    "Total Purchase Value": as_dollars,
})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items
#### Key Takeaways:
* The most popular item is the "Oathbreaker, Last Hope of the Breaking Storm" with 12 total purchases and a total purchase value of 50.76

In [50]:
# Group every purchase row by item (ID and name together)
purchases_by_item = purchase_data_df.groupby(["Item ID", "Item Name"])

# Per item: number of purchases, mean price, and summed price
pur_count_MPoI = purchases_by_item["Item ID"].count()
item_price_MPoI = purchases_by_item["Price"].mean()
ttl_pur_value_MPoI = purchases_by_item["Price"].sum()

# Assemble the per-item table directly in display order
item_columns = ["Purchase Count", "Item Price", "Total Purchase Value"]
most_popular_items_MPoI = pd.DataFrame(
    {
        "Purchase Count": pur_count_MPoI,
        "Item Price": item_price_MPoI,
        "Total Purchase Value": ttl_pur_value_MPoI,
    },
    columns=item_columns,
)

# Keep the five items with the highest "Purchase Count"
sorted_most_popular_items_MPoI = most_popular_items_MPoI.sort_values("Purchase Count", ascending=False).head(5)

# Render the money columns as dollars
as_dollars = '${:,.2f}'.format
sorted_most_popular_items_MPoI.style.format({
    "Item Price": as_dollars,
    "Total Purchase Value": as_dollars,
})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


## Most Profitable Items
#### Key Takeaways:
* The most profitable items are Oathbreaker [...] (50.76), Nirvana (44.10), and Fiery Glass Crusader (41.22)

In [51]:
# Re-rank the per-item summary by "Total Purchase Value" (highest first)
# and keep the top five rows
most_profitable_items = most_popular_items_MPoI.sort_values("Total Purchase Value", ascending = False).head()

# Render the money columns as dollars. Only columns that exist in the table
# are listed; the table has no plain "Price" column.
most_profitable_items.style.format({
    "Item Price" : '${:,.2f}'.format,
    "Total Purchase Value" : '${:,.2f}'.format})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
92,Final Critic,8,$4.88,$39.04
103,Singed Scalpel,8,$4.35,$34.80
