# Heroes Of Pymoli Data Analysis
* Out of the 780 surveyed, the vast majority that play this game are male (81%)

* The Peak of ages that this game are ages 20-24, with 43% of players in that age range.

* It seems the most popular item is also the most profitable.  The Bone Crushing Silver Skewer
-----

In [4]:
# Import pandas library
import pandas as pd
import numpy as np

In [17]:
#create a reference path
json_path = "purchase_data.json"

#Read it into a Pandas DataFrame
df = pd.read_json(json_path, orient='columns')

#Print the first five rows
df.head()


Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [18]:
# Display a statistical overview of the data frame
# Average Purchase Price
# Total Number of Purchases

df.describe()

Unnamed: 0,Age,Item ID,Price
count,780.0,780.0,780.0
mean,22.729487,91.29359,2.931192
std,6.930604,52.707537,1.11578
min,7.0,0.0,1.03
25%,19.0,44.0,1.96
50%,22.0,91.0,2.88
75%,25.0,135.0,3.91
max,45.0,183.0,4.95


In [39]:
# Run basic calculations
average_item_price = df["Price"].mean()
total_purchase_value = df["Price"].sum()
purchase_count = df["Price"].count()
item_count = len(df["Item ID"].unique())
num_gender = df['Gender'].count()

# Create a DataFrame to hold results
summary_table = pd.DataFrame({"Number of Unique Items": item_count,
                              "Total Revenue": [total_purchase_value],
                              "Number of Purchases": [purchase_count],
                              "Average Price": [average_item_price]})

# Minor Data Munging
summary_table = summary_table.round(2)
summary_table ["Average Price"] = summary_table["Average Price"].map("${:,.2f}".format)
summary_table ["Number of Purchases"] = summary_table["Number of Purchases"].map("{:,}".format)
summary_table ["Total Revenue"] = summary_table["Total Revenue"].map("${:,.2f}".format)
summary_table = summary_table.loc[:,["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"]]

# Display the summary_table
summary_table

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


## Gender Demographics

In [46]:
# Gender Count

total_gender = df['Gender'].value_counts()
gender_dems = (total_gender / num_gender) * 100
gender_demographics = pd.DataFrame({"Total Count": total_gender, "Percentage of Players": gender_dems})

# Minor Data Munging
gender_demographics = gender_demographics.round(2)

gender_demographics

Unnamed: 0,Percentage of Players,Total Count
Male,81.15,633
Female,17.44,136
Other / Non-Disclosed,1.41,11


## Purchasing Anaysis by Gender


In [77]:
# Basic Calculations
gender_purchase_total = purchase_data.groupby(["Gender"]).sum()["Price"].rename("Total Purchase Value")
gender_average = purchase_data.groupby(["Gender"]).mean()["Price"].rename("Average Purchase Price")
gender_counts = purchase_data.groupby(["Gender"]).count()["Price"].rename("Purchase Count")

# Totals
norm_totals = gender_purchase_total / gender_demographics['Total Count']

# Convert to DataFrame
gender_data = pd.DataFrame({"Purchase Count": gender_counts, 
                            "Average Purchase Price": gender_average, 
                            "Total Purchase Value": gender_purchase_total,
                            "Normalized Totals": norm_totals})

# Formatting
gender_data['Average Purchase Price'] = gender_data['Average Purchase Price'].map("${:,.2f}".format)
gender_data['Total Purchase Value'] = gender_data['Total Purchase Value'].map("${:,.2f}".format)
gender_data['Normalized Totals'] = gender_data['Normalized Totals'].map("${:,.2f}".format)


gender_data

Unnamed: 0_level_0,Average Purchase Price,Normalized Totals,Purchase Count,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,$2.82,$2.82,136,$382.91
Male,$2.95,$2.95,633,"$1,867.68"
Other / Non-Disclosed,$3.25,$3.25,11,$35.74


## Age Demographics

In [84]:
# Purchase Count by Age (broken into bins of 4 years) (i.e. <10, 10-14, 15-19, etc.)

count_by_age = df.groupby(['Age']).count()


def get_age_bucket(age):
    lower = age - (age % 5) 
    upper = lower + 4
    return str(lower) + '-' + str(upper)

df['AgeBucket'] = df['Age'].map(get_age_bucket)

age_dem_totals = df['AgeBucket'].value_counts()
age_dem_percent = age_dem_totals / df['SN'].count() * 100
age_dems = pd.DataFrame({'Total Count': age_dem_totals,
                        'Percentage of Players': age_dem_percent})

age_dems = age_dems.round(2)
age_dems.sort_index()


Unnamed: 0,Percentage of Players,Total Count
10-14,4.49,35
15-19,17.05,133
20-24,43.08,336
25-29,16.03,125
30-34,8.21,64
35-39,5.38,42
40-44,2.05,16
45-49,0.13,1
5-9,3.59,28


## Purchasing Analysis by Age

In [103]:
age_purchase_total = df.groupby(['AgeBucket']).sum()['Price'].rename('Total Purchase Value')
age_average = df.groupby(['AgeBucket']).mean()['Price'].rename('Age Averages')
age_counts = df.groupby(['AgeBucket']).count()['Price'].rename('Age Counts')

norm_total = age_purchase_total / df['Price'].count()

age_data = pd.DataFrame({'Age Purchase Total': age_purchase_total,
                        'Age Average': age_average,
                        'Age Counts': age_counts,
                        'Nomalized Total': norm_total})

age_data

Unnamed: 0_level_0,Age Average,Age Counts,Age Purchase Total,Nomalized Total
AgeBucket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10-14,2.77,35,96.95,0.124295
15-19,2.905414,133,386.42,0.49541
20-24,2.913006,336,978.77,1.254833
25-29,2.96264,125,370.33,0.474782
30-34,3.082031,64,197.25,0.252885
35-39,2.842857,42,119.4,0.153077
40-44,3.189375,16,51.03,0.065423
45-49,2.72,1,2.72,0.003487
5-9,2.980714,28,83.46,0.107


## Top 5 Spenders

In [104]:
# Identify the top 5 spenders by total purchase value, then list(in a table)
# SN, Purchase Count, Average Purchase Price, Total Purchase


#grouped_spend_sum = df.groupby(['SN']).agg({'Price':sum})

grouped_spend_sum = df.groupby(['SN']).sum()['Price']
total_purchase = pd.DataFrame(grouped_spend_sum)

#total_purchase.columns = ['SN', 'Total Purchase']
total_purchase.reset_index(inplace=True)

total_purchase = total_purchase.sort_values(['Price'], ascending = False)

total_purchase.head()





Unnamed: 0,SN,Price
538,Undirrala66,17.06
428,Saedue76,13.56
354,Mindimnya67,12.74
181,Haellysu29,12.73
120,Eoda93,11.58


In [105]:
#Average Purchase Price


grouped_average = df.groupby('SN')['Price'].mean()

average_purchase = pd.DataFrame(grouped_average)

average_purchase.reset_index(inplace=True)

average_purchase.head()


Unnamed: 0,SN,Price
0,Adairialis76,2.46
1,Aduephos78,2.233333
2,Aeduera68,1.933333
3,Aela49,2.46
4,Aela59,1.27


In [106]:
#Purchase Count

grouped_purchase_count = df.groupby('SN')['Price'].count()
grouped_purchase_count = pd.DataFrame(grouped_purchase_count)
grouped_purchase_count.reset_index(inplace=True)
grouped_purchase_count.head()

Unnamed: 0,SN,Price
0,Adairialis76,1
1,Aduephos78,3
2,Aeduera68,3
3,Aela49,1
4,Aela59,1


In [107]:
#Merged Data Frame
#grouped_purchase_count

merged_top_spenders = pd.merge(total_purchase, average_purchase, how='outer',  on='SN')
merged_top_spenders.head()



Unnamed: 0,SN,Price_x,Price_y
0,Undirrala66,17.06,3.412
1,Saedue76,13.56,3.39
2,Mindimnya67,12.74,3.185
3,Haellysu29,12.73,4.243333
4,Eoda93,11.58,3.86


In [108]:
merge_top_spenders2 = pd.merge(merged_top_spenders, grouped_purchase_count, how='outer', on='SN')

merge_top_spenders2 = merge_top_spenders2.rename(columns = {'Price_x': 'Total Price',
                                               'Price_y': 'Average Price',
                                               'Price' : 'Purchase Count'
                                              })

merge_top_spenders2.head()



Unnamed: 0,SN,Total Price,Average Price,Purchase Count
0,Undirrala66,17.06,3.412,5
1,Saedue76,13.56,3.39,4
2,Mindimnya67,12.74,3.185,4
3,Haellysu29,12.73,4.243333,3
4,Eoda93,11.58,3.86,3


## Most Popular Items

In [109]:
# Identify the 5 most popular items by purchase count, then list in a table. (Item ID, Item Name, Purchase Count, Item Price, Total Purchase Value) 



grouped_item_count = pd.DataFrame(df['Item ID'].value_counts())

grouped_item_count.reset_index(inplace=True)
grouped_item_count.columns = ['Item ID', 'Purchase Count']
grouped_item_count['Item Name'] = df['Item Name']



grouped_item_count.head()




Unnamed: 0,Item ID,Purchase Count,Item Name
0,84,11,Bone Crushing Silver Skewer
1,39,11,"Stormbringer, Dark Blade of Ending Misery"
2,31,9,Primitive Blade
3,34,9,Final Critic
4,175,9,Stormfury Mace


In [110]:
#Item Price

grouped_item_count['Price'] = df['Price']
grouped_item_count.head()


Unnamed: 0,Item ID,Purchase Count,Item Name,Price
0,84,11,Bone Crushing Silver Skewer,3.37
1,39,11,"Stormbringer, Dark Blade of Ending Misery",2.32
2,31,9,Primitive Blade,2.46
3,34,9,Final Critic,1.36
4,175,9,Stormfury Mace,1.27


In [111]:
#Total Purchase Value

grouped_item_count['Purchase Total'] = grouped_item_count['Price'] * grouped_item_count['Purchase Count']
grouped_item_count.head()


Unnamed: 0,Item ID,Purchase Count,Item Name,Price,Purchase Total
0,84,11,Bone Crushing Silver Skewer,3.37,37.07
1,39,11,"Stormbringer, Dark Blade of Ending Misery",2.32,25.52
2,31,9,Primitive Blade,2.46,22.14
3,34,9,Final Critic,1.36,12.24
4,175,9,Stormfury Mace,1.27,11.43


## Most Profitable Items

In [112]:
#(Item ID, Item Name, Purchase Count, Item Price, Total Purchase Value)


profitable_items = grouped_item_count.sort_values(['Purchase Total'], ascending = False)
profitable_items.head()


Unnamed: 0,Item ID,Purchase Count,Item Name,Price,Purchase Total
0,84,11,Bone Crushing Silver Skewer,3.37,37.07
6,106,8,Mercenary Sabre,4.57,36.56
9,65,8,"Expiration, Warscythe Of Lost Worlds",4.53,36.24
19,66,7,"Winterthorn, Defender of Shifting Worlds",4.89,34.23
15,154,7,Blood-Forged Skeletal Spine,4.77,33.39
