# Heroes

In [646]:
import pandas as pd
import os

In [647]:
filename = os.path.join('raw_data','purchase_data.json')
#filename = os.path.join('raw_data','purchase_data2.json')

In [648]:
purchases = pd.read_json(filename)

### Player count

In [649]:
total_players = purchases['SN'].nunique()
total_players

573

### Purchasing Analysis (Total)


In [650]:
items = len(purchases['Item Name'].unique())

In [651]:
average_price = purchases['Price'].mean()
average_price = "${:,.2f}".format(average_price)

In [652]:
total_revenue = purchases['Price'].sum()
total_revenue = "${:,.2f}".format(total_revenue)

In [653]:
total_purchases = len(purchases)
items = purchases[['Item ID','Item Name','Price']]
items = items.sort_values(by='Item ID')
items = items.drop_duplicates()
items = items['Item ID'].count()

In [654]:
purchases_dict = {'Number of Unique Items':[items],
                  'Average Purchase Price': [average_price],
                  'Total Number of Purchases':[total_purchases],
                  'Total Revenue':[total_revenue]
                 }
purchases_df = pd.DataFrame(purchases_dict)
purchases_df

Unnamed: 0,Average Purchase Price,Number of Unique Items,Total Number of Purchases,Total Revenue
0,$2.93,183,780,"$2,286.33"


### Gender Demographics

Percentage and Count of Male Players
Percentage and Count of Female Players
Percentage and Count of Other / Non-Disclosed

In [655]:
byGender = purchases.groupby('Gender')

In [656]:
dem = pd.DataFrame(byGender['SN'].nunique())

In [657]:
dem['percentage'] = dem['SN'].divide(total_players)
dem['percentage'] = dem['percentage'].multiply(100)
dem['percentage'] = dem['percentage'].map("{:,.2f}%".format)

In [658]:
dem

Unnamed: 0_level_0,SN,percentage
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,100,17.45%
Male,465,81.15%
Other / Non-Disclosed,8,1.40%


### Purchasing Analysis (Gender)


In [659]:
purchase_count = byGender['SN'].count()
average_purchase_price = byGender['Price'].mean()
total_purchase_value = byGender['Price'].sum()
normalized_total = total_purchase_value / purchase_count

In [660]:
dem['purchase count'] = purchase_count
dem['average purchase'] = average_purchase_price.map("${:,.2f}".format)
dem['total purchase'] = total_purchase_value.map("${:,.2f}".format)
dem['Normalized total'] = normalized_total.map("${:,.2f}".format)
dem

Unnamed: 0_level_0,SN,percentage,purchase count,average purchase,total purchase,Normalized total
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,100,17.45%,136,$2.82,$382.91,$2.82
Male,465,81.15%,633,$2.95,"$1,867.68",$2.95
Other / Non-Disclosed,8,1.40%,11,$3.25,$35.74,$3.25


### Age Demographics

In [661]:
ages = [0,10,14,19,24,29,34,100]
age_buckets = ['<10','10-14','15-19','20-24','25-29','30-35','>35']

purchase_age = pd.cut(purchases['Age'],bins=ages,labels=age_buckets)
purchases['Age Group'] = purchase_age
byAge = purchases.groupby('Age Group')
age_purchase_count = byAge['Age Group'].count()
age_average_purchase_price = byAge['Price'].mean()
age_total_purchase_value = byAge['Price'].sum()
age_normalized_total = age_total_purchase_value / age_purchase_count
age_demog_df = pd.DataFrame({'Purchase Count':age_purchase_count,
                            'Average Purchase Price':age_average_purchase_price.map("${:,.2f}".format),
                            'Total Purchase Value':age_total_purchase_value.map("${:,.2f}".format),
                            'Normalized totals':age_normalized_total.map("${:,.2f}".format)})

In [662]:
age_demog_df

Unnamed: 0_level_0,Average Purchase Price,Normalized totals,Purchase Count,Total Purchase Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,$3.02,$3.02,32,$96.62
10-14,$2.70,$2.70,31,$83.79
15-19,$2.91,$2.91,133,$386.42
20-24,$2.91,$2.91,336,$978.77
25-29,$2.96,$2.96,125,$370.33
30-35,$3.08,$3.08,64,$197.25
>35,$2.93,$2.93,59,$173.15


# Top Spenders

In [663]:

byName = purchases.groupby('SN')
total_spend = byName['Price'].sum()
purchase_count =byName['Price'].count()
average_price = byName['Price'].mean()

top_spend = pd.DataFrame({'Total Purchase Value':total_spend,
                             'Purchase Count':purchase_count,
                             'Average Purchase Price':average_price})

top_spend = top_spend.sort_values(by='Total Purchase Value', ascending=False)[:5]
top_spend['Total Purchase Value'] = top_spend['Total Purchase Value'].map("${:,.2f}".format)
top_spend['Average Purchase Price'] = top_spend['Average Purchase Price'].map("${:,.2f}".format)

In [664]:
top_spend

Unnamed: 0_level_0,Average Purchase Price,Purchase Count,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,$3.41,5,$17.06
Saedue76,$3.39,4,$13.56
Mindimnya67,$3.18,4,$12.74
Haellysu29,$4.24,3,$12.73
Eoda93,$3.86,3,$11.58


### Most Popular Items

In [665]:
byItem = purchases.groupby('Item ID')
item_name = byItem['Item Name'].unique()
item_name = item_name.map("%s".join)
item_price = byItem['Price'].unique()
total_spend = byItem['Price'].sum()
purchase_count =byItem['Price'].count()
average_price = byItem['Price'].mean()

popular_dict = pd.DataFrame({'Total Purchase Value':total_spend,
                             'Purchase Count':purchase_count,
                             'Item Name':item_name,
                             'Item Price':item_price
                             })

popular_df = pd.DataFrame(popular_dict)
popular_item = popular_df.sort_values(by='Purchase Count', ascending=False)[:5]


popular_item['Total Purchase Value']= popular_item['Total Purchase Value'].map("${:,.2f}".format)
popular_item['Item Price'] = popular_item['Item Price'].astype(float).map("${:,.2f}".format)
popular_item


Unnamed: 0_level_0,Item Name,Item Price,Purchase Count,Total Purchase Value
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",$2.35,11,$25.85
84,Arcane Gem,$2.23,11,$24.53
31,Trickster,$2.07,9,$18.63
175,Woeful Adamantite Claymore,$1.24,9,$11.16
13,Serenity,$1.49,9,$13.41


### Most Profitable Items

In [666]:
profitable_dict = pd.DataFrame({'Total Purchase Value':total_spend,
                             'Purchase Count':purchase_count,
                             'Item Name':item_name,
                             'Item Price':item_price.astype(float).map("${:,.2f}".format)
                             })
profitable_df = pd.DataFrame(profitable_dict)
profitable = profitable_df.sort_values(by='Total Purchase Value',ascending=False)[:5]

profitable['Total Purchase Value'] = profitable['Total Purchase Value'].map("${:,.2f}".format)

In [667]:
profitable

Unnamed: 0_level_0,Item Name,Item Price,Purchase Count,Total Purchase Value
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,$4.14,9,$37.26
115,Spectral Diamond Doomblade,$4.25,7,$29.75
32,Orenmir,$4.95,6,$29.70
103,Singed Scalpel,$4.87,6,$29.22
107,"Splitter, Foe Of Subtlety",$3.61,8,$28.88
