In [1]:
import os
import pandas as pd

In [2]:
# Build the CSV location from its path components (relative to the working
# directory) and read the purchase records into a DataFrame.
path_parts = (".", "Resources", "purchase_data.csv")
input_path = os.path.join(*path_parts)
purch_data = pd.read_csv(input_path)

<h2>Player Count</h2>

In [3]:
# Player count: number of distinct "SN" values in the purchase data,
# shown as a one-row summary table.
player_count = len(purch_data["SN"].unique())
total_players_df = pd.DataFrame({
    "Total Players": [player_count]
})
# Display the table without its index column. Styler.hide_index() was deprecated
# in pandas 1.4 and removed in 2.0 in favour of Styler.hide(axis="index"), so use
# the new method when it exists and fall back to the old one on older installs.
total_players_styler = total_players_df.style
(
    total_players_styler.hide(axis="index")
    if hasattr(total_players_styler, "hide")
    else total_players_styler.hide_index()
)

Total Players
576


<h2>Purchasing Analysis (Total)</h2>

- Number of Unique Items
- Average Purchase Price
- Total Number of Purchases
- Total Revenue

In [4]:
# Purchasing Analysis (Total): a one-row summary computed over every purchase record.
total_price_column = purch_data["Price"]
total_usd = "${:,.2f}".format  # dollar string with thousands separator and 2 decimals

# Counts: distinct item IDs and number of purchase rows.
unique_item_count = purch_data["Item ID"].unique().size
purchase_count = purch_data.shape[0]

# Money figures are stored as display strings, not numbers.
average_item_price = total_usd(total_price_column.mean())
total_revenue = total_usd(total_price_column.sum())

# Assemble the summary table (each value wrapped in a list to form a single row).
purchasing_analysis_total_df = pd.DataFrame(
    {
        "Number of Unique Items": [unique_item_count],
        "Average Purchase Price": [average_item_price],
        "Total Number of Purchases": [purchase_count],
        "Total Revenue": [total_revenue],
    }
)

purchasing_analysis_total_df

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


----------------------------
<h2>Gender Demographics</h2>

- Percentage and count of Male Players
- Percentage and count of Female Players
- Percentage and count of Other/Non-Disclosed Players

In [5]:
# Gender Demographics: distinct players per gender and their share of all players.
# Select "SN" before aggregating so only that column is counted; calling
# .nunique() on the whole grouped frame computes distinct counts for every
# column and then throws all but one away.
gender_count_series = purch_data.groupby("Gender")["SN"].nunique()
# Share of the overall distinct-player total (player_count) held by each gender.
gender_percentage = gender_count_series / player_count

gender_demographics = {
    'Total Count': gender_count_series,
    # Percentages are stored as display strings such as "14.06%".
    'Percentage of Players': gender_percentage.map("{:.2%}".format)
}
gender_demographics_df = pd.DataFrame(gender_demographics)
gender_demographics_df

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06%
Male,484,84.03%
Other / Non-Disclosed,11,1.91%


---
<h2>Purchasing Analysis (Gender)</h2>

- Purchase Count for each Category
- Total Purchase Value for each Category
- Average Purchase Value for each Category
- Average Total Purchase per Person for each Category

In [6]:
# Purchasing Analysis (Gender): purchase count, revenue and averages per gender.
gender_data = purch_data.groupby("Gender")
gender_prices = gender_data["Price"]

gender_counts = gender_prices.count()
gender_totals = gender_prices.sum()
gender_average = gender_prices.mean()
# Revenue divided by the number of distinct players (not purchases) in each gender.
avg_per_person = gender_totals.div(gender_count_series)

gender_analysis_dict = {
    "Purchase Count": gender_counts,
    "Total Purchase Value": gender_totals,
    "Average Purchase Value": gender_average,
    "Average Total Purchase Per Person": avg_per_person,
}
gender_purchasing_analysis = pd.DataFrame(gender_analysis_dict)

# Turn the money columns into display strings; only the total uses a thousands separator.
gender_money_templates = {
    "Total Purchase Value": "${:,.2f}",
    "Average Purchase Value": "${:.2f}",
    "Average Total Purchase Per Person": "${:.2f}",
}
for gender_col, gender_template in gender_money_templates.items():
    gender_purchasing_analysis[gender_col] = gender_purchasing_analysis[gender_col].map(gender_template.format)
gender_purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Total Purchase Value,Average Purchase Value,Average Total Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$361.94,$3.20,$4.47
Male,652,"$1,967.64",$3.02,$4.07
Other / Non-Disclosed,15,$50.19,$3.35,$4.56


---
<h2>Age Demographics</h2>

- Establish Age Groups
- Total Number of users in each group, and the Percentage of players they cover

In [7]:
# Age Demographics: bucket every purchase row into an age group, then count the
# distinct players in each group and their share of all players.
# Bin edges are right-inclusive: [0, 9], (9, 14], (14, 19], ... , (39, inf].
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, float("inf")]
age_bin_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']
# Adds (or overwrites) the "Age Group" column on purch_data; re-running is idempotent.
purch_data["Age Group"] = pd.cut(purch_data["Age"], age_bins, labels=age_bin_names, include_lowest=True)
# "Age Group" is categorical. Pin observed=False so every bucket keeps a row even
# when empty: newer pandas warns when `observed` is omitted on a categorical key
# and is switching the default to True, which would silently drop empty buckets.
age_data = purch_data.groupby("Age Group", observed=False)

# Select "SN" before aggregating so distinct counts are computed for that column only.
age_count_series = age_data["SN"].nunique()
age_percentage = age_count_series / player_count

age_demographics = {
    'Total Count': age_count_series,
    # Percentages are stored as display strings such as "44.79%".
    'Percentage of Players': age_percentage.map("{:.2%}".format)
}
age_demographics_df = pd.DataFrame(age_demographics)
age_demographics_df

Unnamed: 0_level_0,Total Count,Percentage of Players
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


---
<h2>Purchasing Analysis (Age)</h2>

- Total number of purchases for each age range
- Average Purchase price for each age range
- Total Purchase Value for each age range
- Average total purchase per Person for each age range

In [8]:
# Purchasing Analysis (Age): purchase count, average price, revenue and
# per-player spend for each age group (reuses the age_data grouping).
age_prices = age_data["Price"]

# Purchase count: number of purchase rows per age group.
age_count = age_data["SN"].count()
# Total and average purchase price per age group.
age_total = age_prices.sum()
age_average = age_prices.mean()
# Revenue divided by the number of distinct players in each age group.
age_avg_per_person = age_total.div(age_count_series)

age_dict = {
    'Purchase Count': age_count,
    'Average Purchase Price': age_average,
    'Total Purchase Value': age_total,
    "Avg Total Purchase per Person": age_avg_per_person,
}
age_purchasing_analysis = pd.DataFrame(age_dict)

# Turn the money columns into display strings; the per-person column has no thousands separator.
age_money_templates = {
    "Average Purchase Price": "${:,.2f}",
    "Total Purchase Value": "${:,.2f}",
    "Avg Total Purchase per Person": "${:.2f}",
}
for age_col, age_template in age_money_templates.items():
    age_purchasing_analysis[age_col] = age_purchasing_analysis[age_col].map(age_template.format)
age_purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


---
<h2>Top Spenders</h2>

- Players with the highest Total Purchase Value, along with their Purchase Count and Average Purchase Price

In [9]:
# Top Spenders: per-player purchase stats, ranked by total spend.
top_spender_group = purch_data.groupby("SN")
spender_prices = top_spender_group["Price"]

spender_count = top_spender_group["SN"].count()
spender_total = spender_prices.sum()
spender_average = spender_prices.mean()

spender_dict = {
    "Purchase Count": spender_count,
    "Average Purchase Price": spender_average,
    "Total Purchase Value": spender_total,
}

# Rank on the numeric totals first; the money columns become strings below.
spender_df = pd.DataFrame(spender_dict).sort_values("Total Purchase Value", ascending=False)

spender_usd = "${:.2f}".format
for spender_col in ("Average Purchase Price", "Total Purchase Value"):
    spender_df[spender_col] = spender_df[spender_col].map(spender_usd)
spender_df.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


---
<h2> Most Popular Items</h2>

- Most Popular Items, with their ID, sorted by number of purchases

In [10]:
# Most Popular Items: per-item purchase stats, ranked by number of purchases.
items_group = purch_data.groupby(["Item ID", "Item Name"])
item_prices = items_group["Price"]

item_count = items_group["Item ID"].count()
item_price = item_prices.mean()
item_total_purchase = item_prices.sum()

item_dict = {
    "Purchase Count": item_count,
    "Item Price": item_price,
    "Total Purchase Value": item_total_purchase,
}

item_df = pd.DataFrame(item_dict).sort_values("Purchase Count", ascending=False)
# Keep an unformatted numeric copy; the "Most Profitable Items" cell re-sorts it by total value.
item_df_clean = item_df.copy()

# Turn the money columns of the displayed frame into dollar strings.
item_usd = "${:.2f}".format
for item_col in ("Item Price", "Total Purchase Value"):
    item_df[item_col] = item_df[item_col].map(item_usd)
item_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


---
<h2>Most Profitable Items</h2>

- Items with the highest Total Purchase Value

In [11]:
# Most Profitable Items: re-rank the numeric item stats by total purchase value,
# then format the money columns for display.
popular_item_df = item_df_clean.sort_values(by="Total Purchase Value", ascending=False)

profit_usd = "${:.2f}".format
for profit_col in ("Item Price", "Total Purchase Value"):
    popular_item_df[profit_col] = popular_item_df[profit_col].map(profit_usd)
popular_item_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80


<h1>Data Observations</h1>

Males dominate the player base with 84.03% of the players (484 players), with females trailing behind at 14.06% (81 players). Female and Other/Non-Disclosed players spend around 10% more in total per person than male players: \$4.47 for females (9.8% more) and \$4.56 for Other/Non-Disclosed (12% more), versus the \$4.07 average spent by male players. 20-24 year olds dominate the player base and account for 47% (\$1,114.06) of the revenue on their own. However, it should be noted that 35-39 year olds spend more per person on average than any other age group (\$4.76).