In [1]:
import pandas as pd
import numpy as np

file_to_load = "../resources/purchase_data.csv"

purchase_data = pd.read_csv(file_to_load)
all_data_df = pd.DataFrame(purchase_data)

# Player Count

In [2]:
unique_players_df = pd.DataFrame(purchase_data.drop_duplicates(subset=["SN"]))
total_player_count2_df = pd.DataFrame(purchase_data) ["Gender"].count

unique_player_count = len(purchase_data["SN"].unique())
player_count_total = pd.DataFrame({"Number of Players": [unique_player_count]})
player_count_total

Unnamed: 0,Number of Players
0,576


# Purchasing Analysis (Total)

In [3]:
unique_items = len(purchase_data["Item ID"].unique())
average_price = (purchase_data["Price"].mean())
average_price_format = ("${:,.2f}".format(average_price))
number_of_purchases = len(purchase_data)
total_revenue = (purchase_data["Price"].sum())
total_revenue_format = ("${:,.2f}".format(total_revenue))

purchase_analysis_df = pd.DataFrame({"Unique Players": [unique_player_count], 
                                  "Unique Items": [unique_items], 
                                  "Average Price": [average_price_format], 
                                  "Number of Purchases": [number_of_purchases],
                                  "Total Revenue": [total_revenue_format]})
purchase_analysis_df

Unnamed: 0,Unique Players,Unique Items,Average Price,Number of Purchases,Total Revenue
0,576,183,$3.05,780,"$2,379.77"


# Gender Demographics

In [4]:
gender_type_df = unique_players_df.groupby(["Gender"])
total_player_count = gender_type_df["Gender"].count()
total_player_fraction = gender_type_df["Gender"].count() / unique_player_count

gender_demographics = pd.DataFrame({"Total Count": total_player_count,
                                   "Percent of Players": (((total_player_fraction)*100))}).sort_values(by=["Total Count"], ascending = False)

gender_demographics["Percent of Players"] = gender_demographics["Percent of Players"].map("{:.2f}%".format)
gender_demographics

Unnamed: 0_level_0,Total Count,Percent of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


# Purchasing Analysis (Gender)

In [5]:
df = pd.DataFrame(purchase_data)

purchases_gender_df = df.groupby(df["Gender"])[["Purchase ID"]].count()
total_revenue_gender_df = df.groupby(df["Gender"])[["Price"]].sum()
purchases_gender_df = df.groupby(df["Gender"])[["Purchase ID"]].count()
ave_price_gender_df = df.groupby(df["Gender"])[["Price"]].mean()
mean = gender_type_df["Price"].mean()
total = gender_type_df["Price"].sum()


In [6]:
merged_1_2_df = pd.merge(purchases_gender_df, ave_price_gender_df, on="Gender",)
merged_1_2_3_df = pd.merge(merged_1_2_df, total_revenue_gender_df, on="Gender")
df = pd.merge(merged_1_2_3_df, mean, on="Gender")
df.columns = ["Transaction Count", "Average Price", "Total Revenue", "Average per Person"]

df["Average Price"] = df["Average Price"].map("${:.2f}".format)
df["Total Revenue"] = df["Total Revenue"].map("${:.2f}".format)
df["Average per Person"] = df["Average per Person"].map("${:.2f}".format)
df.sort_values(by = ["Transaction Count"], ascending = False)

Unnamed: 0_level_0,Transaction Count,Average Price,Total Revenue,Average per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,652,$3.02,$1967.64,$3.05
Female,113,$3.20,$361.94,$3.17
Other / Non-Disclosed,15,$3.35,$50.19,$3.41


# Age Demographics

In [14]:

bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 999999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

unique_player_list = purchase_data["SN"].nunique()
purchase_data["age_group"] = pd.cut(all_data_df["Age"], bins, labels = group_names)


age_count = purchase_data["age_group"].value_counts()

percentage_players = (age_count/unique_player_list)*100
percent_players = purchase_data.groupby("age_group").count()
age_demographics = pd.DataFrame({"Total Counts": age_count, 
                                 "Percentage of Players": percentage_players}).sort_index()

age_demographics["Percentage of Players"] = age_demographics["Percentage of Players"].map("{:.2f}%".format)
age_demographics

Unnamed: 0,Total Counts,Percentage of Players
<10,23,3.99%
10-14,28,4.86%
15-19,136,23.61%
20-24,365,63.37%
25-29,101,17.53%
30-34,73,12.67%
35-39,41,7.12%
40+,13,2.26%


# Purchasing Analysis (Age)


In [8]:
# bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 999999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
purchase_data["age_group"] = pd.cut(all_data_df["Age"], bins, labels = group_names)
purchase_data["price"] = pd.cut(all_data_df["Price"], bins, labels = group_names)


average_total_per_person = total_revenue / age_count
age_count = purchase_data["age_group"].value_counts()
purchasing_analysis = pd.DataFrame({"Purchase Count": age_count,
                                    "Average Purchase Price": average_total_per_person,
                                   "Total Purchase Value": total_revenue,
                                   "Avg Total Purchase / Person": total_revenue}).sort_index()
purchasing_analysis

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase / Person
<10,23,103.468261,2379.77,2379.77
10-14,28,84.991786,2379.77,2379.77
15-19,136,17.498309,2379.77,2379.77
20-24,365,6.519918,2379.77,2379.77
25-29,101,23.562079,2379.77,2379.77
30-34,73,32.599589,2379.77,2379.77
35-39,41,58.043171,2379.77,2379.77
40+,13,183.059231,2379.77,2379.77


# Most Popular Items

# Most Profitable Items