In [1]:
%load_ext lab_black
import pandas as pd

# relative path to the purchase data CSV
file = "Resources/purchase_data.csv"

# load the CSV into the frame that the cells below read from
df = pd.read_csv(filepath_or_buffer=file)

## Player Count

* Total Number of Players

In [2]:
# keep the first row for each "SN" value so every player is counted once
df_unique_player = df.drop_duplicates(subset="SN")

# single-row summary frame holding the number of distinct players
player_count = df_unique_player.shape[0]
total_players = pd.DataFrame({"Total Players": [player_count]})

total_players

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Summary of Items Purchased

In [3]:
# headline figures over all purchase rows: distinct item ids, non-null item
# count, mean price and summed price
unique_item = df["Item ID"].nunique(dropna=False)
total_item = df["Item ID"].count()
avg_price = df["Price"].mean()
total_rev = df["Price"].sum()

# assemble the figures as a one-row frame, rounded to two decimals for display
purchase_analysis_total = pd.DataFrame(
    [
        {
            "Number of Unique Items": unique_item,
            "Average Price": avg_price,
            "Number of Purchases": total_item,
            "Total Revenue": total_rev,
        }
    ]
).round(2)

purchase_analysis_total

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,3.05,780,2379.77


## Gender Demographics

* Percentage and Count of Genders

In [4]:
# count unique players by gender; value_counts orders rows by count (largest
# first), so the row order depends on the data
total_gender = df_unique_player["Gender"].value_counts()

# per-gender counts and percentage of all unique players, derived for every
# gender present rather than by positional lookups of exactly three entries
# (integer keys on a label-indexed Series are deprecated in pandas 2.x)
gender_counts = tuple(total_gender)
percents = total_gender / len(df_unique_player) * 100

# build the summary from the label-indexed Series so each row keeps the gender
# label reported by the data, instead of a hard-coded label list that assumed
# a fixed row order
gender_demo = pd.DataFrame(
    {"Total Count": total_gender, "Percentage of Players": percents}
)

# preserve the original display: compact label for the non-disclosed group
# and no index name above the row labels
gender_demo = gender_demo.rename(
    index={"Other / Non-Disclosed": "Other/Non-Disclosed"}
).rename_axis(None)

gender_demo = gender_demo.round(2)

gender_demo

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03
Female,81,14.06
Other/Non-Disclosed,11,1.91


## Purchasing Analysis (Gender)

* Summary of Items Purchased by Gender

In [5]:
# split the purchase rows by gender
gender = df.groupby(by=["Gender"])
gender_prices = gender["Price"]

# per-gender purchase count, mean price and total spend
purchase_cnt = gender["SN"].count()
avg_gen_price = gender_prices.mean()
price_sum = gender_prices.sum()

# total spend divided by the unique-player count of each gender; total_gender
# comes from the Gender Demographics cell and the division aligns on the
# gender label
avg_price_person = price_sum.div(total_gender)

# collect the per-gender Series as columns, rounded to two decimals for display
summary_columns = {
    "Purchase Count": purchase_cnt,
    "Average Purchase Price": avg_gen_price,
    "Total Purchase Value": price_sum,
    "Avg Total Purchase per Person": avg_price_person,
}
purchase_analysis_gender = pd.DataFrame(summary_columns).round(2)

purchase_analysis_gender

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.2,361.94,4.47
Male,652,3.02,1967.64,4.07
Other / Non-Disclosed,15,3.35,50.19,4.56


## Age Demographics

* Count and Percentage of Players per Age Group

In [6]:
# bin edges sit just below each multiple of five, so with pd.cut's default
# right-closed intervals a whole-number age lands in the group its label names
age_bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# label each unique player with an age group
age_range = pd.cut(x=df_unique_player["Age"], bins=age_bins, labels=group_names)

# players per group, and each group's share of all unique players in percent
age_demo_cnt = age_range.value_counts()
age_demo_percent = age_demo_cnt.div(len(df_unique_player)).mul(100)

# combine both columns, round for display and order rows by age group
age_demo = (
    pd.DataFrame(
        {"Total Count": age_demo_cnt, "Percentage of Players": age_demo_percent}
    )
    .round(2)
    .sort_index()
)

age_demo

Unnamed: 0,Total Count,Percentage of Players
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
40+,12,2.08
