In [2]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [88]:
purchase_data

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [None]:
# "${:,.2f}". format(amount)

# Count total players

In [53]:
total_players = len(purchase_data.SN.value_counts())
total_player_dict = [{"Total Player": total_players}]

total_player_df = pd.DataFrame(total_player_dict)
total_player_df.head()

Unnamed: 0,Total Player
0,576


# Purchase analysis

In [121]:
unique_item = len(purchase_data["Item Name"].value_counts())
avg_price = purchase_data.Price.mean()
num_of_purchase = purchase_data["Purchase ID"].count()
total_revenue = purchase_data.Price.sum()


data_insert = {'Number of Unique Items' : [unique_item], 'Average Price' : [avg_price], 
               'Number of Purchases': [num_of_purchase], 'Total Revenue' : [total_revenue]}
purchase_analysis_df = pd.DataFrame(data = data_insert)

purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.050987,780,2379.77


# Gender Demographic

In [61]:
#male users calculation
male_users_df = purchase_data.loc[purchase_data["Gender"] == "Male"]
number_male_users = len(male_users_df.SN.value_counts())
percentage_male= "{:.2%}".format(number_female_users/ total_players)
                 
#female users calculation
female_users_df = purchase_data.loc[purchase_data["Gender"] == "Female"]
number_female_users = len(female_users_df.SN.value_counts())  
percentage_female= "{:.2%}".format(number_female_users/ total_players)


#other/ nondisclosed calculation
none_disclosed_df = purchase_data.loc[purchase_data["Gender"] == "Other / Non-Disclosed"]
none_disclosed_users = len(none_disclosed_df.SN.value_counts())
percentage_none_disclosed = "{:.2%}".format(none_disclosed_user/total_players)

gender_df = pd.DataFrame({'Gender': ['Male', 'Female', 'Other/ Non-disclosed'],
            'Total Count': [number_male_users, number_female_users, none_disclosed_users],
            'Percentage of Players': [percentage_male, percentage_female, percentage_none_disclosed]})
gender_df.set_index('Gender')

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,14.06%
Female,81,14.06%
Other/ Non-disclosed,11,1.91%


# Purchase Analysis (genders)

In [111]:
#filter the df to show only male users
male_purchase_analysis_df = purchase_data.loc[purchase_data.Gender == "Male"]

In [83]:
male_purchase_count = len(male_purchase_analysis_df['Purchase ID'])
avg_male_purchase_price = male_purchase_analysis_df.Price.mean()
total_male_purchase = male_purchase_analysis_df.Price.sum()
avg_purchase_per_male = total_male_purchase/ number_male_users

In [112]:

#filter the df to show only female users
female_purchase_analysis_df = purchase_data.loc[purchase_data.Gender == "Female"]

In [85]:
female_purchase_count = len(female_purchase_analysis_df['Purchase ID'])
avg_female_purchase_price = female_purchase_analysis_df.Price.mean()
total_female_purchase = female_purchase_analysis_df.Price.sum()
avg_purchase_per_female = total_female_purchase/ number_female_users

In [113]:
#filter the df to show other/non-disclosed users
other_purchase_analysis_df = purchase_data.loc[purchase_data.Gender == "Other / Non-Disclosed"]

In [103]:
other_purchase_count = len(other_purchase_analysis_df['Purchase ID'])
avg_other_purchase_price = other_purchase_analysis_df.Price.mean()
total_other_purchase = other_purchase_analysis_df.Price.sum()
avg_purchase_per_other = total_other_purchase/ none_disclosed_users

In [110]:

#construct a data frame for analysis
gender_purchase_df = pd.DataFrame({'Gender': ['Male', 'Female', 'Other/ Non-disclosed'],
              'Purchase Count': [male_purchase_count, female_purchase_count, other_purchase_count],
              'Average Purchase Price': ["${:,.2f}". format(avg_male_purchase_price), "${:,.2f}". format(avg_female_purchase_price), "${:,.2f}". format(avg_other_purchase_price)],
              'Total Purchase Value': ["${:,.2f}". format(total_male_purchase), "${:,.2f}". format(total_female_purchase), "${:,.2f}". format(total_other_purchase)],
              'Avg Total Purchase per Person': ["${:,.2f}". format(avg_purchase_per_male), "${:,.2f}". format(avg_purchase_per_female), "${:,.2f}". format(avg_purchase_per_other)]})
gender_purchase_df.set_index("Gender")

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,652,$3.02,"$1,967.64",$4.07
Female,113,$3.20,$361.94,$4.47
Other/ Non-disclosed,15,$3.35,$50.19,$4.56


# Age Demographic


In [119]:
#create the bin, max 200 in case someone try to be god. There are 8 bins
bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 200]

#there are 8 age groups
age_group = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", ">40"]

purchase_data["Age Group"] = pd.cut(purchase_data.Age, bins, labels=age_group, right=True)
purchase_data

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,>40
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24
...,...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54,20-24
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63,20-24
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46,20-24
778,778,Sisur91,7,Male,92,Final Critic,4.19,<10


In [120]:
purchase_data["Age Group"].value_counts()

20-24    365
15-19    136
25-29    101
30-34     73
35-39     41
10-14     28
<10       23
>40       13
Name: Age Group, dtype: int64