In [2]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
#Statistical overview:
purchase_data.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,91.755128,3.050987
std,225.310896,6.659444,52.697702,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,47.75,1.98
50%,389.5,22.0,92.0,3.15
75%,584.25,25.0,138.0,4.08
max,779.0,45.0,183.0,4.99


In [4]:
sn_count = purchase_data["SN"].value_counts()
total_players = len(sn_count)

disp_total_players = pd.DataFrame({"Total Players":[total_players]})
disp_total_players

Unnamed: 0,Total Players
0,576


In [11]:
#Purchasing Analysis (Total)
item_count = purchase_data["Item Name"].value_counts()

unique_item = len(item_count)

average_price = round(purchase_data["Price"].mean(),2)

number_purchases = len(purchase_data)

total_revenue = purchase_data["Price"].sum()

In [12]:
#Purchasing Analysis (Total)
data_df = pd.DataFrame({"Number of Unique Items":[unique_item], 
                        "Average Price": "$"+str(average_price), 
                        "Number of Purchases":[number_purchases], 
                        "Total Revenue": "$"+str(total_revenue)})
data_df


Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,$2379.77


In [21]:
#Gender Demographics
Male_count = purchase_data['Gender'].value_counts()

Male_count

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [102]:
unique_data = purchase_data["SN"].unique()
unique_data_df = len(unique_data)
unique_data_df

576

In [127]:
#Count every gender separately
only_male = purchase_data.loc[(purchase_data["Gender"] == "Male")]
only_females = purchase_data.loc[(purchase_data["Gender"] == "Female")]
only_non = purchase_data.loc[(purchase_data["Gender"] == "Other / Non-Disclosed")]

total_males = len(only_male["SN"].unique())
total_females = len(only_females["SN"].unique())
total_non = len(only_non["SN"].unique())

#Percentages
male_percent = round(((total_males* 100)/ 576),2)
female_percent = round(((total_females* 100)/ 576),2)
other_percent = round(((total_non* 100)/ 576),2)

#Dataframe
gender_df = pd.DataFrame({"Gender":["Female", "Male", "Other / Non-Disclosed"],
                          "Total count":[total_females, total_males, total_non],
                         "Percentage of Players":[str(female_percent)+"%", str(male_percent)+"%", str(other_percent)+"%"]})

gender_display_df = gender_df.set_index("Gender")
gender_display_df


Unnamed: 0_level_0,Total count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06%
Male,484,84.03%
Other / Non-Disclosed,11,1.91%


In [135]:
#Purchasin Analysis (Gender)


#Total purchases by gender
purchase_male_count = only_male["Purchase ID"].value_counts()
total_male_purchase = len(purchase_male_count)

purchse_female_count = only_females["Purchase ID"].value_counts()
total_female_purchase = len(purchse_female_count)

purchse_other_count = only_non["Purchase ID"].value_counts()
total_other_purchase = len(purchse_other_count)

#Averge Purchase Price by gender
av_male_purchse = round(((only_male["Price"].sum())/total_male_purchase), 2)
av_female_purchse = round(((only_females["Price"].sum())/total_female_purchase), 2)
av_others_purchse = round(((only_non["Price"].sum())/total_other_purchase), 2)

#Total Purchase Value
male_purchase_value = only_male["Price"].sum()
female_purchase_value = only_females["Price"].sum()
other_purchase_value = only_non["Price"].sum()

#Avg purchases per person
gender_groupby = purchase_data.groupby(["Gender"])
gender_summary_df = gender_groupby.Price.agg(["sum"])
purchase_per_person = round((gender_summary_df["sum"]/gender_display_df["Total count"]),2)

ppp_female = purchase_per_person["Female"]
ppp_male = purchase_per_person["Male"]
ppp_other = purchase_per_person["Other / Non-Disclosed"]

#Dataframe
gender_purchasing_df = pd.DataFrame({"Gender":["Female", "Male", "Other / Non-Disclosed"],
                                    "Purchase Count":[total_female_purchase, total_male_purchase, total_other_purchase],
                                    "Averge Purchase Price":["$"+str(av_female_purchse), "$"+str(av_male_purchse), "$"+str(av_others_purchse)],
                                    "Total Purchases Value":["$"+str(female_purchase_value), "$"+str(male_purchase_value), "$"+str(other_purchase_value)],
                                    "Avg Total Purchase per Person":["$"+str(ppp_female), "$"+str(ppp_male), "$"+str(ppp_other)]})





gender_summary = gender_purchasing_df.set_index("Gender")
gender_summary
#av_male_purchse
#male_purchase_value

Unnamed: 0_level_0,Purchase Count,Averge Purchase Price,Total Purchases Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.2,$361.94,$4.47
Male,652,$3.02,$1967.64,$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [222]:
#Age Demographics
#The minimum and maximum ages
print(purchase_data["Age"].max())
print(purchase_data["Age"].min())

#create bins
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, 44]
group_labels = ["0-10", "10-14", "15-19", "20-24",
                "25-29", "30-34", "35-39", "40-45"]

#Place into bins
pd.cut(purchase_data["Age"], age_bins, labels=group_labels).head()
#Place into columns of Dataframe
purchase_data["Age Group"] = pd.cut(purchase_data["Age"], age_bins, labels=group_labels)

age_group = purchase_data.groupby("Age Group")


age_group[["Age"]].count()

45
7


Unnamed: 0_level_0,Age
Age Group,Unnamed: 1_level_1
0-10,23
10-14,28
15-19,136
20-24,365
25-29,101
30-34,73
35-39,41
40-45,12


In [249]:
#Purchasing Analysis (Age)

purchase_data["Age Group"] = pd.cut(purchase_data["Age"], age_bins, labels=group_labels)

age_group = purchase_data.groupby("Age Group")

Purchase_Count = age_group[["Price"]].count()
Average_Purchase_Price = age_group[["Price"]].mean()
Total_Purchase_Value = age_group[["Price"]].sum()

Purchase_Count_list = list(Purchase_Count["Price"])
Average_Purchase_Price_list = list(Average_Purchase_Price["Price"])
Total_Purchase_Value_list = list(Total_Purchase_Value["Price"])

summary_df = pd.DataFrame({"Age Ranges":["<10", "11-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"],
                           "Purchase Count":Purchase_Count_list,
                          "Average Purchase Price":Average_Purchase_Price_list,
                          "Total Purchase Value":Total_Purchase_Value_list})

summary2_df = summary_df.set_index("Age Ranges")
summary2_df
#Purchase_Count_list

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<10,23,3.353478,77.13
11-14,28,2.956429,82.78
15-19,136,3.035956,412.89
20-24,365,3.052219,1114.06
25-29,101,2.90099,293.0
30-34,73,2.931507,214.0
35-39,41,3.601707,147.67
40+,12,3.045,36.54
