In [2]:
import pandas as pd
import os

In [3]:
# Location of the purchase log, relative to this notebook's folder:
# one directory up, then Resources/purchase_data.csv.
file = os.path.join(os.pardir, "Resources", "purchase_data.csv")

In [4]:
# Load the raw purchase log into a DataFrame and preview the first rows
# (head() shows five rows by default).
df = pd.read_csv(file)
df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [5]:
# Count distinct players, identified by their screen name (SN).
# dropna=False keeps a missing SN counted as its own value, exactly as
# len(Series.unique()) would.
total_players = df["SN"].nunique(dropna=False)
total_players

576

In [6]:
# Distinct item names seen in the purchase log, followed by how many there are.
# NOTE(review): items are distinguished by name only; if two Item IDs share a
# name they are counted once -- confirm that is the intended definition.
unique_items = pd.unique(df["Item Name"])
total_items = len(unique_items)

In [7]:
# Headline purchasing figures taken from the Price column:
# total revenue is the sum of all purchase prices ...
total_revenue = df["Price"].sum()
# ... and the average purchase is the mean price per purchase row.
average_purchase = df["Price"].mean()

In [8]:
# Keep only the columns needed for the demographic breakdown.
# At this point there is still one row per purchase, so the counts shown
# below are purchases per gender, not players per gender.
demographics_df = df.loc[:, ["SN", "Gender"]]
demographics_df["Gender"].value_counts()

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [9]:
# Collapse repeat purchases so each (SN, Gender) pair appears once; the
# resulting counts describe players rather than purchases.
player_df = demographics_df.drop_duplicates(keep="first")
player_df["Gender"].value_counts()

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [10]:
# Player head-count for each gender label, taken from the de-duplicated frame.
# A label with no matching rows yields 0.
male_players, female_players, other_players = (
    len(player_df[player_df["Gender"] == label])
    for label in ("Male", "Female", "Other / Non-Disclosed")
)

In [11]:
# Share of all players belonging to each gender group.
# Values are fractions between 0 and 1 rounded to two decimals (e.g. 0.84),
# not percentages on a 0-100 scale.
percent_male, percent_female, percent_other = (
    round(count / total_players, 2)
    for count in (male_players, female_players, other_players)
)

In [25]:
# Working frame for the purchasing analysis: one row per purchase, limited to
# the columns that analysis reads.
purchases_df = df.loc[:, ["Age", "Gender", "Item Name", "Price"]]

In [26]:
# Number of purchase rows recorded for each gender, most frequent first
# (sort/ascending are spelled out but are the pandas defaults).
purchases_by_gender = purchases_df["Gender"].value_counts(sort=True, ascending=False)
purchases_by_gender

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [27]:
# Mean price of a single purchase for each gender label.
# A label with no matching rows yields NaN rather than raising.
average_male_purchases, average_female_purchases, average_other_purchases = (
    purchases_df.loc[purchases_df["Gender"].eq(label), "Price"].mean()
    for label in ("Male", "Female", "Other / Non-Disclosed")
)

In [28]:
# Sum of all purchase prices for each gender label.
# A label with no matching rows sums to 0.
total_male_purchases, total_female_purchases, total_other_purchases = (
    purchases_df.loc[purchases_df["Gender"].eq(label), "Price"].sum()
    for label in ("Male", "Female", "Other / Non-Disclosed")
)

In [79]:
# Group purchase rows per player (SN) and gender so spend can be totalled per
# person. as_index=False keeps SN and Gender as ordinary columns in any
# aggregated result instead of moving them into the index.
purchases_by_person = df.groupby(by=["SN", "Gender"], as_index=False)

In [80]:
# Total amount spent by each player: one row per (SN, Gender) group with the
# summed Price alongside the grouping columns.
individual_player_spend = purchases_by_person[["Price"]].agg("sum")
individual_player_spend

Unnamed: 0,SN,Gender,Price
0,Adairialis76,Male,2.28
1,Adastirin33,Female,4.48
2,Aeda94,Male,4.91
3,Aela59,Male,4.32
4,Aelaria33,Male,1.79
...,...,...,...
571,Yathecal82,Female,6.22
572,Yathedeu43,Male,6.02
573,Yoishirrala98,Female,4.58
574,Zhisrisu83,Male,7.89


In [86]:
# Mean of the per-player spend totals within each gender label, i.e. the
# average amount one player of that gender spent overall.
average_male_spend, average_female_spend, average_other_spend = (
    individual_player_spend.loc[individual_player_spend["Gender"].eq(label), "Price"].mean()
    for label in ("Male", "Female", "Other / Non-Disclosed")
)

In [115]:
# Age bin edges: a first edge at 0, then edges every 4 years starting at 10.
# The range stops below (max age + 4), so the last edge is always at or above
# the oldest age present in the data.
bins = [0, *range(10, df["Age"].max() + 4, 4)]
bins

[0, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46]

In [117]:
# Group every purchase row by the age bracket its "Age" value falls into.
# pd.cut labels each row with a right-closed interval (lo, hi] built from `bins`;
# ages outside every interval receive no label and are left out of the groups.
#
# observed=False is passed explicitly because the grouping key is categorical:
# relying on the implicit default raises a FutureWarning in recent pandas 2.x,
# and the default becomes True in pandas 3.0, which would silently drop any
# age bracket that has no purchases from the tables computed from `groups`.
groups = df.groupby(pd.cut(df["Age"], bins), observed=False)

In [124]:
# Number of purchases (non-null Price values) in each age bracket
purchase_count_by_age = groups["Price"].count()
purchase_count_by_age

Age
(0, 10]      32
(10, 14]     19
(14, 18]    113
(18, 22]    254
(22, 26]    207
(26, 30]     63
(30, 34]     38
(34, 38]     35
(38, 42]     15
(42, 46]      4
Name: Price, dtype: int64

In [122]:
# Mean purchase price within each age bracket
average_price_by_age = groups["Price"].mean()
average_price_by_age

Age
(0, 10]     3.405000
(10, 14]    2.681579
(14, 18]    3.034602
(18, 22]    3.038937
(22, 26]    3.063961
(26, 30]    2.876667
(30, 34]    2.728421
(34, 38]    3.552857
(38, 42]    3.366667
(42, 46]    2.765000
Name: Price, dtype: float64

In [123]:
# Total purchase value (sum of Price) within each age bracket
total_value_by_age = groups["Price"].sum()
total_value_by_age

Age
(0, 10]     108.96
(10, 14]     50.95
(14, 18]    342.91
(18, 22]    771.89
(22, 26]    634.24
(26, 30]    181.23
(30, 34]    103.68
(34, 38]    124.35
(38, 42]     50.50
(42, 46]     11.06
Name: Price, dtype: float64