In [2]:
import pandas as pd
import os

In [3]:
# Relative location of the purchase data: one directory up, inside "Resources"
file = os.path.join(os.pardir, "Resources", "purchase_data.csv")

In [4]:
# Load the purchase log into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer=file)
df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [5]:
# Number of distinct screen names (SN) in the purchase log, i.e. unique players.
# dropna=False keeps a missing SN counted as one value, matching len(unique()).
total_players = df["SN"].nunique(dropna=False)
total_players

576

In [6]:
# Distinct item names that appear in the purchase log, and how many there are.
# NOTE(review): this counts distinct *names*; if two Item IDs can share a name,
# counting df["Item ID"] would give a different total — confirm which is intended.
unique_items = pd.unique(df["Item Name"])
total_items = unique_items.size

In [7]:
# Total revenue is the sum of the Price column over every purchase row
total_revenue = df.loc[:, "Price"].sum()
# Average purchase is the mean of that same column
average_purchase = df.loc[:, "Price"].mean()

In [8]:
# Keep only the screen name and gender columns for the demographic analysis
demographics_df = df.loc[:, ["SN", "Gender"]]
# Row counts per gender; rows are purchases here, so repeat buyers are counted repeatedly
demographics_df.loc[:, "Gender"].value_counts()

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [9]:
# Collapse repeated (SN, Gender) rows so each player contributes a single row,
# then count players per gender
player_df = demographics_df.drop_duplicates(subset=["SN", "Gender"], keep="first")
player_df.loc[:, "Gender"].value_counts()

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [10]:
# Player head-count for each gender label, taken from the de-duplicated player table.
# Summing the boolean match mask gives the same number as len() of the filtered rows.
male_players, female_players, other_players = (
    int(player_df["Gender"].eq(label).sum())
    for label in ("Male", "Female", "Other / Non-Disclosed")
)

In [11]:
# Share of all players belonging to each gender, rounded to two decimals.
# Despite the "percent_" names these are fractions in the 0-1 range (no * 100).
percent_male, percent_female, percent_other = (
    round(count / total_players, 2)
    for count in (male_players, female_players, other_players)
)

In [25]:
# Subset of the purchase log used for the purchasing analysis by gender
purchases_df = df.loc[:, ["Age", "Gender", "Item Name", "Price"]]

In [26]:
# Number of purchase rows recorded for each gender
purchases_by_gender = purchases_df.loc[:, "Gender"].value_counts()
purchases_by_gender

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [27]:
# Mean price of a single purchase for each gender label
average_male_purchases, average_female_purchases, average_other_purchases = (
    purchases_df.loc[purchases_df["Gender"].eq(label), "Price"].mean()
    for label in ("Male", "Female", "Other / Non-Disclosed")
)

In [28]:
# Summed purchase value for each gender label
total_male_purchases, total_female_purchases, total_other_purchases = (
    purchases_df.loc[purchases_df["Gender"].eq(label), "Price"].sum()
    for label in ("Male", "Female", "Other / Non-Disclosed")
)

In [79]:
# Group the purchase log by player (SN) and gender; as_index=False keeps the
# grouping keys as ordinary columns in anything aggregated from this object
purchases_by_person = df.groupby(by=["SN", "Gender"], as_index=False)

In [80]:
# One row per (SN, Gender) with the summed Price, i.e. each player's total spend
individual_player_spend = purchases_by_person[["Price"]].agg("sum")
individual_player_spend

Unnamed: 0,SN,Gender,Price
0,Adairialis76,Male,2.28
1,Adastirin33,Female,4.48
2,Aeda94,Male,4.91
3,Aela59,Male,4.32
4,Aelaria33,Male,1.79
...,...,...,...
571,Yathecal82,Female,6.22
572,Yathedeu43,Male,6.02
573,Yoishirrala98,Female,4.58
574,Zhisrisu83,Male,7.89


In [86]:
# Mean of the per-player spend totals for each gender label
average_male_spend, average_female_spend, average_other_spend = (
    individual_player_spend.loc[individual_player_spend["Gender"].eq(label), "Price"].mean()
    for label in ("Male", "Female", "Other / Non-Disclosed")
)

In [187]:
# Create the age bins for analysis: one (0, 10] bracket, then 5-year brackets
# that run up to the oldest age in the data.
# The range stop is max age + 5 so the final edge is always >= the max age.
# With "+ 4", a max age of 41, 46, ... produced a last edge below the max, and
# pd.cut would label those rows NaN, dropping them from every age-bracket
# aggregate without any warning.
# int() lets range() accept the value if the column max comes back as a float.
bins = [0, *range(10, int(df["Age"].max()) + 5, 5)]
bins

[0, 10, 15, 20, 25, 30, 35, 40, 45]

In [188]:
# Assign every purchase row to its age bracket and group the purchase log by bracket
groups = df.groupby(by=pd.cut(x=df["Age"], bins=bins))

In [189]:
# Number of purchases made within each age bracket
groups["Price"].agg("count")

Age
(0, 10]      32
(10, 15]     54
(15, 20]    200
(20, 25]    325
(25, 30]     77
(30, 35]     52
(35, 40]     33
(40, 45]      7
Name: Price, dtype: int64

In [190]:
# Mean price of a single purchase within each age bracket
groups["Price"].agg("mean")

Age
(0, 10]     3.405000
(10, 15]    2.900000
(15, 20]    3.107800
(20, 25]    3.020431
(25, 30]    2.875584
(30, 35]    2.994423
(35, 40]    3.404545
(40, 45]    3.075714
Name: Price, dtype: float64

In [191]:
# Total purchase value within each age bracket
groups["Price"].agg("sum")

Age
(0, 10]     108.96
(10, 15]    156.60
(15, 20]    621.56
(20, 25]    981.64
(25, 30]    221.42
(30, 35]    155.71
(35, 40]    112.35
(40, 45]     21.53
Name: Price, dtype: float64

In [192]:
# Collapse the purchase log to one row per (SN, Age) pair so each player can be
# binned by age exactly once; Price becomes that player's total spend.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops
# non-numeric columns from a bare .sum(): it would concatenate the Gender and
# Item Name strings instead. With the flag the result keeps the same columns on
# every pandas version (SN, Age, Purchase ID, Item ID, Price).
# The summed Purchase ID / Item ID values carry no meaning; later cells in this
# notebook read only SN, Age and Price from this frame.
users_by_age = df.groupby(["SN", "Age"], as_index=False).sum(numeric_only=True)
users_by_age

Unnamed: 0,SN,Age,Purchase ID,Item ID,Price
0,Adairialis76,16,467,123,2.28
1,Adastirin33,35,142,175,4.48
2,Aeda94,17,388,128,4.91
3,Aela59,21,28,119,4.32
4,Aelaria33,23,630,171,1.79
...,...,...,...,...,...
571,Yathecal82,20,1400,241,6.22
572,Yathedeu43,22,688,143,6.02
573,Yoishirrala98,17,572,145,4.58
574,Zhisrisu83,10,195,85,7.89


In [197]:
# Group the per-player table by age bracket, so each player is counted only once,
# and count the players that fall in every bracket
binned_age_df = users_by_age.groupby(by=pd.cut(x=users_by_age["Age"], bins=bins))
players_by_age = binned_age_df["SN"].agg("count")
players_by_age

Age
(0, 10]      24
(10, 15]     41
(15, 20]    150
(20, 25]    232
(25, 30]     59
(30, 35]     37
(35, 40]     26
(40, 45]      7
Name: SN, dtype: int64

In [211]:
# Percentage (0-100, two decimals) of all players that fall in each age bracket
percent_players_by_age = (
    binned_age_df["SN"].count()
    .div(users_by_age["SN"].count())
    .mul(100)
    .round(2)
)
percent_players_by_age

Age
(0, 10]      4.17
(10, 15]     7.12
(15, 20]    26.04
(20, 25]    40.28
(25, 30]    10.24
(30, 35]     6.42
(35, 40]     4.51
(40, 45]     1.22
Name: SN, dtype: float64

In [194]:
# Mean of the per-player total spend within each age bracket
binned_age_df["Price"].agg("mean")

Age
(0, 10]     4.540000
(10, 15]    3.819512
(15, 20]    4.143733
(20, 25]    4.231207
(25, 30]    3.752881
(30, 35]    4.208378
(35, 40]    4.321154
(40, 45]    3.075714
Name: Price, dtype: float64

In [237]:
# Top Spenders Analysis: the five players with the largest total spend,
# renumbered from 0 and reduced to the columns of interest
top_spenders = (
    users_by_age
    .nlargest(5, "Price")
    .reset_index(drop=True)
    .loc[:, ["SN", "Age", "Price"]]
)
top_spenders

Unnamed: 0,SN,Age,Price
0,Lisosia93,25,18.96
1,Idastidru52,24,15.45
2,Chamjask73,22,13.83
3,Iral74,21,13.62
4,Iskadarya95,20,13.1
