Dependencies

In [2]:
import os
import pandas as pd

Data &rarr; data frame

In [3]:
purchase_data_path = os.path.join('Resources','purchase_data.json')
purchase_df = pd.read_json(purchase_data_path)

#### Get total number of players

In [4]:
#Distinguish players by SN attribute
total_players = len(purchase_df['SN'].unique())
total_players_df = pd.DataFrame({'Total Players':[total_players]})
total_players_df


Unnamed: 0,Total Players
0,573


####  Do purchase analysis

In [5]:
total_items = len(purchase_df['Item ID'].unique())
average_item_price = purchase_df['Price'].mean()
total_purchases = purchase_df['Price'].count()
total_revenue = purchase_df['Price'].sum()
#create dataframe with computed values
purchase_analysis_total_df = pd.DataFrame(
    {'Number of Unique Items':[total_items],
     'Average Purchase Price':[average_item_price],
     'Total Number of Purchases':[total_purchases],
     'Total Revenue':[total_revenue]})
#format currency
purchase_analysis_total_df['Average Purchase Price'] = \
purchase_analysis_total_df['Average Purchase Price'].map('${:,.2f}'.format)
purchase_analysis_total_df['Total Revenue'] = \
purchase_analysis_total_df['Total Revenue'].map('${:,.2f}'.format)
#re-arrange columns
purchase_analysis_total_df = purchase_analysis_total_df[['Number of Unique Items',
                                                        'Average Purchase Price',
                                                        'Total Number of Purchases',
                                                        'Total Revenue']]
purchase_analysis_total_df

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


#### Gender demographics

In [6]:
males_df = purchase_df.loc[purchase_df['Gender'] == 'Male']
females_df = purchase_df.loc[purchase_df['Gender'] == 'Female']
#use SN to distinguish players and avoid double counting
males = len(males_df['SN'].unique())
females = len(females_df['SN'].unique())
others = len(purchase_df['SN'].unique()) - males - females
#create dataframe 
gender_df = pd.DataFrame({'Male':[100*males/total_players, males],
                          'Female':[100*females/total_players, females],
                         'Other / Non-Disclose':[100*others/total_players, others]})
#re-arrange columns
gender_df = gender_df[['Male', 'Female','Other / Non-Disclose']]
gender_df.index = ['Percentage of Players', 'Total Count']
gender_df.transpose()


Unnamed: 0,Percentage of Players,Total Count
Male,81.151832,465.0
Female,17.452007,100.0
Other / Non-Disclose,1.396161,8.0


#### Do purchase analysis by gender

In [36]:
#group data into gender categories
purchase_gender_gp = purchase_df.groupby('Gender')
#aggregate to get each category's count, sum, mean, divide sum/count to get norm totals
gender_count_series =  purchase_gender_gp['Price'].count()
gender_mean_series = purchase_gender_gp['Price'].mean()
gender_total_series = purchase_gender_gp['Price'].sum()
gender_norm_series = \
purchase_gender_gp['Price'].sum()/purchase_gender_gp['Price'].count()
#round float values
gender_mean_series.map(lambda x: round(x,3))
gender_total_series.map(lambda x: round(x,3))
gender_norm_series.map(lambda x: round(x, 3))
#merge series to craete dataframe
purchase_gender_df = pd.concat([gender_mean_series, gender_norm_series,
                                gender_count_series, gender_total_series], axis=1)
purchase_gender_df.columns = ['Purchase Count', 'Average Purchase Price',
                              'Total Purchase Value', 'Normalized Totals']
purchase_gender_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,2.815515,2.815515,136,382.91
Male,2.950521,2.950521,633,1867.68
Other / Non-Disclosed,3.249091,3.249091,11,35.74


#### Age Demographics

In [None]:
The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)
Purchase Count
Average Purchase Price
Total Purchase Value
Normalized Totals

In [48]:
purchase_df['Age'].max() - purchase_df['Age'].min()
age_bins = [0, 10, 14, 18,22, 26, 30, 34, 38, 42,150]
labels = ['<10','10-13','14-17', '18-21', '22-25', '26-29', '30-33', '34-37', '38-41','>24']

(11, 10)

In [53]:
pd.cut(purchase_df['Age'], bins=age_bins, labels=labels)
purchase_df['Age Range'] = pd.cut(purchase_df['Age'], bins=age_bins, labels=labels)
purchase_df.head()
purchase_age_range_gp = purchase_df.groupby('Age Range')
purchase_age_range_gp['Price'].count()

Age Range
<10       32
10-13     31
14-17    111
18-21    231
22-25    207
26-29     63
30-33     46
34-37     37
38-41     20
>24        2
Name: Price, dtype: int64

In [14]:
males_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [129]:
purchase_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [42]:
len(purchase_df)

780