In [2]:
import pandas as pd
import json
import os

In [3]:
filename = 'purchase_data.json'

with open(filename, 'r') as f:
        datastore = json.load(f)


In [4]:
player_df = pd.DataFrame(datastore, columns=['Age', 'Gender','Item ID','Item Name','Price','SN'])
player_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [5]:
#Total Number of Players
total_player = len(player_df)
print("Total Player: ", total_player)

Total Player:  780


In [6]:
# Purchasing Analysis Total

# Number of Unique Items
unique_item_count = player_df['Item ID'].value_counts().count()
unique_item_count

183

In [7]:
# Average Purchase Price
average_purchase_price = player_df['Price'].mean()
average_purchase_price

2.931192307692303

In [8]:
# Total Number of Purchases
total_purchase = player_df['Price'].count()
total_purchase

780

In [9]:
# Total Revenue
total_revenue = player_df['Price'].sum()
total_revenue

2286.3299999999963

In [10]:
print("Purchasing Analysis (Total)")
purchase_analysis_total_df = pd.DataFrame({"Number of Unique Items":[unique_item_count],
                            "Average Purchase Price":["${:.2f}".format(average_purchase_price)],
                            "Total Number of Purchases":[total_purchase],
                            "Total Revenue":["${:.2f}".format(total_revenue)]
                            
                     })
purchase_analysis_total_df

Purchasing Analysis (Total)


Unnamed: 0,Average Purchase Price,Number of Unique Items,Total Number of Purchases,Total Revenue
0,$2.93,183,780,$2286.33


In [20]:
# Gender Demographics

# Percentage and Count of Male Players

number_male_players = player_df['Gender'].value_counts()['Male']
number_male_players

633

In [21]:
percent_male_players = number_male_players/total_player*100
percent_male_players

81.15384615384616

In [22]:
# Percentage and Count of Female Players
number_female_players = player_df['Gender'].value_counts()['Female']
number_female_players

136

In [23]:
percent_female_players = number_female_players/total_player*100
percent_female_players

17.435897435897434

In [26]:
# Percentage and Count of Other / Non-Disclosed

other_player = total_player - (number_male_players + number_female_players)
other_player

11

In [28]:
percent_other_players = other_player/total_player*100
percent_other_players

1.4102564102564104

In [29]:
genderdemograph={}

genderdemograph['Female']=[percent_female_players,number_female_players]
genderdemograph['Male']=[percent_male_players,number_male_players]
genderdemograph['Other/None-Disclosed']=[percent_other_players,other_player]

gender_demograph_df = pd.DataFrame(genderdemograph)
gender_demograph_new_df = gender_demograph_df.transpose()
gender_demograph_new_df.columns=['Percentage of Players', 'Total Count']
gender_demograph_new_df

Unnamed: 0,Percentage of Players,Total Count
Female,17.435897,136.0
Male,81.153846,633.0
Other/None-Disclosed,1.410256,11.0


In [30]:
group_by_gender =player_df.groupby(['Gender'])
group_by_gender_df = group_by_gender['Price'].describe()

gender_group_df = group_by_gender_df.iloc[1,:]
gender_group_df


count    633.000000
mean       2.950521
std        1.109967
min        1.030000
25%        2.040000
50%        2.910000
75%        3.910000
max        4.950000
Name: Male, dtype: float64

In [31]:
# Purchasing Analysis (Gender)

# Male players

# Purchase Count and Average Purchase Price
male_purchase_count = gender_group_df['count']
average_male_purchase_price = gender_group_df['mean']
print(male_purchase_count)
print(average_male_purchase_price)

633.0
2.95052132701


In [66]:
# Total Purchase Value
total_male_purchase = player_df.loc[player_df['Gender']=='Male']['Price'].sum()
total_male_purchase

1867.6799999999985

In [80]:
# Normalized Totals
normalized_male_total = (total_male_purchase-average_male_purchase_price*average_purchase_price)/gender_group_df['std']
normalized_male_total

1615.1065584646565

In [33]:
# Female players

gender_group_df = group_by_gender_df.iloc[0,:]
gender_group_df

count    136.000000
mean       2.815515
std        1.151027
min        1.030000
25%        1.827500
50%        2.615000
75%        3.750000
max        4.950000
Name: Female, dtype: float64

In [57]:

# Purchase Count and Average Purchase Price
female_purchase_count = gender_group_df['count']
average_female_purchase_price = gender_group_df['mean']
print(female_purchase_count)
print(average_female_purchase_price)

136.0
2.81551470588


In [58]:
# Total Purchase Value
total_female_purchase = player_df.loc[player_df['Gender']=='Female']['Price'].sum()

total_female_purchase

382.90999999999985

In [78]:
# Normalized Totals
normalized_female_total = (total_female_purchase-average_female_purchase_price*average_purchase_price)/gender_group_df['std']
normalized_female_total

325.49813780726402

In [60]:
# Other None-Disclosed Players
other_group_df = group_by_gender_df.iloc[2,:]
other_group_df

count    11.000000
mean      3.249091
std       0.957230
min       1.880000
25%       2.285000
50%       3.730000
75%       3.985000
max       4.320000
Name: Other / Non-Disclosed, dtype: float64

In [61]:
# Purchase Count and Average Purchase Price
other_purchase_count = other_group_df['count']
average_other_purchase_price = other_group_df['mean']
print(other_purchase_count)
print(average_other_purchase_price)

11.0
3.24909090909


In [75]:
#Total purchase
total_other_purchase = player_df.loc[player_df['Gender']=='Other / Non-Disclosed']['Price'].sum()
total_other_purchase

35.739999999999995

In [82]:
# Normalized Total
normalized_other_total = (total_other_purchase-average_other_purchase_price*average_purchase_price)/other_group_df['std']
normalized_other_total

27.387662655562998

In [83]:
purchase_gender={}

purchase_gender['Female']=[female_purchase_count,average_female_purchase_price,total_female_purchase,normalized_female_total]
purchase_gender['Male']=[male_purchase_count,average_male_purchase_price,total_male_purchase,normalized_male_total]
purchase_gender['Other/None-Disclosed']=[other_purchase_count,average_other_purchase_price,total_other_purchase,normalized_other_total]

purchase_gender_df = pd.DataFrame(purchase_gender)
purchase_gender_df = purchase_gender_df.transpose()
purchase_gender_df.columns=['Purchase Count', 'Purchase Count','Total Purchase Value','Normalized Totals']
purchase_gender_df

Unnamed: 0,Purchase Count,Purchase Count.1,Total Purchase Value,Normalized Totals
Female,136.0,2.815515,382.91,325.498138
Male,633.0,2.950521,1867.68,1615.106558
Other/None-Disclosed,11.0,3.249091,35.74,27.387663
