In [181]:
#Import all necessary modules
import pandas as pd
import numpy as np
import os 


In [182]:
#create the path and file
json_path =  os.path.join ("purchase_data.json")
file_df = pd.read_json(json_path)
file_df.count()

Age          780
Gender       780
Item ID      780
Item Name    780
Price        780
SN           780
dtype: int64

In [183]:
#Get generall feel of the data
file_df.columns

Index(['Age', 'Gender', 'Item ID', 'Item Name', 'Price', 'SN'], dtype='object')

In [184]:
#See what kind of types are in the data
file_df.dtypes

Age            int64
Gender        object
Item ID        int64
Item Name     object
Price        float64
SN            object
dtype: object

** Player Count**

In [185]:
#Total Number of Players
total_players = file_df["Item Name"].count()

total_players_df = pd.DataFrame({'Total Players': total_players}, index = [" "])
total_players_df

Unnamed: 0,Total Players
,780



**Purchasing Analysis (Total)**

In [186]:
# Number of Unique Items
unique_items = file_df["Item Name"].nunique()
unique_items

179

In [187]:
#average purchases price
average_purchase = np.round((file_df["Price"].mean()),decimals=2)
average_purchase_clean = '${:,.2f}'.format(average_purchase)
average_purchase_clean

'$2.93'

In [188]:
# Total Number of Purchases
total_purchases = file_df["Price"].value_counts().sum()
total_purchases

780

In [189]:
# Total Revenue
total_revenue = pd.to_numeric(average_purchase) * (total_purchases)
total_revenue = '${:,.2f}'.format(total_revenue)
total_revenue

'$2,285.40'

In [190]:
purchasing_analysis_df = pd.DataFrame({'Number of Unique Items':unique_items,
                                      'Average Price':average_purchase_clean,
                                      'Number of Purchases': total_purchases,
                                      'Total Revenue': total_revenue}, index = [" "])
purchasing_analysis_df

Unnamed: 0,Average Price,Number of Purchases,Number of Unique Items,Total Revenue
,$2.93,780,179,"$2,285.40"


**Gender Demographics**

In [60]:
file_df.groupby("Gender").count()

Unnamed: 0_level_0,Age,Item ID,Item Name,Price,SN
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,136,136,136,136,136
Male,633,633,633,633,633
Other / Non-Disclosed,11,11,11,11,11


In [191]:
# Percentage and Count of Male Players
filtered_male = len(file_df.loc[file_df["Gender"]=="Male"])
filtered_male_percent = (filtered_male/total_players).round(3)
filtered_male_percent = "{0:.2f}%".format(filtered_male_percent * 100) 
filtered_male_percent


'81.20%'

In [192]:
# Percentage and Count of Female Players
filtered_female = len(file_df.loc[file_df["Gender"]=="Female"])
filtered_female_percent = (filtered_female/total_players).round(3)
filtered_female_percent = "{0:.2f}%".format(filtered_female_percent * 100) 
filtered_female_percent

'17.40%'

In [193]:
#Percentage and Count of Other / Non-Disclosed

filtered_other = len(file_df.loc[file_df["Gender"]=="Other / Non-Disclosed"])
filtered_other_percent = (filtered_other/total_players).round(3)
filtered_other_percent = "{0:.2f}%".format(filtered_other_percent * 100) 
filtered_other_percent

'1.40%'

In [194]:
gender_demo_df = pd.DataFrame({'Total Count': [filtered_male, filtered_female,
                                                   filtered_other],
                               'Percentage of Players':[filtered_male_percent,
                                                        filtered_female_percent,
                                                        filtered_other_percent]}, 
                                index = ['Male', 'Female', 'Other / Non-Disclosed'])
gender_demo_df

Unnamed: 0,Percentage of Players,Total Count
Male,81.20%,633
Female,17.40%,136
Other / Non-Disclosed,1.40%,11


**Purchasing Analysis (Gender)** 

In [195]:
# The below each broken by gender
# Purchase Count
# pc_count_male = len(file_df.loc[file_df["Gender"]=="Male"])
# pc_count_female = len(file_df.loc[file_df["Gender"]=="Female"])
# pc_count_other = len(file_df.loc[file_df["Gender"]=="Other / Non-Disclosed"])
grouped_count = pd.DataFrame(file_df.groupby(['Gender'])['Price'].count())



In [196]:
#Average Purchase Price
grouped_purch = pd.DataFrame(file_df.groupby(['Gender'])['Price'].mean())




In [197]:
#Total Purchase Value
group_purch_value = pd.DataFrame(file_df.groupby(['Gender'])['Price'].sum())




In [198]:
normalized = group_purch_value.div(grouped_count.iloc[0], axis='columns')


In [199]:
normalized.columns = ['Normalized Totals']
grouped_purch.columns = ['Average Purchase Price']
group_purch_value.columns = ['Total Purchase Value']
grouped_count.columns = ['Purchase Count']


purchasing_ana_gen =pd.concat([grouped_count,group_purch_value,normalized], axis=1)
purchasing_ana_gen

Unnamed: 0_level_0,Purchase Count,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,136,382.91,2.815515
Male,633,1867.68,13.732941
Other / Non-Disclosed,11,35.74,0.262794


**Age Demographics**

In [None]:

# The below each broken into bins of 4 years (i.e. &lt;10, 10-14, 15-19, etc.) 
# Purchase Count
# Average Purchase Price
# Total Purchase Value
# Normalized Totals



**Top Spenders**

In [255]:
y =  file_df['SN'].groupby((file_df['Price']).sum())
y

KeyError: 2286.33

In [240]:
# Total Purchase Value
total_p_val = pd.DataFrame((file_df.groupby(['SN'])['Price'].sum()).nlargest(5))
total_p_val

Unnamed: 0_level_0,Price
SN,Unnamed: 1_level_1
Undirrala66,17.06
Saedue76,13.56
Mindimnya67,12.74
Haellysu29,12.73
Eoda93,11.58


In [238]:
# p_count.columns = ['Purchase Count']
# ave_p_val.columns = ['Average Purchase Price']
# total_p_val.columns = ['Total Purchase Value']

# top_spender =pd.concat([p_count,ave_p_val,total_p_val], axis=1)
# top_spender
t = (file_df.loc[file_df['SN'].isin(['Undirrala66','Saedue76','Haellysu29','Eoda93'])])
t =
# t = file_df.loc[file_df['SN'].isin(([file_df.groupby(['SN'])['Price'].sum().nlargest(5)])]
                            
# t
# t = file_df.loc[file_df['SN'].isin([file_df.groupby((['SN'])['Price'].sum().nlargest(5))])]
# t

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
15,21,Male,96,Blood-Forged Skeletal Spine,4.77,Haellysu29
79,29,Male,144,Blood Infused Guardian,2.86,Undirrala66
107,29,Male,115,Spectral Diamond Doomblade,4.25,Undirrala66
108,22,Male,35,Heartless Bone Dualblade,2.63,Eoda93
131,29,Male,62,Piece Maker,4.36,Undirrala66
166,22,Male,173,Stormfury Longsword,4.83,Eoda93
259,25,Male,13,Serenity,1.49,Saedue76
337,25,Male,140,Striker,3.82,Saedue76
381,21,Male,166,Thirsty Iron Reaver,4.25,Haellysu29
411,25,Male,7,"Thorn, Satchel of Dark Souls",4.51,Saedue76


In [219]:
# Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
# SN
# Purchase Count
p_count = pd.DataFrame((file_df.groupby(['SN'])['Price'].count()).nlargest(5))
p_count


Unnamed: 0_level_0,Price
SN,Unnamed: 1_level_1
Undirrala66,5
Hailaphos89,4
Mindimnya67,4
Qarwen67,4
Saedue76,4


In [220]:
# average Purchase Price
ave_p_val = pd.DataFrame((file_df.groupby(['SN'])['Price'].mean()).nlargest(5))
ave_p_val

Unnamed: 0_level_0,Price
SN,Unnamed: 1_level_1
Frichaststa61,4.95
Palurrian69,4.95
Qiluard68,4.95
Tyarithn67,4.95
Assossa43,4.89


**Most Popular Items**

In [None]:
# Identify the 5 most popular items by purchase count, then list (in a table):
# Item ID
# Item Name
# Purchase Count
# Item Price
#Total Purchase Value

**Most Profitable Items**

In [None]:
# Identify the 5 most profitable items by total purchase value, then list (in a table):
# Item ID
# Item Name
# Purchase Count
# Item Price
# Total Purchase Value



In [None]:



# As final considerations:
# * You must submit a link to your Jupyter Notebook with the viewable Data Frames. 
# * You must include an exported markdown version of your Notebook called  `README.md` in your GitHub repository.  
# * You must include a written description of three observable trends based on the data. 

