In [1]:
#import dependencies
import pandas as pd
import json
import os

In [2]:
jsondata = os.path.join("Resources","purchase_data.json")

In [3]:
#Read JSON data into a variable
with open(jsondata) as json_data:
    d = json.load(json_data)

In [197]:
#turn data into dataframe
game_df = pd.DataFrame(d, columns=['SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price'])
game_df.head()

SN           573
Age           37
Gender         3
Item ID      183
Item Name    179
dtype: int64

In [190]:
#Player Count
total_players = len(game_df['SN'].value_counts())
Total_Players = pd.DataFrame({"Total Players": total_players}, index=[0])
Total_Players

Unnamed: 0,Total Players
0,573


In [191]:
#Purchasing Analysis

In [192]:
#Number of Unique Items
unique_items = len(game_df['Item ID'].value_counts())

#Average Purchase Price
average_price = game_df['Price'].mean()

#Total Number of Purchases
total_purchases = game_df['Item Name'].count()


#Total Revenue
total_revenue = game_df['Price'].sum()

#Create DataFrame
purchasing_analysis = pd.DataFrame({"Number of Unique Items": [unique_items],
                                   "Average Price": [average_price],
                                   "Total Purchases": [total_purchases],
                                   "Total Revenue": [total_revenue],
                                
})

#Reorder DataFrame
purchasing_analysis = purchasing_analysis[["Number of Unique Items", "Average Price","Total Purchases", "Total Revenue"]]
                                

#improve formatting
purchasing_analysis["Average Price"] = purchasing_analysis["Average Price"].map("${0:,.2f}".format)
purchasing_analysis["Total Revenue"] = purchasing_analysis["Total Revenue"].map("${0:,.2f}".format)

purchasing_analysis

Unnamed: 0,Number of Unique Items,Average Price,Total Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


In [193]:
#Gender Demographics

In [194]:
#Group data by Gender and filter duplicates
grouped_df = game_df.groupby(["Gender"])
unique_df = grouped_gender.nunique()

#Total Gender
total_gender = unique_df["SN"].sum()

#Percentage and Count of Players
count = unique_df["SN"].unique()
percentage = unique_df["SN"]/ total_gender

#Create new dataframe
final_gender = pd.DataFrame({"Percentage of Players": percentage,
                            "Count":count})
#Change percentage format and re order columns
final_gender["Percentage of Players"] = final_gender["Percentage of Players"].map("{:,.2%}".format) 
final_gender.columns = ["Percentage of Players", "Count"]
#Print final dataframe
final_gender


Unnamed: 0_level_0,Percentage of Players,Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,100,17.45%
Male,465,81.15%
Other / Non-Disclosed,8,1.40%


In [246]:
#Purchasing Analysis (Gender)

#Purchase Count
purchase_count = unique_df["Gender"].value_counts()

#Average Purchase Price
average_price = grouped_df["Price"].mean()

#Total Purchase Value
purchase_price = grouped_df["Price"].sum()

#Normalized Totals
normalized = purchase_price / count

#Create new dataframe
gender_analysis = pd.DataFrame({"Average Purchase Price": average_price,
                            "Total Purchase Price":purchase_price,
                               "Normalized Totals": normalized})

#Clean up formatting and reorder columns
gender_analysis["Average Purchase Price"] = gender_analysis["Average Purchase Price"].map("${:,.2f}".format) 
gender_analysis["Total Purchase Price"] = gender_analysis["Total Purchase Price"].map("${:,.2f}".format) 
gender_analysis["Normalized Totals"] = gender_analysis["Normalized Totals"].map("${:,.2f}".format) 
gender_analysis

Unnamed: 0_level_0,Average Purchase Price,Normalized Totals,Total Purchase Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,$2.82,$3.83,$382.91
Male,$2.95,$4.02,"$1,867.68"
Other / Non-Disclosed,$3.25,$4.47,$35.74


In [298]:
#Age Demographics
cleaned_df = game_df.drop_duplicates("SN")

#The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)
bins = [10, 14, 19, 23, 27, 31, 35, 39, 40]
groups = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

#Create a  new column for age groups and then groupby Age Groups
cleaned_df["Age Groups"] = pd.cut(cleaned_df["Age"], bins, labels=groups)
age_df = cleaned_df.groupby(["Age Groups"])

#Purchase Count
age_purchase = game_df["Age Groups"].value_counts()

#Average Purchase Price
age_average_price = age_df["Price"].mean()

#Total Purchase Value
age_price = age_df["Price"].sum()

#Normalized Totals
normalized_age = age_price / age_purchase

#Create new dataframe
age_analysis = pd.DataFrame({"Average Purchase Price":age_average_price,
                            "Total Purchase Value":age_price,
                               "Normalized Totals": normalized_age})

#Clean up formatting
age_analysis["Average Purchase Price"] = age_analysis["Average Purchase Price"].map("${:,.2f}".format) 
age_analysis["Total Purchase Value"] = age_analysis["Total Purchase Value"].map("${:,.2f}".format) 
age_analysis["Normalized Totals"] = age_analysis["Normalized Totals"].map("${:,.2f}".format) 

#Move bottom row to the top
age_analysis = pd.concat([age_analysis.loc[["<10"],:], age_analysis.drop("<10", axis=0)], axis=0)

age_analysis



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,Average Purchase Price,Normalized Totals,Total Purchase Value
<10,$2.60,$1.68,$51.99
10-14,$2.90,$2.18,$289.88
15-19,$2.90,$2.28,$605.46
20-24,$3.13,$2.24,$378.79
25-29,$3.01,$1.95,$117.24
30-34,$3.32,$2.69,$112.95
35-39,$2.71,$1.53,$46.03
40+,$3.20,$1.83,$25.61
