In [417]:
#Dependencies
import pandas as pd
import numpy as np
from pprint import pprint

In [418]:
#Path to the purchase data CSV, relative to the notebook's working directory
data_to_load = "Resources/purchase_data.csv"

In [419]:
#Read the CSV at data_to_load into a dataframe; every later cell derives its results from purchase_data_df
purchase_data_df = pd.read_csv(data_to_load)

In [420]:
#Preview the first rows of the dataframe to confirm the columns loaded as expected
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [421]:
#Check for holes in data: count() reports the non-null entries per column,
#so identical counts across all columns mean no column has missing values
purchase_data_df.count()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

## Player Count
Total Number of Players

In [422]:
#Count distinct SN values (presumably the player's screen name) so that a player
#with several purchases is only counted once
total_players = purchase_data_df["SN"].nunique()
total_players

576

## Purchasing Analysis (Total)
* Number of Unique Items
* Average Purchase Price
* Total Number of Purchases
* Total Revenue

In [423]:
#Calculate number of unique items
#nunique() counts the distinct non-null Item ID values directly, matching how
#total_players is computed from the SN column
unique_item_count = purchase_data_df["Item ID"].nunique()
unique_item_count

183

In [424]:
#Calculate average purchase price across all purchases, rounded to 2 decimals (cents)
average_purchase_price = round(purchase_data_df["Price"].mean(),2)
average_purchase_price

3.05

In [425]:
#Calculate total number of purchases as the number of non-null Purchase ID entries
total_purchases = purchase_data_df["Purchase ID"].count()
total_purchases

780

In [426]:
#Calculate total revenue as the sum of the Price column over every purchase
total_revenue = purchase_data_df["Price"].sum()
total_revenue

2379.77

In [427]:
#Assemble the overall purchasing metrics into a one-row summary dataframe
purchasing_analysis_df = pd.DataFrame([{
    "Number of Unique Items": unique_item_count,
    "Average Purchase Price": average_purchase_price,
    "Total Number of Purchases": total_purchases,
    "Total Revenue": total_revenue,
}])
purchasing_analysis_df

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,183,3.05,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players
* Percentage and Count of Female Players
* Percentage and Count of Other / Non-Disclosed

In [428]:
#Identify the exact spelling of each distinct value in the Gender column before grouping on it
purchase_data_df["Gender"].unique()

array(['Male', 'Other / Non-Disclosed', 'Female'], dtype=object)

In [447]:
#Gender Demographics: keep one row per (Gender, SN) pair so repeat purchasers are
#counted once, then count the remaining players within each gender
gender_df = (
    purchase_data_df[["Gender", "SN"]]
    .drop_duplicates(subset=["Gender", "SN"])
    .groupby("Gender")
    .count()
    .rename(columns={"SN": "Total Players"})
)
gender_df

Unnamed: 0_level_0,Total Players
Gender,Unnamed: 1_level_1
Female,81
Male,484
Other / Non-Disclosed,11


In [456]:
#Add Percentage of Players column: each gender's share of all unique players, in percent
gender_df["Percentage of Players"] = (
    gender_df["Total Players"] / total_players * 100
).round(2)
gender_df

Unnamed: 0_level_0,Total Players,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06
Male,484,84.03
Other / Non-Disclosed,11,1.91


## Purchasing Analysis (Gender)
Each of the following is broken down by gender:
  * Purchase Count
  * Average Purchase Price
  * Total Purchase Value
  * Average Purchase Total per Person by Gender

In [439]:
#Create a new dataframe for the per-gender purchasing analysis, starting from the unique player counts.
#copy() is required: plain assignment would only alias gender_df, so every column
#added to gender_analysis_df in the cells below would also show up in gender_df.
gender_analysis_df = gender_df.copy()

In [440]:
#Add purchase count by gender column (aligned to gender_analysis_df on the Gender index)
gender_analysis_df["Total Purchase Count"] = (
    purchase_data_df.groupby("Gender")["Purchase ID"].count()
)
gender_analysis_df

Unnamed: 0_level_0,Total Count,Percentage of Players,Total Purchase Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,81,14.06,113
Male,484,84.03,652
Other / Non-Disclosed,11,1.91,15


In [441]:
#Add average purchase price by gender column, rounded to 2 decimals
gender_analysis_df["Average Purchase Price"] = (
    purchase_data_df.groupby("Gender")["Price"].mean().round(2)
)
gender_analysis_df

Unnamed: 0_level_0,Total Count,Percentage of Players,Total Purchase Count,Average Purchase Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,81,14.06,113,3.2
Male,484,84.03,652,3.02
Other / Non-Disclosed,11,1.91,15,3.35


In [442]:
#Add total purchase value by gender column (sum of Price within each gender)
gender_analysis_df["Total Purchase Value"] = (
    purchase_data_df.groupby("Gender")["Price"].sum()
)
gender_analysis_df

Unnamed: 0_level_0,Total Count,Percentage of Players,Total Purchase Count,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,81,14.06,113,3.2,361.94
Male,484,84.03,652,3.02,1967.64
Other / Non-Disclosed,11,1.91,15,3.35,50.19


In [443]:
#Add Average Total Purchase per Person column, then remove the player-count columns
#that are only needed as the denominator.
#The unique-player column is called "Total Players" (see the rename where gender_df is
#built); referring to it as "Total Count" raises a KeyError on a fresh Restart & Run All.
player_count_columns = ["Total Players", "Percentage of Players"]
gender_analysis_df["Avg Total Purchase per Person"] = round(
    gender_analysis_df["Total Purchase Value"] / gender_analysis_df["Total Players"], 2)
gender_analysis_df = gender_analysis_df.drop(columns=player_count_columns)
gender_analysis_df

Unnamed: 0_level_0,Total Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.2,361.94,4.47
Male,652,3.02,1967.64,4.07
Other / Non-Disclosed,15,3.35,50.19,4.56


## Age Demographics
Each of the following is broken into 5-year age bins (e.g. &lt;10, 10-14, 15-19, etc.)
  * Purchase Count
  * Average Purchase Price
  * Total Purchase Value
  * Average Purchase Total per Person by Age Group