In [1]:
# Dependencies and Setup
import pandas as pd

In [9]:
# Create the purchases DataFrame from the CSV file
purchase_data_file = "Resources/purchase_data.csv"
purchase_df = pd.read_csv(filepath_or_buffer=purchase_data_file)

# Preview the first 20 rows
purchase_df.head(n=20)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
5,5,Yalae81,22,Male,81,Dreamkiss,3.61
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67
8,8,Undjask33,22,Male,21,Souleater,1.1
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58


In [8]:
# Check for missing data: number of non-null entries in each column
# (a column with missing values shows a smaller number than the others)
purchase_df.notna().sum()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

In [3]:
# Display the total number of players
# The number of players is the number of distinct values in "SN".
# nunique() counts them directly (missing values are not counted),
# without building a full value-counts table just to take its length.
player_count = purchase_df["SN"].nunique()
player_count

576

In [4]:
 # Purchasing Analysis (Total)
    # Run basic calculations to obtain number of unique items, average price, etc.
    # Create a summary data frame to hold the results
    # Optional: give the displayed data cleaner formatting
    # Display the summary data frame

# Every purchased item name, one entry per purchase row
item_list = purchase_df["Item Name"].tolist()

# Total number of purchases = number of entries in that list
total_items_purchased = len(item_list)

# Distinct item names and how many of them there are
# NOTE(review): this counts distinct names; if two different "Item ID"
# values can share a name, the result differs from a count of item IDs.
# Confirm which one is intended.
unique_items_list = purchase_df["Item Name"].unique()
num_unique_items = len(unique_items_list)

# Per-item breakdown: purchase count, mean price and summed price
item_summary_df = purchase_df.groupby("Item Name").agg(
    {
        "Item Name": ['count'],
        "Price": ['mean', 'sum'],
    }
)

# Average price and total revenue over all purchases
prices = purchase_df["Price"]
ave_price_items = prices.mean()
total_revenue = prices.sum()

# One-row summary DataFrame for the Purchasing Analysis
summary_values = {
    "Num_Unique_Items": num_unique_items,
    "Average_Price": ave_price_items,
    "Total_Purchases": total_items_purchased,
    "Total_Revenue": total_revenue,
}
purchase_analysis_df = pd.DataFrame(summary_values, index=[0])

# Render the money columns as strings such as "$2,379.77"
as_currency = '${:,.2f}'.format
for money_column in ("Average_Price", "Total_Revenue"):
    purchase_analysis_df[money_column] = purchase_analysis_df[money_column].map(as_currency)

purchase_analysis_df

Unnamed: 0,Num_Unique_Items,Average_Price,Total_Purchases,Total_Revenue
0,179,$3.05,780,"$2,379.77"


In [14]:
# Gender Demographics
# Count and percentage of players for each value of "Gender"
# (Male, Female, Other / Non-Disclosed)

# Count the distinct "SN" values within each gender group
gender_df = purchase_df.groupby("Gender").agg({"SN": 'nunique'})

# Total number of players, summed across the gender groups
total_players = gender_df["SN"].sum()

# Each group's share of the total, in percent.
# gender_df has only the "SN" column here, so this is a one-column frame.
gender_percentage = gender_df / total_players * 100

# Add the percentages as a new column. Select the "SN" Series explicitly so
# that a Series, not a one-column DataFrame, is assigned to the single column.
gender_df["Percentage"] = gender_percentage["SN"]

# Format the percentages as strings with two decimals and a percent sign
gender_df["Percentage"] = gender_df["Percentage"].map('{:.2f}%'.format)
gender_column_list = list(gender_df.columns)

# Rename the columns for display and keep the result in a new data frame
summary_gender_df = gender_df.rename(
    columns={"SN": "Total Count", 'Percentage': "Percentage of Players"}
)

summary_gender_df

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06%
Male,484,84.03%
Other / Non-Disclosed,11,1.91%
