In [1]:
# Dependencies
import pandas as pd
import numpy as np

In [2]:
# import csv file
csv_path = "Resources/purchase_data.csv"

# import csv to pandas dataframe 
purchase_data_df = pd.read_csv(csv_path, low_memory=False)
purchase_data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,101,Final Critic,4.19


In [3]:
### Player Count

# Find total number of players ... must have screen name
total_players = len(purchase_data_df["SN"].value_counts())

# create dataframe and table to display total players ... name it player count?
player_count = pd.DataFrame({"Total Players":[total_players]})
player_count

Unnamed: 0,Total Players
0,576


In [4]:
### Purchasing Analysis

# Calculate unique items, average price, purchase count, and revenue
unique_item_count = len((purchase_data_df["Item ID"]).unique())
average_price = purchase_data_df["Price"].mean()
purchase_count = len((purchase_data_df["Purchase ID"]))
revenue = purchase_data_df["Price"].sum()

# Create dataframe with calculated values
purchase_analysis_df = pd.DataFrame({"Number of Unique Items":[unique_item_count], "Average Price":[average_price], 
                                     "Number of Purchases":[purchase_count], "Total Revenue":[revenue]})

## RETURN TO THIS STEP
# currency format: avg price and revenue
#purchase_analysis_df.style.format({'Average Price' : '{:.2f}'})

# show table
purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,3.050987,780,2379.77


In [5]:
### Gender Demographics

# group by gender
gender_stats = purchase_data_df.groupby("Gender")

# count total screen names by gender
gender_totals = gender_stats.nunique()["SN"]

# divide gender count by total players
player_percent = gender_totals / total_players * 100

# create gender demo dataframe
gender_demo = pd.DataFrame({"Total Count": gender_totals, "Percentage of Players": player_percent})

# Format the values sorted by total count in descending order, 
gender_demo.sort_values(["Total Count"], ascending = False)

# 2 decimal places for percentage!! 
#gender_demo.style.format("Percentage of Players", "{:.2f%}")

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.027778
Female,81,14.0625
Other / Non-Disclosed,11,1.909722


In [6]:
### Purchase analysis by gender

# total purchases by gender
purchase_count_gender = gender_stats["Purchase ID"].count()

# avg purchase prices by gender
avg_price_gender = gender_stats["Price"].mean()

# avg purchase total by gender
avg_total_gender = gender_stats["Price"].sum()

# avg purchase total by gender divided by purchase count (per unique shopper)
avg_per_shopper = avg_total_gender / gender_totals

# create dataframe with calculated values
gender_demo = pd.DataFrame({"Purchase Count": purchase_count_gender, "Average Purchase Price": avg_price_gender, 
                           "Average Purchase Value": avg_total_gender, "Avg Purchase Total per Person": avg_per_shopper})

# formatting
# index in top left for gender
gender_demo.index.name = "Gender"

#format with currency style

# run & show
gender_demo

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Average Purchase Value,Avg Purchase Total per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.203009,361.94,4.468395
Male,652,3.017853,1967.64,4.065372
Other / Non-Disclosed,15,3.346,50.19,4.562727


In [7]:
### Age Demographics

# establish bins for age groups
age_bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# sort values into established bins
purchase_data_df["Age Group"] = pd.cut(purchase_data_df["Age"], age_bins, labels=group_names)
purchase_data_df

# create new dataframe, including "Age Group", and group 
grouped_age = purchase_data_df.groupby("Age Group")

# total players by age group
age_total_count = grouped_age["SN"].nunique()

# percentages by age group
age_percentage = age_total_count / total_players * 100

# create dataframe from calculated values
age_demo = pd.DataFrame({"Total Count": age_total_count, "Percentage of Players": age_percentage})

# FORMATTING
# no index name in corner
age_demo.index.name = None
# format percentages with 2 decimal places

# run and show
age_demo

Unnamed: 0,Total Count,Percentage of Players
<10,17,2.951389
10-14,22,3.819444
15-19,107,18.576389
20-24,258,44.791667
25-29,77,13.368056
30-34,52,9.027778
35-39,31,5.381944
40+,12,2.083333


In [8]:
### Purchase Analysis by Age

# total purchases by age group
purchase_count_age = grouped_age["Purchase ID"].count()

# avg purchase prices by age group
avg_price_age = grouped_age["Price"].mean()

# avg purchase total by age group
avg_total_age = grouped_age["Price"].sum()

# avg purchase total by age group divided by purchase count (per unique shopper)
avg_per_shopper_age = avg_total_age / age_total_count

# create dataframe with calculated values
age_demo = pd.DataFrame({"Purchase Count": purchase_count_age, "Average Purchase Price": avg_price_age, 
                           "Average Purchase Value": avg_total_age , 
                         "Avg Purchase Total per Person": avg_per_shopper_age})

# formatting
# index in top left for age group
age_demo.index.name = "Age Ranges"

#format with currency style

# run & show
age_demo

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Average Purchase Value,Avg Purchase Total per Person
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,3.353478,77.13,4.537059
10-14,28,2.956429,82.78,3.762727
15-19,136,3.035956,412.89,3.858785
20-24,365,3.052219,1114.06,4.318062
25-29,101,2.90099,293.0,3.805195
30-34,73,2.931507,214.0,4.115385
35-39,41,3.601707,147.67,4.763548
40+,13,2.941538,38.24,3.186667


In [9]:
### Top Spenders

# group purchases by screen name
spender_data = purchase_data_df.groupby("SN")

# total purchases by screen name
spender_purchase_count = spender_data["Purchase ID"].count()

# average purchase by screen name
avg_purchase_spender = spender_data["Price"].mean()

# screen name purchase total
spender_purchase_total = spender_data["Price"].sum()

# create dataframe with calculated values
top_spenders = pd.DataFrame({"Purchase Count": spender_purchase_count, "Average Purchase Price": avg_purchase_spender, 
                             "Total Purchase Value": spender_purchase_total})

# FORMATTING
# sort by descending .... NOT WORKING??
top_five = top_spenders.sort_values(["Total Purchase Value"], ascending=False).head()

# format by currency

# run and show
top_spenders

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Adairialis76,1,2.280000,2.28
Adastirin33,1,4.480000,4.48
Aeda94,1,4.910000,4.91
Aela59,1,4.320000,4.32
Aelaria33,1,1.790000,1.79
...,...,...,...
Yathecal82,3,2.073333,6.22
Yathedeu43,2,3.010000,6.02
Yoishirrala98,1,4.580000,4.58
Zhisrisu83,2,3.945000,7.89
