In [1]:
# Dependencies and Setup
import pandas as pd

# CSV file to load
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing Data File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [2]:
# Get the count of total (unique) players and display them 
player_count = purchase_data['SN'].nunique()
print(f"PLAYER COUNT")
print(f"_" * 30)
print(f"\nTotal Number of Players: {player_count}")

PLAYER COUNT
______________________________

Total Number of Players: 576


In [3]:
# Run basic calculations on the data frame to come up with a total purchase analysis 
# Calculate number of unique items, average purchase price, total number of purchases and total revenue
unique_items = purchase_data['Item ID'].nunique()
average_price = purchase_data['Price'].mean()
total_purchases = purchase_data['Purchase ID'].nunique()
total_revenue = purchase_data['Price'].sum()

# Create a summary data frame to hold the analyzed results
purchase_analysis = [(unique_items, average_price, total_purchases, total_revenue)]
purchase_summary = pd.DataFrame(purchase_analysis, columns=['Number of Unique Items','Average Price','Number of Purchases','Total Revenue'])

# Format the average price and total revenue values
purchase_summary['Average Price'] = purchase_summary['Average Price'].map("${:.2f}".format)
purchase_summary['Total Revenue'] = purchase_summary['Total Revenue'].map("${:,.2f}".format)

# Display Purchase Analysis Summary data frame 
print(f"PURCHASE ANALYSIS")
print(f"_" * 77)
purchase_summary

PURCHASE ANALYSIS
_____________________________________________________________________________


Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$3.05,780,"$2,379.77"


In [4]:
# De-dup data frame so we can get a new data frame of unique players only
unique_players = purchase_data.drop_duplicates(subset='SN')

# Get count of players based on their gender
gender_counts = unique_players['Gender'].value_counts()

# Save gender counts to a new summary data frame
gender_summary = pd.DataFrame(gender_counts)

# Calculate the percentage of players for each gender
gender_summary['Percentage of Players'] = gender_summary['Gender'] / gender_summary['Gender'].sum()

# Round the percentage column to two decimal points in the summary data frame
gender_summary['Percentage of Players'] = gender_summary['Percentage of Players'].map("{:.2%}".format)

# Display Gender Demographics Summary data frame 
print(f"GENDER DEMOGRAPHICS")
print(f"_" * 53)
gender_summary

GENDER DEMOGRAPHICS
_____________________________________________________


Unnamed: 0,Gender,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [5]:
# Group the original data frame by Gender to run a Gender-based purchase analysis
gender_demographics = purchase_data.groupby(['Gender'])

# Calculate purchase count, average purchase price, total purchase value and average purchase total per person
# These are all saved as Series data type
purchase_count = gender_demographics['Purchase ID'].nunique()
average_purchase_price = gender_demographics['Price'].mean()
total_purchase_price = gender_demographics['Price'].sum()
average_purchase_total_per_player = total_purchase_price / gender_counts

# Save the purchase analyses Series' to a Dictionary
purchase_analysis_by_gender = { 'Purchase Count': purchase_count, 'Average Purchase Price': average_purchase_price, 'Total Purchase Value': total_purchase_price, 'Avg Total Purchase per Person': average_purchase_total_per_player }

# Create Purchase Analysis Summary data frame from the Dictionary
purchase_summary_by_gender = pd.DataFrame(purchase_analysis_by_gender)

# Format all the price columns in the data frame
purchase_summary_by_gender['Average Purchase Price'] = purchase_summary_by_gender['Average Purchase Price'].map("${:.2f}".format)
purchase_summary_by_gender['Total Purchase Value'] = purchase_summary_by_gender['Total Purchase Value'].map("${:,.2f}".format)
purchase_summary_by_gender['Avg Total Purchase per Person'] = purchase_summary_by_gender['Avg Total Purchase per Person'].map("${:.2f}".format)

# Display Purchase Analysis Summary data frame
print(f"PURCHASE ANALYSIS (BY GENDER)")
print(f"_" * 114)
purchase_summary_by_gender

PURCHASE ANALYSIS (BY GENDER)
__________________________________________________________________________________________________________________


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [6]:
# Establish bins for player ages
max_age = unique_players['Age'].max()
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, max_age]
age_categories = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Categorize players using the age bins
unique_players['Age Category'] = (pd.cut(unique_players.loc[:,'Age'], age_bins, labels=age_categories)).copy()

# Calculate the total number of players by age group
player_counts_by_age = unique_players['Age Category'].value_counts()

# Create a summary data frame to hold the results
age_summary_data = pd.DataFrame(player_counts_by_age)
age_summary_data = age_summary_data.rename(columns={'Age Category': 'Total Count'})

# Calculate the percentage of players by age group
age_summary_data['Percentage of Players'] = age_summary_data['Total Count'] / age_summary_data['Total Count'].sum()

# Round the percentage column to two decimal points in the summary data frame
age_summary_data['Percentage of Players'] = age_summary_data['Percentage of Players'].map("{:.2%}".format)

# Display Age Demographics Summary data frame
print(f"AGE DEMOGRAPHICS")
print(f"_" * 42)
age_summary_data

AGE DEMOGRAPHICS
__________________________________________


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0,Total Count,Percentage of Players
20-24,258,44.79%
15-19,107,18.58%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
10-14,22,3.82%
<10,17,2.95%
40+,12,2.08%


In [7]:
# Establish bins for player ages
max_age = purchase_data['Age'].max()
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, max_age]
age_categories = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Categorize players using the age bins
# Using the copy function on pd.cut to prevent "SettingWithCopyWarning" warnings:https://www.dataquest.io/blog/settingwithcopywarning/
purchase_data.loc[:,'Age Category'] = (pd.cut(purchase_data.loc[:,'Age'], age_bins, labels=age_categories)).copy()

# Group the original data frame by Age to run an Age-based purchase analysis
age_demographics = purchase_data.groupby(['Age Category'])

# Calculate purchase count, average purchase price, total purchase value and average purchase total per person
# These are all saved as Series data type
purchase_count = age_demographics['Purchase ID'].nunique()
average_purchase_price = age_demographics['Price'].mean()
total_purchase_price = age_demographics['Price'].sum()
average_purchase_total_per_player = total_purchase_price / player_counts_by_age

# Save the purchase analyses Series' to a Dictionary
purchase_analysis_by_age = { 'Purchase Count': purchase_count, 'Average Purchase Price': average_purchase_price, 'Total Purchase Value': total_purchase_price, 'Avg Total Purchase per Person': average_purchase_total_per_player }

# Create Purchase Analysis Summary data frame from the Dictionary
purchase_summary_by_age = pd.DataFrame(purchase_analysis_by_age)
purchase_summary_by_age.index.names = ['Age Ranges']

# Format all the price columns in the data frame
purchase_summary_by_age['Average Purchase Price'] = purchase_summary_by_age['Average Purchase Price'].map("${:.2f}".format)
purchase_summary_by_age['Total Purchase Value'] = purchase_summary_by_age['Total Purchase Value'].map("${:,.2f}".format)
purchase_summary_by_age['Avg Total Purchase per Person'] = purchase_summary_by_age['Avg Total Purchase per Person'].map("${:.2f}".format)

# Display Purchase Analysis Summary data frame
print(f"PURCHASE ANALYSIS (BY AGE)")
print(f"_" * 103)
purchase_summary_by_age

PURCHASE ANALYSIS (BY AGE)
_______________________________________________________________________________________________________


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


In [8]:
# Group the original data frame by player name to run an analysis on Top Spenders
grouped_by_players = purchase_data.groupby(['SN'])

# Calculate purchase count, average purchase price and total purchase value
# These are all saved as Series data type
purchase_count = grouped_by_players['Purchase ID'].nunique()
average_purchase_price = grouped_by_players['Price'].mean()
total_purchase_price = grouped_by_players['Price'].sum()

# Save the purchase analyses Series' to a Dictionary
purchase_analysis_by_player = { 'Purchase Count': purchase_count, 'Average Purchase Price': average_purchase_price, 'Total Purchase Value': total_purchase_price }

# Create Purchase Analysis Summary data frame from the Dictionary
purchase_summary_by_player = pd.DataFrame(purchase_analysis_by_player)

# Sort the total purchase value column in descending order to display the top spenders
top_spenders = purchase_summary_by_player.sort_values(['Total Purchase Value'], ascending=False)

# Format all the price columns in the most popular items data frame
top_spenders['Average Purchase Price'] = top_spenders['Average Purchase Price'].map("${:.2f}".format)
top_spenders['Total Purchase Value'] = top_spenders['Total Purchase Value'].map("${:,.2f}".format)

# Display Purchase Analysis Summary data frame
print(f"PURCHASE ANALYSIS - TOP 5 SPENDERS")
print(f"_" * 74)
top_spenders.head()

PURCHASE ANALYSIS - TOP 5 SPENDERS
__________________________________________________________________________


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


In [9]:
# Retrieve the Item ID, Item Name, and Item Price columns
item_id = purchase_data['Item ID']
item_name = purchase_data['Item Name']
item_price = purchase_data['Price']

# Group by Item ID and Item Name
grouped_by_items = purchase_data.groupby(['Item ID', 'Item Name', 'Price'])

# Perform calculations to obtain purchase count, item price, and total purchase value
purchase_count = grouped_by_items['Item ID'].count()
total_purchase_value = grouped_by_items['Price'].sum()

# Save the purchase analyses Series' to a Dictionary
items_series = { 'Purchase Count': purchase_count, 'Total Purchase Value': total_purchase_value }

# Create Purchase Analysis Summary data frame from the Dictionary
purchased_items_summary = pd.DataFrame(items_series)

# Sort the purchase count column in descending order to identify the most popular items
most_popular_items = purchased_items_summary.sort_values(['Purchase Count'], ascending=False)

# Format all the price columns in the data frame
most_popular_items['Total Purchase Value'] = most_popular_items['Total Purchase Value'].map("${:,.2f}".format)

# Display Purchase Analysis Summary data frame
print(f"PURCHASE ANALYSIS - MOST POPULAR ITEMS")
print(f"_" * 92)
most_popular_items.head()

PURCHASE ANALYSIS - MOST POPULAR ITEMS
____________________________________________________________________________________________


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Purchase Count,Total Purchase Value
Item ID,Item Name,Price,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",4.23,12,$50.76
145,Fiery Glass Crusader,4.58,9,$41.22
108,"Extraction, Quickblade Of Trembling Hands",3.53,9,$31.77
82,Nirvana,4.9,9,$44.10
19,"Pursuit, Cudgel of Necromancy",1.02,8,$8.16


In [10]:
# Sort the above table by total purchase value in descending order

most_profitable_items = purchased_items_summary.sort_values(['Total Purchase Value'], ascending=False)

# Format all the price columns in the data frame
most_profitable_items['Total Purchase Value'] = most_profitable_items['Total Purchase Value'].map("${:,.2f}".format)

# Display Purchase Analysis Summary data frame
print(f"PURCHASE ANALYSIS - MOST PROFITABLE ITEMS")
print(f"_" * 92)
most_profitable_items.head()

PURCHASE ANALYSIS - MOST PROFITABLE ITEMS
____________________________________________________________________________________________


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Purchase Count,Total Purchase Value
Item ID,Item Name,Price,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",4.23,12,$50.76
82,Nirvana,4.9,9,$44.10
145,Fiery Glass Crusader,4.58,9,$41.22
92,Final Critic,4.88,8,$39.04
103,Singed Scalpel,4.35,8,$34.80
