In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
hero_file = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data_df = pd.read_csv(hero_file)

In [2]:
## 1. Player Count

In [3]:
#Display the total number of players
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [4]:
#Display the total number of players
players =purchase_data_df["SN"].value_counts()
players

Lisosia93       5
Idastidru52     4
Iral74          4
Chamjask73      3
Haillyrgue51    3
               ..
Jiskjask60      1
Iskossasda43    1
Lisilsa62       1
Yarithrgue83    1
Assylla81       1
Name: SN, Length: 576, dtype: int64

In [5]:
total_player_2= len(players)
total_player_2

576

In [6]:
#Display the total number of players
player_count_df= pd.DataFrame({"Total Players":[total_player_2]})
player_count_df

Unnamed: 0,Total Players
0,576


In [7]:
##2. Purchasing Analysis

In [8]:
#Purchasing analysis
#Run basic calculations to obtain number of unique items, average price, etc.
#Display the summary data frame
purchase_data_df.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,92.114103,3.050987
std,225.310896,6.659444,52.775943,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,48.0,1.98
50%,389.5,22.0,93.0,3.15
75%,584.25,25.0,139.0,4.08
max,779.0,45.0,183.0,4.99


In [9]:
total_revenue= purchase_data_df["Price"].sum()
total_revenue

2379.77

In [10]:
number_of_unique_items= len(purchase_data_df["Item ID"].value_counts())
number_of_unique_items

183

In [11]:
total_purchases=purchase_data_df["Purchase ID"].value_counts()
total_purchases

779    1
243    1
265    1
264    1
263    1
      ..
517    1
516    1
515    1
514    1
0      1
Name: Purchase ID, Length: 780, dtype: int64

In [12]:
number_of_purchases=total_purchases.sum()
number_of_purchases

780

In [13]:
ave_price= purchase_data_df["Price"].mean()

In [14]:
f"{ave_price:.3}"

'3.1'

In [15]:
#Purchasing analysis
#Run basic calculations to obtain number of unique items, average price, etc.
#Display the summary data frame
total_purchasing_analysis_df = pd.DataFrame({"Number of Unique Items":[number_of_unique_items],
                                              "Average Price":[ave_price],
                                              "Number of Purchases":[number_of_purchases],
                                              "Total Revenue":[total_revenue]})
total_purchasing_analysis_df 

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,3.050987,780,2379.77


In [16]:
##3. Gender Demographics

In [17]:
#Percentage and Count of Male Players
#Percentage and Count of Female Players
#Percentage and Count of Other / Non-Disclosed
gender_count = purchase_data_df["Gender"].value_counts()
gender_count.head()

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [18]:
male_count= gender_count["Male"]
male_count

652

In [19]:
female_count = gender_count["Female"]
female_count

113

In [20]:
Others_count = gender_count["Other / Non-Disclosed"]
Others_count

15

In [21]:
total_gender_count = gender_count.sum()
total_gender_count

780

In [47]:
percentage = gender_count/total_gender_count 
percentage = percentage.map(lambda x : f"{(x*100):.4}%")
percentage

Male                     83.59%
Female                   14.49%
Other / Non-Disclosed    1.923%
Name: Gender, dtype: object

In [48]:
gender_demographics_df = pd.DataFrame({"Total Count": gender_count, 
                                       "Percentage of players":percentage})
gender_demographics_df.head()


Unnamed: 0,Total Count,Percentage of players
Male,652,83.59%
Female,113,14.49%
Other / Non-Disclosed,15,1.923%


In [24]:
##4. Purchasing Analysis (Gender)

In [25]:
#Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender
#Create a summary data frame to hold the results
#Optional: give the displayed data cleaner formatting
# Display the summary data frame

In [49]:
purchase_data_df.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,92.114103,3.050987
std,225.310896,6.659444,52.775943,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,48.0,1.98
50%,389.5,22.0,93.0,3.15
75%,584.25,25.0,139.0,4.08
max,779.0,45.0,183.0,4.99


In [53]:
purchase_count= gender_count
purchase_count

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [64]:
avg_purchase_price_gender_df= purchase_data_df.set_index("Gender")
avg_purchase_price_gender_df.head()


Unnamed: 0_level_0,Purchase ID,SN,Age,Item ID,Item Name,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Male,0,Lisim78,20,108,"Extraction, Quickblade Of Trembling Hands",3.53
Male,1,Lisovynya38,40,143,Frenzied Scimitar,1.56
Male,2,Ithergue48,24,92,Final Critic,4.88
Male,3,Chamassasya86,24,100,Blindscythe,3.27
Male,4,Iskosia90,23,131,Fury,1.44


In [83]:
df = avg_purchase_price_gender_df.loc[:,["Price"]].head()
df

Unnamed: 0_level_0,Price
Gender,Unnamed: 1_level_1
Male,3.53
Male,1.56
Male,4.88
Male,3.27
Male,1.44


In [84]:
clean_df = df.dropna(how="any")
clean_df.count()

Price    5
dtype: int64

In [85]:
clean_df.head()

Unnamed: 0_level_0,Price
Gender,Unnamed: 1_level_1
Male,3.53
Male,1.56
Male,4.88
Male,3.27
Male,1.44


In [26]:
## 5. Age Demographics

In [27]:
# Establish bins for ages
#Categorize the existing players using the age bins. Hint: use pd.cut()
#Calculate the numbers and percentages by age group
#Create a summary data frame to hold the results
#Optional: round the percentage column to two decimal points
#Display Age Demographics Table

In [28]:
##6. Purchasing Analysis (Age)

In [29]:
#Bin the purchase_data data frame by age
#Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below
#Create a summary data frame to hold the results
#Optional: give the displayed data cleaner formatting
#Display the summary data frame

In [30]:
##7. Top Spenders

In [31]:
#Run basic calculations to obtain the results in the table below
#Create a summary data frame to hold the results
#Sort the total purchase value column in descending order
#Optional: give the displayed data cleaner formatting
#Display a preview of the summary data frame


In [32]:
##8. Most Popular Items

In [33]:
#Retrieve the Item ID, Item Name, and Item Price columns
#Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value
#Create a summary data frame to hold the results
#Sort the purchase count column in descending order
#Optional: give the displayed data cleaner formatting
#Display a preview of the summary data frame

In [34]:
## 9. Most Profitable Items

In [35]:
#Sort the above table by total purchase value in descending order
#Optional: give the displayed data cleaner formatting
#Display a preview of the data frame