# Heroes of Pymoli: In-Game Purchase Insights

## Trends

Three observable trends based on the data

## Analysis

In [1]:
# Dependencies
import pandas as pd

In [2]:
# Load csv file
purchases_csv = "Resources/purchase_data.csv"

# Read csv and store in DataFrame
purchases_df = pd.read_csv(purchases_csv)
purchases_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
purchases_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Purchase ID  780 non-null    int64  
 1   SN           780 non-null    object 
 2   Age          780 non-null    int64  
 3   Gender       780 non-null    object 
 4   Item ID      780 non-null    int64  
 5   Item Name    780 non-null    object 
 6   Price        780 non-null    float64
dtypes: float64(1), int64(3), object(3)
memory usage: 42.8+ KB


In [4]:
purchases_df.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,91.755128,3.050987
std,225.310896,6.659444,52.697702,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,47.75,1.98
50%,389.5,22.0,92.0,3.15
75%,584.25,25.0,138.0,4.08
max,779.0,45.0,183.0,4.99


## Player count

Total number of players = 576

In [5]:
# Number of unique player names in SN column = total number of players
total_unique_players = purchases_df["SN"].nunique()
total_unique_players

576

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.
* Create a summary data frame to hold the results
* Optional: give the displayed data cleaner formatting
* Display the summary data frame

In [6]:
# Unique values in Item ID column provides number of unique items purchased

unique_items = purchases_df["Item ID"].nunique()
unique_items

179

In [7]:
# Mean of price column = average price
average_price = purchases_df["Price"].mean()
average_price

3.050987179487176

In [8]:
# Count of purchase ID = total number of purchases
total_purchases = purchases_df["Purchase ID"].count()
total_purchases

780

In [101]:
# Sum of price column = total revenue
total_revenue = purchases_df["Price"].sum()
total_revenue

2379.77

In [168]:
# Create a DataFrame of summary data
summary_df = pd.DataFrame({"Total unique players": [total_unique_players],"Unique items": [unique_items],
                           "Average price": [average_price],
                           "Total purchases": [total_purchases],
                          "Total revenue": [total_revenue]}).round(2)

#Print summary Data Frame
summary_df

Unnamed: 0,Total unique players,Unique items,Average price,Total purchases,Total revenue
0,576,179,3.05,780,2379.77


# Gender Demographics

* Percentage and Count of Male Players<br>
* Percentage and Count of Female Players<br>
* Percentage and Count of Other / Non-Disclosed

In [143]:
# Pivot table to obtain data grouped by players and genders
gender_grouped_df = pd.pivot_table(data=purchases_df,index=['SN','Gender'])
gender_grouped_df = gender_grouped_df.reset_index()
gender_grouped_df


Unnamed: 0,SN,Gender,Age,Item ID,Price,Purchase ID
0,Adairialis76,Male,16,123.000000,2.280000,467.000000
1,Adastirin33,Female,35,175.000000,4.480000,142.000000
2,Aeda94,Male,17,128.000000,4.910000,388.000000
3,Aela59,Male,21,119.000000,4.320000,28.000000
4,Aelaria33,Male,23,171.000000,1.790000,630.000000
...,...,...,...,...,...,...
571,Yathecal82,Female,20,80.333333,2.073333,466.666667
572,Yathedeu43,Male,22,71.500000,3.010000,344.000000
573,Yoishirrala98,Female,17,145.000000,4.580000,572.000000
574,Zhisrisu83,Male,10,42.500000,3.945000,97.500000


In [164]:
# Calculate and store gender counts
gender_counts = gender_grouped_df["Gender"].value_counts()
gender_counts

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [165]:
# Calculate and store gender percentages
gender_percentage = gender_grouped_df["Gender"].value_counts(normalize=True)
gender_percentage

Male                     0.840278
Female                   0.140625
Other / Non-Disclosed    0.019097
Name: Gender, dtype: float64

In [169]:
# Create a new Data Frame containing gender counts and percentages
gender_demo_df = pd.DataFrame({"Total count": gender_counts,"Percentage of players": gender_percentage}).round(2)
# Print the gender demographics Data Frame
gender_demo_df

Unnamed: 0,Total count,Percentage of players
Male,484,0.84
Female,81,0.14
Other / Non-Disclosed,11,0.02


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [178]:
# purchase_count = purchases_df.groupby(["Gender"])["Price"].count()
# purchase_count

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Price, dtype: int64

In [177]:
# average_purchase_price = purchases_df.groupby(["Gender"])["Price"].mean()
# average_purchase_price 

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64

In [180]:
# total_purchase_value = purchases_df.groupby(["Gender"])["Price"].sum()
# total_purchase_value

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [228]:
# Instead of separate calculations for each variable, use aggregation function to find purchase coune, average purchase price and total purchase volume
purchasing_analysis_gender_agg = purchases_df.groupby(["Gender"]).agg({"Price": ["count", "mean",sum]})
# Rename columns
purchasing_analysis_gender_agg.columns=["Purchase Count","Average Purchase Price","Total Purchase Value"]
# purchasing_analysis_gender_agg

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,3.203009,361.94
Male,652,3.017853,1967.64
Other / Non-Disclosed,15,3.346,50.19


In [229]:
# Add a new column to the Data Frame for average total purchase per person i.e. total purchase value/purchase count
# average_purchase_pp = purchasing_analysis_gender_agg["Total Purchase Value"]/purchasing_analysis_gender_agg["Purchase Count"]
# average_purchase_pp
average_purchase_pp = purchasing_analysis_gender_agg["Total Purchase Value"]/gender_counts
purchasing_analysis_gender_agg["Avg Total Purchase per Person"] = average_purchase_pp

# Print the purchasing analysis (gender) Data Frame
purchasing_analysis_gender_agg.round(2)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.2,361.94,4.47
Male,652,3.02,1967.64,4.07
Other / Non-Disclosed,15,3.35,50.19,4.56
