# Heroes of Pymoli: In-Game Purchase Insights

## Trends

Three observable trends based on the data

## Analysis

In [95]:
# Dependencies
import pandas as pd

In [96]:
# Load csv file
purchases_csv = "Resources/purchase_data.csv"

# Read csv and store in DataFrame
purchases_df = pd.read_csv(purchases_csv)
purchases_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [97]:
purchases_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Purchase ID  780 non-null    int64  
 1   SN           780 non-null    object 
 2   Age          780 non-null    int64  
 3   Gender       780 non-null    object 
 4   Item ID      780 non-null    int64  
 5   Item Name    780 non-null    object 
 6   Price        780 non-null    float64
dtypes: float64(1), int64(3), object(3)
memory usage: 42.8+ KB


In [98]:
purchases_df.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,91.755128,3.050987
std,225.310896,6.659444,52.697702,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,47.75,1.98
50%,389.5,22.0,92.0,3.15
75%,584.25,25.0,138.0,4.08
max,779.0,45.0,183.0,4.99


## Player count

Total number of players = 576

In [99]:
# Number of unique player names in SN column = total number of players
total_unique_players = purchases_df["SN"].nunique()
total_unique_players

576

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.
* Create a summary data frame to hold the results
* Optional: give the displayed data cleaner formatting
* Display the summary data frame

In [100]:
# Unique values in Item ID column provides number of unique items purchased

unique_items = purchases_df["Item ID"].nunique()
unique_items

179

In [101]:
# Mean of price column = average price
average_price = purchases_df["Price"].mean()
average_price

3.050987179487176

In [102]:
# Count of purchase ID = total number of purchases
total_purchases = purchases_df["Purchase ID"].count()
total_purchases

780

In [103]:
# Sum of price column = total revenue
total_revenue = purchases_df["Price"].sum()
total_revenue

2379.77

In [116]:
# Create a DataFrame of summary data

summary_df = pd.DataFrame({"Total unique players": [total_unique_players],"Unique items": [unique_items],
                           "Average price": [average_price],
                           "Total purchases": [total_purchases],
                          "Total revenue": [total_revenue]}).round(2)

#style.hide_index()

summary_df

Unnamed: 0,Total unique players,Unique items,Average price,Total purchases,Total revenue
0,576,179,3.05,780,2379.77


# Gender Demographics

* Percentage and Count of Male Players<br>
* Percentage and Count of Female Players<br>
* Percentage and Count of Other / Non-Disclosed

In [158]:
# Create a new DataFrame with unique players (something like a pivot table)

#purchases_df.pivot_table(index="Gender",values=["Age","Price"])




Unnamed: 0_level_0,Age,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,21.345133,3.203009
Male,22.917178,3.017853
Other / Non-Disclosed,24.2,3.346


In [83]:
gender_df = purchases_df["Gender"].value_counts()
gender_df

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [91]:
percentage = purchases_df["Gender"].value_counts()/576
percentage

Male                     1.131944
Female                   0.196181
Other / Non-Disclosed    0.026042
Name: Gender, dtype: float64

In [137]:
grouped = purchases_df.groupby(["SN","Gender"]).sum().head(10)
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase ID,Age,Item ID,Price
SN,Gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Adairialis76,Male,467,16,123,2.28
Adastirin33,Female,142,35,175,4.48
Aeda94,Male,388,17,128,4.91
Aela59,Male,28,21,119,4.32
Aelaria33,Male,630,23,171,1.79
Aelastirin39,Male,984,46,134,7.29
Aelidru27,Male,705,22,183,1.09
Aelin32,Male,723,60,326,8.98
Aelly27,Male,471,48,130,6.79
Aellynun67,Male,286,25,153,3.74


In [None]:
# It is also possible to group a DataFrame by multiple columns
# This returns an object with multiple indexes, however, which can be harder to deal with
#grouped_international_data = converted_ufo_df.groupby(['country', 'state'])

#grouped_international_data.count().head(20)

# Converting a GroupBy object into a DataFrame
#international_duration_df = pd.DataFrame(
    #grouped_international_data["duration (seconds)"].sum())
#international_duration_df.head(10)