# Analysis of the Heroes of Pymoli Dataset

# Section 1: Setup

In [37]:
#Import Dependencies
import pandas as pd
import os

#Build the relative path to purchase_data.csv inside the Resources folder, then load it into a DataFrame
path = os.path.join('Resources', 'purchase_data.csv')
df = pd.read_csv(filepath_or_buffer=path)

In [38]:
#Preview the first five rows to check the columns and some sample values
df.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [39]:
#Print a summary of the DataFrame: number of rows, column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Purchase ID  780 non-null    int64  
 1   SN           780 non-null    object 
 2   Age          780 non-null    int64  
 3   Gender       780 non-null    object 
 4   Item ID      780 non-null    int64  
 5   Item Name    780 non-null    object 
 6   Price        780 non-null    float64
dtypes: float64(1), int64(3), object(3)
memory usage: 42.8+ KB


In [40]:
#Check every column for missing values: True would mean that column holds at least one NaN
df.isna().any(axis=0)

Purchase ID    False
SN             False
Age            False
Gender         False
Item ID        False
Item Name      False
Price          False
dtype: bool

In [41]:
#Same per-column missing-value check written with isnull(), which pandas documents as an alias of isna()
df.isnull().any(axis=0)

Purchase ID    False
SN             False
Age            False
Gender         False
Item ID        False
Item Name      False
Price          False
dtype: bool

# Section 2: Player Count

In [42]:
#A player is identified by SN, so collect the distinct SN values
unique_player = df['SN'].unique()

#The number of distinct SN values is the total number of players
total_player = len(unique_player)

#Show the total number of players as a one-row DataFrame
total_playerdf = pd.DataFrame({'Player Count': [total_player]})
total_playerdf

Unnamed: 0,Player Count
0,576


# Section 3: Purchasing Analysis

In [49]:
#Each row of df is one purchase, so the row count is the total number of purchases
total_purchase = df.shape[0]

#Price statistics: total revenue and average price per purchase
total_revenue = df['Price'].sum()
avg_price = df['Price'].mean()

#Distinct items in the game, identified by Item ID
unique_items = df['Item ID'].unique()
total_items = len(unique_items)

#Assemble the one-row Purchasing Analysis table (keys are listed in display order)
panalysisdf = pd.DataFrame({
    'Number Unique Items': [total_items],
    'Avg Purchase Price': [avg_price],
    'Total Number Purchases': [total_purchase],
    'Total Revenue': [total_revenue],
})

#Render the two money columns as dollar strings with two decimals
panalysisdf['Avg Purchase Price'] = panalysisdf['Avg Purchase Price'].map(lambda amount: f'${amount:.2f}')
panalysisdf['Total Revenue'] = panalysisdf['Total Revenue'].map(lambda amount: f'${amount:.2f}')

#Display the Purchasing Analysis table
panalysisdf

Unnamed: 0,Number Unique Items,Avg Purchase Price,Total Number Purchases,Total Revenue
0,179,$3.05,780,$2379.77


# Section 4: Demographics Analysis

In [84]:
#Create a players table holding only the SN and Gender columns
playerdf = df[['SN','Gender']]

#Keep a single row per SN so that each player is counted once
#(drop_duplicates keeps the first row seen for each SN by default)
playerdf = playerdf.drop_duplicates(subset=['SN'])

#Count players per gender from playerdf's own Gender column.
#Building the boolean masks from df (one row per purchase) and applying them to
#playerdf only works through implicit index alignment, so count directly instead.
gender_counts = playerdf['Gender'].value_counts()

#Use the de-duplicated row count as the denominator so counts and percentages
#always come from the same table
player_total = len(playerdf)

#Get percentage and count of male players (0 if the category is absent)
total_male = int(gender_counts.get('Male', 0))
percent_male = total_male/player_total*100

#Get percentage and count of female players
total_female = int(gender_counts.get('Female', 0))
percent_female = total_female/player_total*100

#Get percentage and count of Other / Non-Disclosed
total_other = int(gender_counts.get('Other / Non-Disclosed', 0))
percent_other = total_other/player_total*100

#Build Gender Analysis data frame, one row per gender plus a TOTAL row
ganalysisdf = pd.DataFrame({'Total Count': [total_male, total_female, total_other, player_total],
                            '% of Players': [percent_male, percent_female, percent_other, 100]},
                           index=['Male','Female','Other / Non-Disclosed','TOTAL'])

#Format column % of Players as a percentage string with two decimals
ganalysisdf['% of Players']=ganalysisdf['% of Players'].map('{:.2f}%'.format)

#Visualize Gender Analysis data frame
ganalysisdf

Unnamed: 0,Total Count,% of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%
TOTAL,576,100.00%
