# Analysis of the Heroes of Pymoli Dataset

# Section 1: Setup

In [10]:
#Import Dependencies
import pandas as pd
import os

#Build the CSV location relative to the notebook using OS-independent separators
path = os.path.join(
    'Resources',
    'purchase_data.csv',
)
#Load the purchase records into the working DataFrame
df = pd.read_csv(filepath_or_buffer=path)

In [11]:
#Preview the first five rows to get a feel for the columns and their values
df.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [12]:
#Review each column's data type and non-null count, plus the total number of rows
#and the frame's memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 780 entries, 0 to 779
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Purchase ID  780 non-null    int64  
 1   SN           780 non-null    object 
 2   Age          780 non-null    int64  
 3   Gender       780 non-null    object 
 4   Item ID      780 non-null    int64  
 5   Item Name    780 non-null    object 
 6   Price        780 non-null    float64
dtypes: float64(1), int64(3), object(3)
memory usage: 42.8+ KB


In [13]:
#Check column by column whether any value is missing (True would flag a column containing NaN)
df.isna().any(axis=0)

Purchase ID    False
SN             False
Age            False
Gender         False
Item ID        False
Item Name      False
Price          False
dtype: bool

In [14]:
#Repeat the per-column missing-value check using the isnull spelling
#(isnull is an alias of isna, so this reports the same result as the previous cell)
df.isnull().any()

Purchase ID    False
SN             False
Age            False
Gender         False
Item ID        False
Item Name      False
Price          False
dtype: bool

# Section 2: Player Count

In [18]:
#A player is identified by screen name (SN), so the distinct SN values are the distinct players
unique_player = df.loc[:, 'SN'].unique()
#Number of distinct players
total_player = len(unique_player)
#Present the player count as a one-row DataFrame
total_playerdf = pd.DataFrame({'Player Count': [total_player]})
total_playerdf

Unnamed: 0,Player Count
0,576


# Section 3: Purchasing Analysis

In [36]:
#Count the distinct items sold, identified by Item ID
unique_items = df['Item ID'].unique()
total_items = len(unique_items)
#Average price paid per purchase
avg_price = df['Price'].mean()
#Each row is one purchase, so the row count is the total number of purchases
total_purchase = len(df)
#Total revenue is the sum of every purchase price
total_revenue = df['Price'].sum()
#Assemble the one-row Purchasing Analysis summary
panalysisdf = pd.DataFrame([{'Number Unique Items': total_items,
                            'Avg Purchase Price': avg_price,
                            'Total Number Purchases': total_purchase,
                            'Total Revenue': total_revenue}])
#Render the monetary columns as dollar strings with two decimals.
#This is for display only: the columns become text, while the numeric values
#stay available in avg_price and total_revenue.
for money_col in ('Avg Purchase Price', 'Total Revenue'):
    panalysisdf[money_col] = panalysisdf[money_col].map('${:.2f}'.format)

#Visualize Purchasing Analysis data frame
panalysisdf

Unnamed: 0,Number Unique Items,Avg Purchase Price,Total Numver Purchases,Total Revenue
0,179,$3.05,780,$2379.77
