# Heroes Of The Land - Game Data Analysis

In [2]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Location of the purchase CSV, given relative to the working directory the
# notebook runs from; update this path if the data file lives somewhere else.
file_to_load = "./resources/purchase_data.csv"

# Parse the CSV into a pandas DataFrame. The cells below derive their figures
# from this frame.
purchase_data = pd.read_csv(file_to_load)


In [3]:
# purchase_data is already a DataFrame, so wrapping it in pd.DataFrame(...) only
# produced a second handle on the same underlying data. Take an explicit copy
# instead, so the working frame used by the analysis can be modified without
# touching the raw data that was loaded from disk.
df_organized = purchase_data.copy()
df_organized.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [4]:
# Count distinct players. SN (screen name) is treated as the player identifier,
# and one SN can appear on several purchase rows, so rows cannot be counted
# directly. nunique() counts the distinct non-null values in one step, replacing
# the roundabout len(value_counts()). The bare `total_players` expression that
# used to sit before the print was removed: mid-cell it displayed nothing.
total_players = df_organized["SN"].nunique()
print(f"The total number of players are {total_players}.")

The total number of players are 576.


In [6]:
# Run basic calculations to obtain number of unique items, average price, etc.

In [7]:
# Number of distinct Item ID values appearing in the purchase records.
total_unique_items = df_organized["Item ID"].nunique()
total_unique_items

183

In [9]:
# Number of distinct Item Name values appearing in the purchase records.
# This is counted separately from Item ID; the two counts need not agree.
unique_item_name = df_organized["Item Name"].nunique()
unique_item_name

179

In [10]:
# Mean of the Price column over all purchase rows, rounded to two decimals.
average_price = round(df_organized["Price"].mean(), 2)
average_price

3.05

In [11]:
# Total revenue: the sum of the Price column over every purchase row,
# rounded to two decimals.
total_revenue = round(df_organized["Price"].sum(), 2)
total_revenue

2379.77

In [12]:
# Number of distinct screen names (SN) in the purchase records.
total_unique_SN = df_organized["SN"].nunique()
total_unique_SN

576

In [13]:
# Average amount spent per player: total revenue divided by the number of
# distinct screen names, then rounded to two decimals.
avg_purchase_PP = total_revenue / total_unique_SN
avg_purchase_PP = round(avg_purchase_PP, 2)
avg_purchase_PP

4.13

In [14]:
# Number of distinct Purchase ID values. Each Purchase ID is a transaction
# number, so this is the total number of purchases made.
total_unique_purchID = df_organized["Purchase ID"].nunique()
total_unique_purchID

780

In [15]:
# Mean of the Age column, rounded to the nearest whole number.
# NOTE(review): the mean is taken over purchase rows, so a player with several
# purchases is counted once per purchase - confirm this is the intended average.
average_age = round(df_organized["Age"].mean())
average_age

23

In [16]:
#df_organized.head()

In [17]:
# One-row overview table: each headline metric computed in the cells above
# becomes a single-value column.
purch_analysis_sumdf = pd.DataFrame(
    data={
        "Total # of Unique SN's": [total_unique_SN],
        "Total # of Purchases": [total_unique_purchID],
        "Total Unique Items Offered": [total_unique_items],
        "Total Revenue in $": [total_revenue],
        "Average Purchase Per Player in $": [avg_purchase_PP],
        "Average Price in $": [average_price],
        "Average Age": [average_age],
    }
)
purch_analysis_sumdf

Unnamed: 0,Total # of Unique SN's,Total # of Purchases,Total Unique Items Offered,Total Revenue in $,Average Purchase Per Player in $,Average Price in $,Average Age
0,576,780,183,2379.77,4.13,3.05,23


In [18]:
# Keep only the purchase rows whose Gender is "Male" (all columns retained).
male_df = df_organized[df_organized["Gender"] == "Male"]
male_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [19]:
# Group the male purchase rows by screen name: one group per distinct SN.
grouped_male_df = male_df.groupby(by=["SN"])

# The number of groups is the number of distinct male players.
total_male_players = len(grouped_male_df)
total_male_players

484

In [20]:
# Keep only the purchase rows whose Gender is "Female" (all columns retained).
female_df = df_organized[df_organized["Gender"] == "Female"]
female_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
15,15,Lisassa64,21,Female,98,"Deadline, Voice Of Subtlety",2.89
18,18,Reunasu60,22,Female,82,Nirvana,4.9
38,38,Reulae52,10,Female,116,Renewed Skeletal Katana,4.18
41,41,Assosia88,20,Female,7,"Thorn, Satchel of Dark Souls",1.33
55,55,Phaelap26,25,Female,84,Arcane Gem,3.79


In [21]:
# Group the female purchase rows by screen name: one group per distinct SN.
grouped_female_df = female_df.groupby(by=["SN"])

# The number of groups is the number of distinct female players.
total_female_players = len(grouped_female_df)
total_female_players

81

In [22]:
# Keep only the purchase rows whose Gender is "Other / Non-Disclosed"
# (all columns retained).
other_df = df_organized[df_organized["Gender"] == "Other / Non-Disclosed"]
other_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58
22,22,Siarithria38,38,Other / Non-Disclosed,24,Warped Fetish,3.81
82,82,Haerithp41,16,Other / Non-Disclosed,160,Azurewrath,4.4
111,111,Sundim98,21,Other / Non-Disclosed,41,Orbit,4.75
228,228,Jiskirran77,20,Other / Non-Disclosed,80,Dreamsong,3.39


In [26]:
# Group the "Other / Non-Disclosed" purchase rows by screen name:
# one group per distinct SN.
grouped_other_df = other_df.groupby(by=["SN"])

# The number of groups is the number of distinct players in this category.
total_other_players = len(grouped_other_df)
total_other_players

11

In [27]:
# Share of all players who are male, as a percentage with two decimals.
# Scale to percent *before* rounding: rounding the ratio first and multiplying
# by 100 afterwards can reintroduce binary floating-point noise
# (for example, 0.57 * 100 evaluates to 56.99999999999999).
male_percentage = round(total_male_players / total_players * 100, 2)
male_percentage

84.03

In [28]:

# Share of all players who are female, as a percentage with two decimals.
# Scale to percent *before* rounding: rounding the ratio first and multiplying
# by 100 afterwards can reintroduce binary floating-point noise
# (for example, 0.57 * 100 evaluates to 56.99999999999999).
female_percentage = round(total_female_players / total_players * 100, 2)
female_percentage

14.06

In [29]:

# Share of all players in the "Other / Non-Disclosed" category, as a percentage
# with two decimals. Scale to percent *before* rounding: rounding the ratio
# first and multiplying by 100 afterwards can reintroduce binary floating-point
# noise (for example, 0.57 * 100 evaluates to 56.99999999999999).
other_percentage = round(total_other_players / total_players * 100, 2)
other_percentage

1.91

In [32]:

# Gender demographics table: one row per gender category, holding the number
# of distinct players and their share of all players.
player_summary_df = pd.DataFrame(
    data={
        "Total Count": [total_male_players, total_female_players, total_other_players],
        "Percentage of Players": [male_percentage, female_percentage, other_percentage],
    },
    index=["Male", "Female", "Other / Non-Disclosed"],
)
player_summary_df

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


In [33]:
# Purchase count for female players: every row of the filtered frame is one
# purchase, so the row count is the number of purchases.
female_df = df_organized[df_organized["Gender"] == "Female"]
total_purchase_countF = female_df.shape[0]
total_purchase_countF

113

In [34]:
# Purchase count for male players: every row of the filtered frame is one
# purchase, so the row count is the number of purchases.
male_df = df_organized[df_organized["Gender"] == "Male"]
total_purchase_countM = male_df.shape[0]
total_purchase_countM

652

In [35]:
# Purchase count for "Other / Non-Disclosed" players: every row of the filtered
# frame is one purchase, so the row count is the number of purchases.
other_df = df_organized[df_organized["Gender"] == "Other / Non-Disclosed"]
total_purchase_countO = other_df.shape[0]
total_purchase_countO

15