## Heroes of Pymoli Data Analysis

In [2]:
# Dependencies and set up
import pandas as pd

# Location of the raw purchase records, relative to this notebook.
filePath = 'Resources/purchase_data.csv'

# Read the CSV into a DataFrame and preview its first five rows.
purchase_data = pd.read_csv(filepath_or_buffer=filePath)
purchase_data.head(5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


### Player Count
To get the total number of players, we count the unique screen names in the "SN" column. The result is 576 unique players.

In [3]:
# Distinct screen names == distinct players; dropna=False keeps the count
# identical to len(Series.unique()), which also counts a missing value once.
purchase_data["SN"].nunique(dropna=False)

576

### Purchasing Analysis (Total)
* Number of unique items: 183
* Average Price
* Number of Purchases
* Total Revenue

In [4]:
# Headline purchasing metrics for the whole data set, shown as a one-row table.
# Currency values are rendered with an explicit two-decimal format rather than
# "$" + str(value): str() on a float can expose floating-point noise from the
# sum and drops trailing zeros (e.g. "$3.1"). The formats match the ones used
# in the per-gender purchasing table (averages "${:.2f}", totals "${:,.2f}").
price = purchase_data["Price"]

# Create dictionary to store summary values
summary_dict = {
    "Number of Unique Items": int(purchase_data["Item ID"].nunique(dropna=False)),
    "Average Price": "${:.2f}".format(price.mean()),
    "Number of Purchases": int(purchase_data["Purchase ID"].nunique(dropna=False)),
    "Total Revenue": "${:,.2f}".format(price.sum()),
}

# All values are scalars, so an explicit single-row index is required.
summary = pd.DataFrame(summary_dict, index=[0])
summary.head()

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$3.05,780,$2379.77


### Gender Demographics
* Percentage and Count of Male Players
* Percentage and Count of Female Players
* Percentage and Count of Other / Non-Disclosed

In [5]:
# Gender demographics are computed per player, not per purchase: keep only the
# first row for each screen name so repeat buyers are counted once.
df_deduped = purchase_data.drop_duplicates(subset='SN', keep='first')

# Count players per gender once. reindex() fixes the row order and yields 0
# (instead of raising KeyError) if a category is absent from the data.
gender_labels = ['Male', 'Female', 'Other / Non-Disclosed']
gender_counts = df_deduped["Gender"].value_counts().reindex(gender_labels, fill_value=0)

count_male = gender_counts["Male"]
count_female = gender_counts["Female"]
count_other = gender_counts["Other / Non-Disclosed"]
count_total = df_deduped["Gender"].count()  # players with a non-null gender

male_pct = round((count_male / count_total) * 100, 2)
female_pct = round((count_female / count_total) * 100, 2)
other_pct = round((count_other / count_total) * 100, 2)

# Player count keyed by gender. The third key was previously a duplicate
# "Male", which silently dropped the "Other / Non-Disclosed" entry.
gender_demo_summary_dict = {
    "Male": count_male,
    "Female": count_female,
    "Other / Non-Disclosed": count_other,
}

# Column label typo fixed ("Playes" -> "Players").
gender_summary = pd.DataFrame({
    "Total Count": [count_male, count_female, count_other],
    "Percentage of Players": [male_pct, female_pct, other_pct]},
    index=gender_labels)
gender_summary

Unnamed: 0,Total Count,Percentage of Playes
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


### Purchasing Analysis (Gender)
* Purchase Count
* Average Purchase Price
* Total Purchase Value
* Average Purchase Total per Person by Gender

In [31]:
# Group purchase data by gender
gender_groups = purchase_data.groupby("Gender")

# Calculate fields. The column is selected BEFORE aggregating: calling
# gender_groups.mean() on the whole frame raises TypeError on pandas >= 2.0
# because of the string columns (SN, Item Name), and aggregating every column
# only to keep one is wasted work.
avg_price = gender_groups['Price'].mean()              # Average Purchase Price
tot_purchase = gender_groups['Price'].sum()            # Total Purchase Value
num_purchases = gender_groups['Purchase ID'].count()   # Purchase Count

# Spend per unique player: divide by the per-gender player counts taken from
# the de-duplicated frame (df_deduped is built in the Gender Demographics cell).
# The two Series align on the gender label.
avg_purchase_per_person = tot_purchase / df_deduped["Gender"].value_counts()

# Create summary DataFrame
purchasing_analysis_gender = pd.DataFrame({
    'Purchase Count': num_purchases,
    'Average Purchase Price': avg_price,
    'Total Purchase Value': tot_purchase,
    'Avg Total Purchase per Person': avg_purchase_per_person
})

# Formatting: convert the money columns to display strings. The frame is
# rebuilt above on every run, so re-executing this cell is safe.
currency_formats = {
    'Average Purchase Price': "${:.2f}",
    'Total Purchase Value': "${:,.2f}",
    'Avg Total Purchase per Person': "${:.2f}",
}
for column, fmt in currency_formats.items():
    purchasing_analysis_gender[column] = purchasing_analysis_gender[column].map(fmt.format)
purchasing_analysis_gender

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56
