In [14]:
# Dependencies and Setup
import pandas as pd

# Location of the purchase records (update this path if the CSV lives elsewhere)
file_to_load = "Resources/purchase_data.csv"

# Load the purchase records from the CSV into a pandas DataFrame
purchase_data = pd.read_csv(file_to_load)

# Preview the first five rows of the DataFrame
purchase_data.head(5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [13]:
### Player Count
# The number of players is the number of distinct screen names (SN).
# dropna=False mirrors Series.unique(), which keeps a missing SN as its own entry.
player_count = purchase_data["SN"].nunique(dropna=False)
player_count_message = f"The Total Number of Players is {player_count}."
print(player_count_message)

The Total Number of Players is 576.


In [41]:
### Purchasing Analysis (Total)
# Every row of purchase_data is one purchase, so the Price column drives
# the purchase count, the average price and the revenue.
purchase_prices = purchase_data["Price"]

# Number of distinct items, distinguished by their name
# (dropna=False mirrors Series.unique(), which keeps a missing name as an entry).
# NOTE(review): the data also has an "Item ID" column -- confirm whether
# uniqueness should be judged by ID rather than by name.
unique_items = purchase_data["Item Name"].nunique(dropna=False)

# Mean price paid per purchase
avg_price = purchase_prices.mean()

# Total number of purchases (one per row)
total_purchases = purchase_prices.size

# Total revenue is the sum of all prices paid
total_revenue = purchase_prices.sum()

# Collect the calculated values into a dictionary of single-element lists,
# which becomes a one-row summary DataFrame
purchasing_analysis_d = {
    'Number of Unique Items': [unique_items],
    'Average Purchase Price': [avg_price],
    'Total Number of Purchases': [total_purchases],
    'Total Revenue': [total_revenue],
}
purchasing_analysis_df = pd.DataFrame(purchasing_analysis_d)

# Display the summary with currency formatting on the two monetary columns
currency_format = "${:,.2f}"
purchasing_analysis_df.style.format({
    'Average Purchase Price': currency_format,
    'Total Revenue': currency_format,
})

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [136]:
### Gender Demographics

# Keep a single row per screen name (SN) so that a player who made several
# purchases is only counted once below.
purchase_data_dropped = purchase_data.drop_duplicates(subset=['SN'])

# Number of players per gender, sorted alphabetically by gender label.
gender = purchase_data_dropped["Gender"].value_counts().sort_index()

# Each count is looked up by its gender label rather than by position:
# integer keys on a string-labelled Series rely on a deprecated positional
# fallback, and positions silently shift if a category is absent from the
# data. A missing label now raises a KeyError instead of mislabelling a row.
# Percentages are fractions of player_count (the number of distinct SN values).

# Finding the percentage and count of female players
female_count = gender.loc["Female"]
female_percentage = female_count/player_count

# Finding the percentage and count of male players
male_count = gender.loc["Male"]
male_percentage = male_count/player_count

# Finding the percentage and count of other/non-disclosed
other_count = gender.loc["Other / Non-Disclosed"]
other_percentage = other_count/player_count

# Creating Gender Demographics summary dataframe (one row per gender)
gender_demo_d = [
    ["Female", female_count, female_percentage],
    ["Male", male_count, male_percentage],
    ["Other / Non-Disclosed", other_count, other_percentage],
]
gender_demo_df = pd.DataFrame(gender_demo_d, columns=['Gender', 'Total Count', 'Percentage of Players'])

# Display with Gender as the index and the fraction rendered as a percentage
gender_demo_df.set_index("Gender").style.format({"Percentage of Players": "{:,.2%}"})

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06%
Male,484,84.03%
Other / Non-Disclosed,11,1.91%


In [199]:
### Purchasing Analysis (Gender)

# Group the purchase prices by gender once and reuse the grouping below
price_by_gender = purchase_data.groupby("Gender")["Price"]

# Finding the Purchase Count by Gender (sorted alphabetically by gender label)
purchase_count_gender = purchase_data["Gender"].value_counts().sort_index()

# Determining the Average Purchase Price by Gender
avg_purch_gender = price_by_gender.mean()

# Finding the Total Purchase Value by Gender
purchase_value_total = price_by_gender.sum()

# Number of distinct players per gender: keep one row per screen name (SN),
# then count the rows in each gender group. Computing it here keeps this cell
# independent of the row order of the Gender Demographics table.
players_per_gender = purchase_data.drop_duplicates(subset=["SN"]).groupby("Gender").size()

# Creating Purchasing Analysis (Gender) summary dataframe.
# All inputs are Series indexed by gender label, so pandas aligns them by
# label; nothing is paired up by position, and the frame is built in one step
# (DataFrame.append no longer exists in pandas 2.x).
columns = ["Gender", 'Purchase Count', 'Average Purchase Price', 'Total Purchase Value', "Avg. Total Purchase per Person"]
gender_purchasing_df = (
    pd.DataFrame({
        "Purchase Count": purchase_count_gender,
        "Average Purchase Price": avg_purch_gender,
        "Total Purchase Value": purchase_value_total,
        # Determining the Average Purchase Total per Person by Gender
        "Avg. Total Purchase per Person": purchase_value_total / players_per_gender,
    })
    .rename_axis("Gender")
    .reset_index()[columns]
)

# Gender labels now come straight from the data, so they match the labels
# used in the Gender Demographics table instead of a hand-typed variant.
genders = gender_purchasing_df["Gender"].tolist()

# Kept as a plain list, one value per gender in the same order as `genders`
value_person_gender = gender_purchasing_df["Avg. Total Purchase per Person"].tolist()

# Display with Gender as the index and currency formatting on monetary columns
gender_purchasing_df.set_index("Gender").style.format({
    'Average Purchase Price': '${:,.2f}',
    'Total Purchase Value': '${:,.2f}',
    'Avg. Total Purchase per Person': '${:,.2f}',
})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg. Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non Disclosed,15,$3.35,$50.19,$4.56


In [198]:
# Display the gender purchasing summary itself, i.e. without the currency
# styling that is applied only to the rendered view in the cell above.
gender_purchasing_df

Unnamed: 0,Gender,Purchase Count,Average Purchase Price,Total Purchase Value,Avg. Total Purchase per Person
0,Female,113,3.203009,361.94,4.468395
1,Male,652,3.017853,1967.64,4.065372
2,Other / Non Disclosed,15,3.346,50.19,4.562727


In [186]:
# Display the average purchase price per gender.
# This cell previously referenced `purchase_value_gender`, a name that is not
# assigned anywhere in the visible notebook and so fails on a fresh kernel;
# the per-gender mean of Price is held in `avg_purch_gender`.
avg_purch_gender

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64