In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [2]:
purchase_data.head()
purchase_data = purchase_data.dropna(how="any")
purchase_data.count()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

In [3]:
# Find the number of unique items
unique_items = purchase_data["Item Name"].nunique()

# Find the average price across the items
average_price = purchase_data["Price"].mean()

# Find the number of purchases
number_of_purchases = purchase_data["Item ID"].count()

# sum up the total revenue
total_revenue = purchase_data["Price"].sum()


# Create a summary data frame out of the series created above
summary_df = pd.DataFrame(
                        {"Number of Unique items" : unique_items,
                         "Average Price" : average_price,
                         "Total Number of Purchase" : number_of_purchases,
                         "Total Revenue" : total_revenue}, index = [0])
summary_df

Unnamed: 0,Number of Unique items,Average Price,Total Number of Purchase,Total Revenue
0,179,3.050987,780,2379.77


In [4]:
# calculate the counts for male, female, and other
gender_demo = purchase_data["Gender"].value_counts()
gender_demo_df = pd.DataFrame(gender_demo).reset_index()

gender_demo_df.columns = ['type', 'Total Count']
gender_demo_df




Unnamed: 0,type,Total Count
0,Male,652
1,Female,113
2,Other / Non-Disclosed,15


In [5]:
# calculate the counts for male, female, and other
gender_demo_df["Percentage of Players"] = gender_demo_df["Total Count"]/gender_demo_df["Total Count"].sum() * 100

gender_demo_df


Unnamed: 0,type,Total Count,Percentage of Players
0,Male,652,83.589744
1,Female,113,14.487179
2,Other / Non-Disclosed,15,1.923077


In [24]:
grouped_df = purchase_data.groupby(["Gender"])
count_of_gender = grouped_df["Price"].count()
sum_per_gender = grouped_df["Price"].sum()
avg_per_gender = grouped_df["Price"].mean()


In [25]:
grouped_df.head(10)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
5,5,Yalae81,22,Male,81,Dreamkiss,3.61
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67
8,8,Undjask33,22,Male,21,Souleater,1.1
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58


In [26]:
# Create a summary data frame out of the series created above
gender_summary_df = pd.DataFrame(
                        {"Purchase Count" : count_of_gender,
                         "Total Purchase Value" : sum_per_gender,
                         "Average Purchase Price" : avg_per_gender
                         })
 #                        "Purchase Count" : count_of_gender.values})
gender_summary_df


Unnamed: 0_level_0,Purchase Count,Total Purchase Value,Average Purchase Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,361.94,3.203009
Male,652,1967.64,3.017853
Other / Non-Disclosed,15,50.19,3.346


In [28]:
# calculate the average purchase per gender
gender_summary_df["Avg Total Purchase per Person"] = gender_summary_df["Total Purchase Value"]/gender_summary_df["Purchase Count"] 

gender_summary_df

Unnamed: 0_level_0,Purchase Count,Total Purchase Value,Average Purchase Price,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,361.94,3.203009,3.203009
Male,652,1967.64,3.017853,3.017853
Other / Non-Disclosed,15,50.19,3.346,3.346


In [20]:
grouped_person_df = purchase_data.groupby(["Gender","SN"])
avg_per_gender = grouped_person_df["Price"].mean()

In [29]:
gender_summary_df.dtypes

Purchase Count                     int64
Total Purchase Value             float64
Average Purchase Price           float64
Avg Total Purchase per Person    float64
dtype: object