In [185]:
# Dependencies and Setup
import os
import pandas as pd

path = os.path.join("Resources", "purchase_data.csv")

# Read Purchasing File and store into Pandas data frame
df = pd.read_csv(path)

In [186]:
df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,101,Final Critic,4.19


In [187]:
# Checking data types
df.dtypes

Purchase ID      int64
SN              object
Age              int64
Gender          object
Item ID          int64
Item Name       object
Price          float64
dtype: object

In [188]:
# Checking if duplicate signatures (SN) exist
df["SN"].value_counts()

Lisosia93      5
Idastidru52    4
Iral74         4
Iri67          3
Tyidaim51      3
              ..
Undosia27      1
Marjask87      1
Irillo49       1
Asur96         1
Minduli80      1
Name: SN, Length: 576, dtype: int64

In [189]:
# Player Count
# Assumption: Same names represent the same person

# Total players by counting unique signatures
totplayers = df["SN"].nunique()

In [190]:
# Print results as dataframe
totplayers_dict = {"Total Players": [totplayers]}
df_totplayers = pd.DataFrame(totplayers_dict, columns = ["Total Players"])
df_totplayers

Unnamed: 0,Total Players
0,576


In [191]:
# Purchasing Analysis (Total) - PAT

# Unique item count
pat1 = df["Item ID"].nunique()

In [192]:
# Average purchase price
pat2 = df.loc[df["Item ID"].unique(), "Price"].mean()

In [193]:
# Total number of purchases
pat3 = df["Purchase ID"].nunique()

In [194]:
# Total revenue
pat4 = df["Price"].sum()

In [195]:
# Print Purchasing Analysis (Total) results
df_pat = [{"Number of Unique Items":pat1, "Average Price":pat2, "Number of Purchases":pat3, "Total Revenue":pat4}]

df_puranal_tot = (pd.DataFrame(df_pat)).round({"Number of Unique Items":0, "Average Price":2, "Number of Purchases":0, "Total Revenue":2})

df_puranal_tot

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,3.11,780,2379.77


In [196]:
# Gender demographics

# Count of Male players
M_count = (df[df["Gender"] == "Male"]["SN"].unique()).size

# Percentage of Male players    
M_percent = str(round((M_count/totplayers*100), 2)) + "%"

In [197]:
#Count of Female players
F_count = (df[df["Gender"] == "Female"]["SN"].unique()).size

# Percentage of Female players    
F_percent = str(round((F_count/totplayers*100), 2)) + "%"

In [198]:
#Count of Other / Non-Disclosed players
ON_count = (df[df["Gender"] == "Other / Non-Disclosed"]["SN"].unique()).size

# Percentage of Female players    
ON_percent = str(round((ON_count/totplayers*100), 2)) + "%"

In [199]:
gender_demo = ({"Total Count":[F_count, M_count, ON_count],
              "Percentage of Players":[F_percent, M_percent, ON_percent]})

df_gender_demo = pd.DataFrame(gender_demo, index = ["Female", "Male", "Other / Non-Disclosed"])

df_gender_demo

Unnamed: 0,Total Count,Percentage of Players
Female,81,14.06%
Male,484,84.03%
Other / Non-Disclosed,11,1.91%


In [200]:
#Purchasing analysis (Gender)

# Purchase count by Gender
F_pcount = df.loc[df["Gender"] == "Female"]["Purchase ID"].count()
M_pcount = df.loc[df["Gender"] == "Male"]["Purchase ID"].count()
ON_pcount = df.loc[df["Gender"] == "Other / Non-Disclosed"]["Purchase ID"].count()

In [201]:
# Average purchase price by Gender

F_avgpp = round((df.loc[df["Gender"] == "Female", "Price"].mean()), 2)
M_avgpp = round((df.loc[df["Gender"] == "Male", "Price"].mean()), 2)
ON_avgpp = round((df.loc[df["Gender"] == "Other / Non-Disclosed", "Price"].mean()), 2) 

# FIX!! Rounding bug?
print(F_avgpp)

3.2


In [202]:
# Total purchase value by Gender

F_tpv = df.loc[df["Gender"] == "Female"]["Price"].sum()
M_tpv = df.loc[df["Gender"] == "Male"]["Price"].sum()
ON_tpv = df.loc[df["Gender"] == "Other / Non-Disclosed"]["Price"].sum()

In [203]:
# Average purchase total per person by Gender

F_avgpt = round((F_tpv/F_count), 2)
M_avgpt = round((M_tpv/M_count), 2)
ON_avgpt = round((ON_tpv/ON_count), 2)

In [211]:
# Saving results to new dataframe

puranal_gender = ({"Purchase Count":[F_pcount, M_pcount, ON_pcount], 
                "Average Purchase Price":[F_avgpp, M_avgpp, ON_avgpp],
                "Total Purchase Value":[F_tpv, M_tpv, ON_tpv],
                "Avg Total Purchase per Person":[F_avgpt, M_avgpt, ON_avgpt]})

df_puranal_gender = pd.DataFrame(puranal_gender)

df_puranal_gender2 = df_puranal_gender.set_index("Gender").sort_index(ascending=True)

df_puranal_gender2

KeyError: "None of ['Gender'] are in the columns"