In [75]:
%config IPCompleter.greedy=True

In [76]:
#Dependencies and Setup
import pandas as pd
import numpy as np

In [77]:
#File to Load
csv_file = 'Resources/HeroesOfPymoli.csv'

#Use Pandas to read in csv file 
heroes_df = pd.read_csv(csv_file, encoding="UTF-8")

In [78]:
heroes_df.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price'], dtype='object')

In [79]:
total_players = len(heroes_df["SN"].value_counts())

In [80]:
#PLAYER COUNT: Total # of Players:
# Find Number of Total Players and display as a DataFrame

total_players_dict = [{"Total Players": total_players}]

#Create DataFrame from Dictionary
total_players_df = pd.DataFrame(total_players_dict)
total_players_df

Unnamed: 0,Total Players
0,576


In [81]:
#PURCHASING ANALYSIS (TOTAL):
#Unique Items
unique_df = heroes_df['Item ID'].unique()

unique_count = 0
for value in unique_df:
    unique_count = unique_count + 1 

In [82]:
#Average Price
average_price = heroes_df["Price"].mean()

In [83]:
#Number of Purchases
purchases = heroes_df['Purchase ID'].count()

In [84]:
#Total Revenue
total_revenue = heroes_df['Price'].sum()

In [85]:
#Purchasing Analysis DataFrame
purchasings_analysis_df = pd.DataFrame([[unique_count, average_price, purchases, total_revenue]], columns=['Number of Unique Items','Average Price','Number of Purchases','Total Revenue'])
purchasings_analysis_df 

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.050987,780,2379.77


In [86]:
#GENDER DEMOGRAPHICS
#Count and Percentage of Female/Male Players

count_gender_df = heroes_df["Gender"].value_counts()
percent_gender_df = (heroes_df['Gender'].value_counts()/heroes_df['Gender'].count())*100

gender_analysis_df = pd.DataFrame()
gender_analysis_df['Total Count'],gender_analysis_df['Percentage'] = [count_gender_df,percent_gender_df]

gender_analysis_df


Unnamed: 0,Total Count,Percentage
Male,652,83.589744
Female,113,14.487179
Other / Non-Disclosed,15,1.923077


In [87]:
#Purchasing Analysis by Gender
male_purchase_df = pd.DataFrame
male_purchase_df = heroes_df.loc[heroes_df['Gender'] == "Male"]
male_purchase_count = male_purchase_df['Purchase ID'].count()
male_purchase_avg = male_purchase_df['Price'].mean()

In [88]:
female_purchase_df = pd.DataFrame
female_purchase_df = heroes_df.loc[heroes_df['Gender'] == "Female"]
female_purchase_count = female_purchase_df['Purchase ID'].count()
female_purchase_avg = female_purchase_df['Price'].mean()

In [89]:
other_purchase_df = pd.DataFrame
other_purchase_df = heroes_df.loc[heroes_df['Gender'] == "Other / Non-Disclosed"]
other_purchase_count = other_purchase_df['Purchase ID'].count()
other_purchase_avg = other_purchase_df['Price'].mean()

In [90]:
grouped_df = heroes_df.groupby(["Gender","SN"], as_index=False)['Price'].sum()

male_avgtotal_per_person = grouped_df[grouped_df['Gender']=='Male']['Price'].mean()
female_avgtotal_per_person = grouped_df[grouped_df['Gender']=='Female']['Price'].mean()
other_avgtotal_per_person = grouped_df[grouped_df['Gender']=="Other / Non-Disclosed"]['Price'].mean()

In [91]:
male_total_value = grouped_df[grouped_df["Gender"]=="Male"]["Price"].sum()
female_total_value = grouped_df[grouped_df["Gender"]=="Female"]["Price"].sum()
other_total_value = grouped_df[grouped_df["Gender"]=="Other / Non-Disclosed"]["Price"].sum()

In [92]:
#Purchasing Analysis by Gender DataFrame
purchase_analysis_gender = pd.DataFrame({"Gender":["Male","Female","Other / Non-Disclosed"],
                   "Purchase Count":[male_purchase_count,female_purchase_count,other_purchase_count],
                   "Average Purchase Price":[male_purchase_avg,female_purchase_avg,other_purchase_avg],
                   "Total Purchase Value":[male_total_value,female_total_value,other_total_value],
                   "Avg Total Purchase per Person":[male_avgtotal_per_person,female_avgtotal_per_person,other_avgtotal_per_person]})

purchase_analysis_gender.set_index('Gender')



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,652,3.017853,1967.64,4.065372
Female,113,3.203009,361.94,4.468395
Other / Non-Disclosed,15,3.346,50.19,4.562727


In [93]:
#Age Demographics
#Create bins
age_bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

#Add bins to dataframe
heroes_df["Age Group"] = pd.cut(heroes_df["Age"], age_bins, labels=labels, include_lowest=True)
heroes_df



Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24
...,...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54,20-24
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63,20-24
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46,20-24
778,778,Sisur91,7,Male,92,Final Critic,4.19,<10


In [94]:
#group dataframe by ages into a series
grouped_age = heroes_df.groupby("Age Group")
grouped_age
grouped_age.max()

Unnamed: 0_level_0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
<10,778,Tyeurith29,9,Male,179,"Wolf, Promise of the Moonwalker",4.93
10-14,774,Zhisrisu83,14,Male,164,Wolf,4.94
15-19,771,Yoishirrala98,19,Other / Non-Disclosed,183,Yearning Mageblade,4.91
20-24,779,Zontibe81,24,Other / Non-Disclosed,183,Yearning Mageblade,4.99
25-29,772,Yasur35,29,Other / Non-Disclosed,183,Worldbreaker,4.94
30-34,770,Yarolwen77,34,Male,178,Yearning Mageblade,4.93
35-39,768,Tyaerith73,39,Other / Non-Disclosed,179,"Wolf, Promise of the Moonwalker",4.91
40+,761,Salilis27,45,Male,173,Victor Iron Spikes,4.93


In [95]:
#Find Total Count of Purchases for each age group
purchases_by_age = grouped_age["SN"].nunique()

In [96]:
#Find Percentage of purchases by age group
percentage_purchases_by_age = (purchases_by_age/total_players)*100

In [97]:
#Dataframe of Age Information
age_demographics_df = pd.DataFrame({"Total Count":purchases_by_age, "Percentages": percentage_purchases_by_age})
age_demographics_df 

Unnamed: 0_level_0,Total Count,Percentages
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.951389
10-14,22,3.819444
15-19,107,18.576389
20-24,258,44.791667
25-29,77,13.368056
30-34,52,9.027778
35-39,31,5.381944
40+,12,2.083333


In [98]:
#Purchase Count by age
age_purchase_count = grouped_age["SN"].count()

Age Group
<10       23
10-14     28
15-19    136
20-24    365
25-29    101
30-34     73
35-39     41
40+       13
Name: SN, dtype: int64

In [100]:
#Average Purchase Price by age
avg_age_purchase = grouped_age["Price"].mean()

Age Group
<10      3.353478
10-14    2.956429
15-19    3.035956
20-24    3.052219
25-29    2.900990
30-34    2.931507
35-39    3.601707
40+      2.941538
Name: Price, dtype: float64

In [101]:
#Total Purchase Value by age
total_age_purchase = grouped_age["Price"].sum()

Age Group
<10        77.13
10-14      82.78
15-19     412.89
20-24    1114.06
25-29     293.00
30-34     214.00
35-39     147.67
40+        38.24
Name: Price, dtype: float64

In [102]:
#Average Total Purchase per Person by age
avg_total_per_person_age = total_age_purchase/purchases_by_age

Age Group
<10      4.537059
10-14    3.762727
15-19    3.858785
20-24    4.318062
25-29    3.805195
30-34    4.115385
35-39    4.763548
40+      3.186667
dtype: float64

In [105]:
age_demographics_2_df = pd.DataFrame({"Purchase Count":age_purchase_count, "Avg. Purchase Price": avg_age_purchase,
                                     "Total Purchase Value":total_age_purchase, "Avg. Total Purchase per Person": avg_total_per_person_age})
age_demographics_2_df 

Unnamed: 0_level_0,Purchase Count,Avg. Purchase Price,Total Purchase Value,Avg. Total Purchase per Person
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,3.353478,77.13,4.537059
10-14,28,2.956429,82.78,3.762727
15-19,136,3.035956,412.89,3.858785
20-24,365,3.052219,1114.06,4.318062
25-29,101,2.90099,293.0,3.805195
30-34,73,2.931507,214.0,4.115385
35-39,41,3.601707,147.67,4.763548
40+,13,2.941538,38.24,3.186667
