In [1]:
## Heroes of Pymoli 
## import dependencies and create path for csv

import pandas as pd

csvreader = pd.read_csv("purchase_data.csv")
data_df = pd.DataFrame(csvreader)
data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [2]:
#Count the players by counting distinct names
players = data_df.SN.unique()
player_count = len(players)
print(f'{player_count} players')

576 players


In [3]:
#Run basic calculations to obtain number of unique items, average price, etc.
#  Unique Items
data_df.rename(columns = {'Item ID':'Item_ID'}, inplace = True)
unique_items = data_df.Item_ID.unique()
print(f'UNIQUE ITEMS {len(unique_items)}')
#  average price
price_sum = data_df['Price'].sum()
avg_price = price_sum / player_count
print(f'AVERAGE PRICE {avg_price}')
print(f'TOTAL REVENUE {price_sum} USD')


UNIQUE ITEMS 179
AVERAGE PRICE 4.131545138888889
TOTAL REVENUE 2379.77 USD


In [4]:
name_condensed_df = data_df.groupby('SN')
name_condensed_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item_ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [5]:
#Percentage and Count of Male Players
only_men_df = data_df.loc[data_df['Gender'] == 'Male', :]
men_count = len(only_men_df.SN.unique())
print(f'MEN: {men_count}')
men_percent = 100 * men_count / player_count

#Percentage and Count of Female Players
only_women_df = data_df.loc[data_df['Gender'] == 'Female', :]
women_count = len(only_women_df.SN.unique())
print(f'FEMALE: {women_count}')
women_percent = 100 * women_count / player_count

#Percentage and Count of Other / Non-Disclosed
only_nb_df = data_df.loc[data_df['Gender'] == 'Other / Non-Disclosed', :]
nb_count = len(only_nb_df.SN.unique())
print(f'OTHER: {nb_count}')
nb_percent = 100 * nb_count / player_count


only_men_df.head()

MEN: 484
FEMALE: 81
OTHER: 11


Unnamed: 0,Purchase ID,SN,Age,Gender,Item_ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [6]:
#Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender
#  MEN
price_sum_men = only_men_df['Price'].sum()
avg_price_men = price_sum_men / men_count
male_purchases = len(only_men_df.SN)
avg_male_purchase = price_sum_men / male_purchases

#  WOMEN
price_sum_women = only_women_df['Price'].sum()
avg_price_women = price_sum_women / women_count
female_purchases = len(only_women_df.SN)
avg_female_purchase = price_sum_women / female_purchases

#  NB
price_sum_nb = only_nb_df['Price'].sum()
avg_price_nb = price_sum_nb / nb_count
nb_purchases = len(only_nb_df.SN)
avg_nb_purchase = price_sum_nb / nb_purchases

#Create a summary data frame to hold the results
purchase_data = {'Gender':['Male', 'Female', 'Other/Non-Disclosed'],
                'Purchase Count':[male_purchases, female_purchases, nb_purchases],
                'Average Purchase Price':[avg_male_purchase, avg_female_purchase, avg_nb_purchase],
                'Total Purchase Value':[price_sum_men, price_sum_women, price_sum_nb],
                'Average Purchase per Person':[avg_price_men, avg_price_women, avg_price_nb]}
purchase_data_df = pd.DataFrame(purchase_data)

#Display the summary data frame
print(purchase_data_df)

                Gender  Purchase Count  Average Purchase Price  \
0                 Male             652                3.017853   
1               Female             113                3.203009   
2  Other/Non-Disclosed              15                3.346000   

   Total Purchase Value  Average Purchase per Person  
0               1967.64                     4.065372  
1                361.94                     4.468395  
2                 50.19                     4.562727  


In [7]:
#Establish bins for ages
bins = [0, 10, 20, 30, 40, 100]
binlabels = ["kids", "teens", "young adults", "adults", "older adults"]
 
#Categorize the existing players using the age bins.
data_df["Age Breakdown"] = pd.cut(data_df["Age"], bins, labels=binlabels, include_lowest=True)

#Calculate the numbers and percentages by age group
age_groups = pd.value_counts(data_df["Age Breakdown"])
age_groups = pd.DataFrame(age_groups)
purchase_count = age_groups['Age Breakdown'].sum()

def percentify(x): # IN: data from the 'Age breakdown' column
    return 100 * x / purchase_count # OUT

age_groups['percent'] = percentify(age_groups['Age Breakdown'])

#Display Age Demographics Table
print(age_groups)
 

              Age Breakdown    percent
young adults            402  51.538462
teens                   254  32.564103
adults                   85  10.897436
kids                     32   4.102564
older adults              7   0.897436


In [12]:
#Bin the purchase_data_df data frame by age
bins = [0, 10, 20, 30, 40, 100]
binlabels = ["kids", "teens", "young adults", "adults", "older adults"]

age_purchases_df = pd.cut(data_df["Age"], bins, labels=binlabels, include_lowest=True)
age_purchases_df = pd.DataFrame(age_purchases_df)
age_purchases_df = pd.merge(age_purchases_df, data_df[["SN", "Item Name", "Price"]], left_index=True, right_index=True)
age_purchases_df.head()

#Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


#Create a summary data frame to hold the results


#Optional: give the displayed data cleaner formatting


#Display the summary data frame

Unnamed: 0,Age,SN,Item Name,Price
0,teens,Lisim78,"Extraction, Quickblade Of Trembling Hands",3.53
1,adults,Lisovynya38,Frenzied Scimitar,1.56
2,young adults,Ithergue48,Final Critic,4.88
3,young adults,Chamassasya86,Blindscythe,3.27
4,young adults,Iskosia90,Fury,1.44


In [None]:
#Create a summary data frame to hold the results


#Sort the total purchase value column in descending order


#Optional: give the displayed data cleaner formatting


#Display a preview of the summary data frame

In [None]:
#Retrieve the Item ID, Item Name, and Item Price columns


#Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


#Create a summary data frame to hold the results


#Sort the purchase count column in descending order


#Optional: give the displayed data cleaner formatting


#Display a preview of the summary data frame

In [None]:
#Sort the above table by total purchase value in descending order


#Optional: give the displayed data cleaner formatting


#Display a preview of the data frame