In [2]:
import pandas as pd

In [26]:
csvpath = 'Resources/purchase_data.csv'
purchases_df = pd.read_csv(csvpath)
purchases_df.head(30)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
5,5,Yalae81,22,Male,81,Dreamkiss,3.61
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67
8,8,Undjask33,22,Male,21,Souleater,1.1
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58


In [4]:
# Total Players
total_players = len(purchases_df['SN'].unique())
print(f'Total number of House of Pymoli players is {total_players}')

Total number of House of Pymoli players is 576


In [5]:
# Purchasing Analysis
## Total Items

total_items = len(purchases_df['Item ID'].unique())
print(total_items)

179


In [6]:
# Average Purchase Price

price_sum = purchases_df['Price'].sum()
average_purchase_price = price_sum/total_items
print(round(average_purchase_price, 2))

13.29


In [7]:
# Total Purchases

total_purchases = len(purchases_df)
print(total_purchases)

780


In [8]:
print(f'The total Revenue is {price_sum}')

The total Revenue is 2379.77


In [9]:
## Create new data frame for user info

user_info_df = purchases_df[['SN','Gender','Age']]

clean_user_df = user_info_df.drop_duplicates(keep='first')
print(clean_user_df)

genders = user_info_df['Gender'].unique()
print(genders)

                SN  Gender  Age
0          Lisim78    Male   20
1      Lisovynya38    Male   40
2       Ithergue48    Male   24
3    Chamassasya86    Male   24
4        Iskosia90    Male   23
..             ...     ...  ...
773         Hala31    Male   21
774     Jiskjask80    Male   11
775     Aethedru70  Female   21
777     Yathecal72    Male   20
778        Sisur91    Male    7

[576 rows x 3 columns]
['Male' 'Other / Non-Disclosed' 'Female']


In [10]:
## Gender Demographics


female_players = clean_user_df.loc[clean_user_df['Gender']=='Female']
total_female = len(female_players)
percent_female = round((total_female/total_players)*100,1)
print(f'The number of female players is {total_female} or {percent_female}% of total players.')


male_players = clean_user_df.loc[clean_user_df['Gender']=='Male']
total_male = len(male_players)
percent_male = round((total_male/total_players)*100,1)
print(f'The number of female players is {total_male} or {percent_male}% of total players.')

other_players = clean_user_df.loc[clean_user_df['Gender']=='Other / Non-Disclosed']
total_other = len(other_players)
percent_other = round((total_other/total_players)*100,1)
print(f'The number of female players is {total_other} or {percent_other}% of total players.')



The number of female players is 81 or 14.1% of total players.
The number of female players is 484 or 84.0% of total players.
The number of female players is 11 or 1.9% of total players.


In [11]:
## Female total purchases

female_df = purchases_df.loc[purchases_df['Gender']=='Female']
female_count = female_df['Item ID'].count()
print(female_count)
male_df = purchases_df.loc[purchases_df['Gender']=='Male']
male_count = male_df['Item ID'].count()
print(male_count)
other_df = purchases_df.loc[purchases_df['Gender']=='Other / Non-Disclosed']
other_count = other_df['Item ID'].count()
print(other_count)

113
652
15


In [12]:
## Purchasing Analysis (Gender)


gender_group_df = purchases_df.groupby(['Gender']).sum()
print(gender_group_df)


female_total_price = round(gender_group_df.iloc[0,3],2)
print(female_total_price)
male_total_price = round(gender_group_df.iloc[1,3],2)
print(male_total_price)
other_total_price = round(gender_group_df.iloc[2,3],2)
print(other_total_price)

total_purchases = (female_total_price+male_total_price+other_total_price)
print(total_purchases)

                       Purchase ID    Age  Item ID    Price
Gender                                                     
Female                       42870   2412     9659   361.94
Male                        255921  14942    60698  1967.64
Other / Non-Disclosed         5019    363     1212    50.19
361.94
1967.64
50.19
2379.77


In [13]:
## Average Purchase Price 

female_ave_price = round(female_total_price/female_count,2)
print(female_ave_price)
male_ave_price = round(male_total_price/male_count,2)
print(male_ave_price)
other_ave_price = round(other_total_price/other_count,2)
print(other_ave_price)

3.2
3.02
3.35


In [25]:
## Create new Data Frame for Age Analysis

unique_age_df = purchases_df[['SN','Age','Purchase ID','Item Name','Price']]
unique_age_df.head(30)

Unnamed: 0,SN,Age,Purchase ID,Item Name,Price
0,Lisim78,20,0,"Extraction, Quickblade Of Trembling Hands",3.53
1,Lisovynya38,40,1,Frenzied Scimitar,1.56
2,Ithergue48,24,2,Final Critic,4.88
3,Chamassasya86,24,3,Blindscythe,3.27
4,Iskosia90,23,4,Fury,1.44
5,Yalae81,22,5,Dreamkiss,3.61
6,Itheria73,36,6,"Interrogator, Blood Blade of the Queen",2.18
7,Iskjaskst81,20,7,Abyssal Shard,2.67
8,Undjask33,22,8,Souleater,1.1
9,Chanosian48,35,9,Ghastly Adamantite Protector,3.58


In [15]:
## Age Demographics
age_demo_df = clean_user_df.copy()

bins = [0, 20, 30, 40, 100]
age_groups = ['20 and Under', '21 to 30', '31 to 40','41 and Older']

age_demo_df['Age Group'] = pd.cut(age_demo_df['Age'], bins, labels = age_groups, include_lowest=True)
no_dup_age_df = age_demo_df.groupby(['Age Group']).count()


uni_teens = no_dup_age_df.iloc[0,0]
uni_yg_adults = no_dup_age_df.iloc[1,0]
uni_adults = no_dup_age_df.iloc[2,0]
uni_mid_adults = no_dup_age_df.iloc[3,0]

print(uni_teens)
print(uni_yg_adults)
print(uni_adults)
print(uni_mid_adults)



unique_age_df['Age Group'] = pd.cut(unique_age_df['Age'], bins, labels = age_groups, include_lowest=True)
age_group_df = unique_age_df.groupby(['Age Group']).count()
age_ave_price = unique_age_df.groupby(['Age Group']).mean()
age_total_price = unique_age_df.groupby(['Age Group']).sum()


ave_price_table = round(age_ave_price['Price'],2)
total_price_table = round(age_total_price['Price'],2)


ave_teens = round(age_ave_price.iloc[0,2],2)
ave_young_adults = round(age_ave_price.iloc[1,2],2)
ave_adults = round(age_ave_price.iloc[2,2],2)
ave_mid_age_adults = round(age_ave_price.iloc[3,2],2)

print(ave_price_table)
print(ave_teens)  # Format 2 decimal places later

## Total Purchases
total_teens = round(age_total_price.iloc[0,2],2)
total_young_adults = round(age_total_price.iloc[1,2],2)
total_adults = round(age_total_price.iloc[2,2],2)
total_mid_age_adults = round(age_total_price.iloc[3,2],2)

print(age_total_price)
print(total_teens)



215
291
63
7
Age Group
20 and Under    3.10
21 to 30        2.99
31 to 40        3.15
41 and Older    3.08
Name: Price, dtype: float64
3.1
               Age  Purchase ID    Price
Age Group                               
20 and Under  4764       112905   887.12
21 to 30      9667       155384  1203.06
31 to 40      2986        31735   268.06
41 and Older   300         3786    21.53
887.12


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unique_age_df['Age Group'] = pd.cut(unique_age_df['Age'], bins, labels = age_groups, include_lowest=True)


In [16]:
## Average Purchase Total per Person by Age Group

ave_tot_per_teen = round(total_teens/uni_teens, 2)
print(ave_tot_per_teen)
ave_tot_per_yg_adult = round(total_young_adults/uni_yg_adults, 2)
print(ave_tot_per_yg_adult)
ave_tot_per_adult = round(total_adults/uni_adults, 2)
print(ave_tot_per_adult)
ave_tot_per_mid_adult = round(total_mid_age_adults/uni_mid_adults, 2)
print(ave_tot_per_mid_adult)

4.13
4.13
4.25
3.08


In [83]:
## Identify the the top 5 spenders in the game by total purchase value, then list (in a table):

new_group = unique_age_df.groupby(['SN']).agg({'Purchase ID':['count'],
                                                    'Price':['sum']})
new_group.columns = new_group.columns.map('_'.join)

sorted_group = new_group.sort_values(['Price_sum'],ascending=False).head()

mylist = []
for i in range(0,5):
    price_sum = sorted_group['Price_sum'][i]
    purchase_count = sorted_group['Purchase ID_count'][i]
    average_price2 = round(price_sum/purchase_count,2)
    mylist.append(average_price2)


sorted_group['Average purchase Price'] = mylist
sorted_group.reset_index(drop=False)
sorted_group = sorted_group.rename(columns={'Purchase ID_count':'Purchases',
                                           'Price_sum':'Total Price',
                                            'Average purchase Price':'Average Price'})
sorted_group.reset_index(drop=False)

Unnamed: 0,SN,Purchases,Total Price,Average Price
0,Lisosia93,5,18.96,3.79
1,Idastidru52,4,15.45,3.86
2,Chamjask73,3,13.83,4.61
3,Iral74,4,13.62,3.4
4,Iskadarya95,3,13.1,4.37
