### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [5]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

## Player Count

* Display the total number of players


In [8]:
purchase_data.head()
# 780 rows x 7 columns

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [6]:
purchase_data.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,91.755128,3.050987
std,225.310896,6.659444,52.697702,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,47.75,1.98
50%,389.5,22.0,92.0,3.15
75%,584.25,25.0,138.0,4.08
max,779.0,45.0,183.0,4.99


In [7]:
total_players = purchase_data["SN"].nunique()
print(total_players)

#576 unique players 

576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [8]:
# Number of Unique Item Names
unique_items = purchase_data["Item Name"].nunique()
print(unique_items)

179


In [9]:
# Average Purchase Price

average_price = purchase_data["Price"].mean()
print(average_price)

3.050987179487176


In [10]:
# Total Number of Purchases

total_purchases = purchase_data["Purchase ID"].nunique()
print(total_purchases)

780


In [11]:
# Total Revenue

total_revenue = purchase_data["Price"].sum()
print(total_revenue)

2379.77


In [12]:
summary_df = pd.DataFrame({"Unique Items": [unique_items],
                          "Average Price": average_price,
                          "Total Purchases": total_purchases,
                          "Total Revenue": total_revenue})
summary_df

Unnamed: 0,Unique Items,Average Price,Total Purchases,Total Revenue
0,179,3.050987,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [13]:
# duplicate play names removed 
gender_sn = purchase_data.loc[:,["Gender", "SN"]]
gender_sn = gender_sn.drop_duplicates()
print(gender_sn)

     Gender             SN
0      Male        Lisim78
1      Male    Lisovynya38
2      Male     Ithergue48
3      Male  Chamassasya86
4      Male      Iskosia90
..      ...            ...
773    Male         Hala31
774    Male     Jiskjask80
775  Female     Aethedru70
777    Male     Yathecal72
778    Male        Sisur91

[576 rows x 2 columns]


In [14]:
#This now takes into consideration the removal of duplicate SNs, and has gender broken out based on unqiue player
gender_total = gender_sn["Gender"].value_counts()
print(gender_total)


Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64


In [15]:
# gender count
gender_info = pd.DataFrame({"Total Count": gender_total})
gender_info

Unnamed: 0,Total Count
Male,484
Female,81
Other / Non-Disclosed,11


In [16]:
# gender percentage
gender_percentage = gender_total / total_players * 100
print(gender_percentage)

Male                     84.027778
Female                   14.062500
Other / Non-Disclosed     1.909722
Name: Gender, dtype: float64


In [17]:
summary_df = pd.DataFrame({"Gender Total": gender_total,
                           "Gender Percentage": gender_percentage})
                          
summary_df

Unnamed: 0,Gender Total,Gender Percentage
Male,484,84.027778
Female,81,14.0625
Other / Non-Disclosed,11,1.909722



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [18]:
# Calculate purchase count by gender based on Gender & Price columns 
purchase_analysis_gender_total = purchase_data.groupby(["Gender"]).count()["Price"]
print(purchase_analysis_gender_total)

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Price, dtype: int64


In [19]:
# Calculate average(mean) purchase price 
purchase_analysis_gender_average = purchase_data.groupby(["Gender"]).mean()["Price"]
print(purchase_analysis_gender_average)

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64


In [20]:
# Calculate total (sum) purchases dollar amounts by gender 
purchase_analysis_gender_total = purchase_data.groupby(["Gender"]).sum()["Price"]
print(purchase_analysis_gender_total)

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64


In [21]:
# Calculate average(mean) purchase total per person by gender
average_perchase_per_person_gender = purchase_analysis_gender_total/gender_total
print(average_perchase_per_person_gender)

Female                   4.468395
Male                     4.065372
Other / Non-Disclosed    4.562727
dtype: float64


In [22]:
# Create a summary data frame to hold the results

summary_df = pd.DataFrame({"Purchase Count": purchase_analysis_gender_total,
                           "Average Purchase Price": purchase_analysis_gender_average,
                          "Total Purchase Value": purchase_analysis_gender_total, 
                          "Avg Total Purchase Per Person": average_perchase_per_person_gender})
summary_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,361.94,3.203009,361.94,4.468395
Male,1967.64,3.017853,1967.64,4.065372
Other / Non-Disclosed,50.19,3.346,50.19,4.562727


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [23]:
# Establish bins for ages
# Need to break down to get the ages players into a dataframe

player_age = purchase_data.loc[:,["Age", "SN"]]
print(player_age)

     Age             SN
0     20        Lisim78
1     40    Lisovynya38
2     24     Ithergue48
3     24  Chamassasya86
4     23      Iskosia90
..   ...            ...
775   21     Aethedru70
776   21         Iral74
777   20     Yathecal72
778    7        Sisur91
779   24      Ennrian78

[780 rows x 2 columns]


In [25]:
# Establish bins for ages
# Need to remove SN (play IDs), so there's only a dataframe with the ages for the unique players

player_age = player_age.drop_duplicates()
print(player_age)

     Age             SN
0     20        Lisim78
1     40    Lisovynya38
2     24     Ithergue48
3     24  Chamassasya86
4     23      Iskosia90
..   ...            ...
773   21         Hala31
774   11     Jiskjask80
775   21     Aethedru70
777   20     Yathecal72
778    7        Sisur91

[576 rows x 2 columns]


In [26]:
# Establish bins for ages
# Players per age

player_age_current = player_age["Age"].value_counts()
print(player_age_current)

20    69
23    49
22    49
24    48
21    43
25    43
15    26
30    25
16    24
18    21
17    19
19    17
26    11
35    10
29    10
27     9
33     9
10     7
7      7
34     7
11     6
32     6
39     6
8      6
31     5
36     5
37     5
38     5
40     5
28     4
9      4
12     4
13     3
44     2
41     2
14     2
43     1
42     1
45     1
Name: Age, dtype: int64


In [27]:
# Categorize the existing players using the age bins.
# Lables for ages are < 10, 10-14, 15-19, 20-24, 25-29, 30-34, 35-39, 40+

age_bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]

labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

In [29]:
# Breakdown of players by age bins

age_demographic = pd.cut(player_age["Age"], age_bins, labels=labels).value_counts()
print(age_demographic)

20-24    258
15-19    107
25-29     77
30-34     52
35-39     31
10-14     22
<10       17
40+       12
Name: Age, dtype: int64


In [31]:
# Calculate the numbers and percentages by age group
age_group_percent = age_demographic/total_players * 100
print(age_group_percent)

20-24    44.791667
15-19    18.576389
25-29    13.368056
30-34     9.027778
35-39     5.381944
10-14     3.819444
<10       2.951389
40+       2.083333
Name: Age, dtype: float64


In [121]:
# Create a summary data frame to hold the results

age_group_percentage = pd.DataFrame({"Total Count": age_demographic, 
                                    "Percentage of Players": age_group_percent})

summary_df

#summary table isn't working

ValueError: Shape of passed values is (2, 2), indices imply (8, 2)

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [60]:
# Purchasing Analysis (Age)
# Bin the purchase_data data frame by age
purchase_data["Age Ranges"] = pd.cut(purchase_data["Age"], age_bins, labels=labels)
print(purchase_data)

     Purchase ID             SN  Age  Gender  Item ID  \
0              0        Lisim78   20    Male      108   
1              1    Lisovynya38   40    Male      143   
2              2     Ithergue48   24    Male       92   
3              3  Chamassasya86   24    Male      100   
4              4      Iskosia90   23    Male      131   
..           ...            ...  ...     ...      ...   
775          775     Aethedru70   21  Female       60   
776          776         Iral74   21    Male      164   
777          777     Yathecal72   20    Male       67   
778          778        Sisur91    7    Male       92   
779          779      Ennrian78   24    Male       50   

                                     Item Name  Price Range of Ages Age Ranges  
0    Extraction, Quickblade Of Trembling Hands   3.53         20-24      20-24  
1                            Frenzied Scimitar   1.56           40+        40+  
2                                 Final Critic   4.88         20-24     

In [62]:
# Purchase count by Age Ranges 
age_total_analysis = purchase_data.groupby(["Age Ranges"]).count()["Price"].rename("Purchase Count")
print(age_total_analysis)

Age Ranges
<10       23
10-14     28
15-19    136
20-24    365
25-29    101
30-34     73
35-39     41
40+       13
Name: Purchase Count, dtype: int64


In [65]:
# Average Purchase Price by Age Ranges
age_average_analysis = purchase_data.groupby(["Age Ranges"]).mean()["Price"].rename("Average Purchase Price")
print(age_average_analysis)

Age Ranges
<10      3.353478
10-14    2.956429
15-19    3.035956
20-24    3.052219
25-29    2.900990
30-34    2.931507
35-39    3.601707
40+      2.941538
Name: Average Purchase Price, dtype: float64


In [66]:
# Total Purchase Value by Age Ranges
age_total_value = purchase_data.groupby(["Age Ranges"]).sum()["Price"].rename("Total Purchase Value")
print(age_total_value)

Age Ranges
<10        77.13
10-14      82.78
15-19     412.89
20-24    1114.06
25-29     293.00
30-34     214.00
35-39     147.67
40+        38.24
Name: Total Purchase Value, dtype: float64


In [72]:
# Average Total Purchase per Person by Age Ranges
per_person_purchase_avg = age_total_value/age_demographic
print(per_person_purchase_avg)

<10      4.537059
10-14    3.762727
15-19    3.858785
20-24    4.318062
25-29    3.805195
30-34    4.115385
35-39    4.763548
40+      3.186667
dtype: float64


In [84]:
# Purchasing Analysis (Age) Summary Table
purchase_analysis_age = pd.DataFrame({"Purchase Count": age_total_analysis, 
                                     "Average Purchase Price": age_average_analysis, 
                                     "Total Purchase Value": age_total_value, 
                                     "Avg Total Purchase per Person": per_person_purchase_avg})

summary_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,361.94,3.203009,361.94,4.468395
Male,1967.64,3.017853,1967.64,4.065372
Other / Non-Disclosed,50.19,3.346,50.19,4.562727


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [85]:
# Top Spenders all players 
top_spenders = purchase_data.loc[:,["SN"]]
print(top_spenders)

                SN
0          Lisim78
1      Lisovynya38
2       Ithergue48
3    Chamassasya86
4        Iskosia90
..             ...
775     Aethedru70
776         Iral74
777     Yathecal72
778        Sisur91
779      Ennrian78

[780 rows x 1 columns]


In [86]:
# Top Spenders players with duplicates removed
top_spenders = purchase_data.drop_duplicates()
print(top_spenders)

     Purchase ID             SN  Age  Gender  Item ID  \
0              0        Lisim78   20    Male      108   
1              1    Lisovynya38   40    Male      143   
2              2     Ithergue48   24    Male       92   
3              3  Chamassasya86   24    Male      100   
4              4      Iskosia90   23    Male      131   
..           ...            ...  ...     ...      ...   
775          775     Aethedru70   21  Female       60   
776          776         Iral74   21    Male      164   
777          777     Yathecal72   20    Male       67   
778          778        Sisur91    7    Male       92   
779          779      Ennrian78   24    Male       50   

                                     Item Name  Price Range of Ages Age Ranges  
0    Extraction, Quickblade Of Trembling Hands   3.53         20-24      20-24  
1                            Frenzied Scimitar   1.56           40+        40+  
2                                 Final Critic   4.88         20-24     

In [97]:
# Purchase Count by SN

sn_purchase_count = purchase_data.groupby(["SN"]).count()["Price"].rename("Purchase Count")
print(sn_purchase_count)

SN
Adairialis76     1
Adastirin33      1
Aeda94           1
Aela59           1
Aelaria33        1
                ..
Yathecal82       3
Yathedeu43       2
Yoishirrala98    1
Zhisrisu83       2
Zontibe81        3
Name: Purchase Count, Length: 576, dtype: int64


In [109]:
# Average Purchase Price by SN
# Is there a way to have these in descending order?

sn_purchase_average = purchase_data.groupby(["SN"]).mean()["Price"].rename("Average Purchase Price")
print(sn_purchase_average)


SN
Adairialis76     2.280000
Adastirin33      4.480000
Aeda94           4.910000
Aela59           4.320000
Aelaria33        1.790000
                   ...   
Yathecal82       2.073333
Yathedeu43       3.010000
Yoishirrala98    4.580000
Zhisrisu83       3.945000
Zontibe81        2.676667
Name: Average Purchase Price, Length: 576, dtype: float64


In [115]:
# Total Purchase Price by SN
sn_purchase_total = purchase_data.groupby(["SN"]).sum()["Price"].rename("Total Purchase Value")
print(sn_purchase_total)

SN
Adairialis76     2.28
Adastirin33      4.48
Aeda94           4.91
Aela59           4.32
Aelaria33        1.79
                 ... 
Yathecal82       6.22
Yathedeu43       6.02
Yoishirrala98    4.58
Zhisrisu83       7.89
Zontibe81        8.03
Name: Total Purchase Value, Length: 576, dtype: float64


In [120]:
# Top Spenders summary table 
top_spender_summary = pd.DataFrame({"Purchase Count": sn_purchase_count, 
                                   "Average Purchase Price": sn_purchase_average,
                                   "Total Purchase Value": sn_purchase_total})

# summary table isn't populating the correct info


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,361.94,3.203009,361.94,4.468395
Male,1967.64,3.017853,1967.64,4.065372
Other / Non-Disclosed,50.19,3.346,50.19,4.562727


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
