### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [13]:
# Dependencies and Setup
import pandas as pd

In [14]:
# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

In [15]:
# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [41]:
total_players = purchase_data['SN'].nunique()
total_players

576

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [6]:
unique_items = purchase_data['Item Name'].nunique()
unique_items

179

In [7]:
average_items = round(purchase_data['Price'].mean(),2)
average_items

3.05

In [8]:
purchase_count = len(purchase_data['Purchase ID'])
purchase_count

780

In [9]:
total_revenue = purchase_data['Price'].sum()
total_revenue

2379.77

In [10]:
format_revenue = "{:,.2f}".format((total_revenue))
format_revenue

'2,379.77'

In [11]:
purchase_analysis = pd.DataFrame({"Number of Unique Items": [unique_items],
                                  "Average Price": "$"+str(average_items),
                                  "Number of Purchases": [purchase_count],
                                  "Total Revenue": "$"+str(format_revenue)})
purchase_analysis

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [169]:
revised_purchase_list = purchase_data[["SN","Gender"]]
revised_purchase_list 

Unnamed: 0,SN,Gender
0,Lisim78,Male
1,Lisovynya38,Male
2,Ithergue48,Male
3,Chamassasya86,Male
4,Iskosia90,Male
...,...,...
775,Aethedru70,Female
776,Iral74,Male
777,Yathecal72,Male
778,Sisur91,Male


In [170]:
revised_purchase_list = revised_purchase_list.drop_duplicates()
revised_purchase_list

Unnamed: 0,SN,Gender
0,Lisim78,Male
1,Lisovynya38,Male
2,Ithergue48,Male
3,Chamassasya86,Male
4,Iskosia90,Male
...,...,...
773,Hala31,Male
774,Jiskjask80,Male
775,Aethedru70,Female
777,Yathecal72,Male


In [30]:
gender_counts = revised_purchase_list['Gender'].value_counts()
gender_counts

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [78]:
male_counts =  gender_counts['Male']
female_counts = gender_counts['Female']
other_counts = gender_counts['Other / Non-Disclosed']

In [83]:
male_percentage =  "{:,.2%}".format(male_counts / total_players)
female_percentage = "{:,.2%}".format(female_counts / total_players)
other_percentage =  "{:,.2%}".format(other_counts / total_players)
# other_percentage

'1.91%'

In [85]:
gender_percentage = (male_percentage), (female_percentage), (other_percentage)
gender_percentage

('84.03%', '14.06%', '1.91%')

In [120]:
gender_index = pd.DataFrame(gender_counts)
gender_index = gender_index.rename(
    columns={"Gender": "Total Count"})
gender_index

Unnamed: 0,Total Count
Male,484
Female,81
Other / Non-Disclosed,11


In [121]:
gender_index["Percentage of Players"] = gender_percentage
gender_index

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [127]:
full_gender_count = purchase_data['Gender'].value_counts()
full_gender_count

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [113]:
fem_purchase_data = purchase_data.loc[purchase_data['Gender'] =="Female"]
male_purchase_data = purchase_data.loc[purchase_data['Gender'] =="Male"]
other_purchase_data = purchase_data.loc[purchase_data['Gender'] =="Other / Non-Disclosed"]

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58
22,22,Siarithria38,38,Other / Non-Disclosed,24,Warped Fetish,3.81
82,82,Haerithp41,16,Other / Non-Disclosed,160,Azurewrath,4.4
111,111,Sundim98,21,Other / Non-Disclosed,41,Orbit,4.75
228,228,Jiskirran77,20,Other / Non-Disclosed,80,Dreamsong,3.39
237,237,Idairin51,20,Other / Non-Disclosed,16,Restored Bauble,3.55
242,242,Eodaisu60,16,Other / Non-Disclosed,39,"Betrayal, Whisper of Grieving Widows",3.94
291,291,Idairin51,20,Other / Non-Disclosed,35,Heartless Bone Dualblade,3.45
350,350,Rairith81,15,Other / Non-Disclosed,34,Retribution Axe,2.22
401,401,Lirtim36,15,Other / Non-Disclosed,46,Hopeless Ebon Dualblade,1.33


In [152]:
fem_purchase_count = len(fem_purchase_data['Purchase ID'])
fem_total_revenue = fem_purchase_data['Price'].sum()
fem_average_item = "$"+"{:,.2f}".format(fem_total_revenue/fem_purchase_count)
format_fem_revenue = "$"+"{:,.2f}".format(fem_total_revenue)
format_fem_revenue

'$361.94'

In [151]:
male_purchase_count = len(male_purchase_data['Purchase ID'])
male_total_revenue = male_purchase_data['Price'].sum()
male_average_item ="$"+"{:,.2f}".format(male_total_revenue/male_purchase_count)
format_male_revenue = "$"+"{:,.2f}".format(male_total_revenue)
format_male_revenue

'$1,967.64'

In [153]:
other_purchase_count = len(other_purchase_data['Purchase ID'])
other_total_revenue = other_purchase_data['Price'].sum()
other_average_item = "$"+"{:,.2f}".format(other_total_revenue/other_purchase_count)
format_other_revenue = "$"+"{:,.2f}".format(other_total_revenue)
format_other_revenue

'$50.19'

In [155]:
combined_average_price = (male_average_item), (fem_average_item), (other_average_item)
combined_total_revenue = (format_male_revenue), (format_fem_revenue), (format_other_revenue)
combined_average_price

('$3.02', '$3.20', '$3.35')

In [129]:
gender_purchase_analysis = pd.DataFrame(full_gender_count)
gender_purchase_analysis = gender_purchase_analysis.rename(
    columns={"Gender": "Purchase Count"})


Unnamed: 0,Purchase Count
Male,652
Female,113
Other / Non-Disclosed,15


In [157]:
gender_purchase_analysis["Average Purchase Price"] = combined_average_price
gender_purchase_analysis["Total Purchase Value"] = combined_total_revenue
gender_purchase_analysis

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value
Male,652,$3.02,"$1,967.64"
Female,113,$3.20,$361.94
Other / Non-Disclosed,15,$3.35,$50.19


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [314]:
bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9,39.9, 100]
age_range =["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40 >"]

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [315]:
revised_age_data = purchase_data[["SN","Age","Gender"]]
revised_age_data = revised_age_data.drop_duplicates()
revised_age_data

Unnamed: 0,SN,Age,Gender
0,Lisim78,20,Male
1,Lisovynya38,40,Male
2,Ithergue48,24,Male
3,Chamassasya86,24,Male
4,Iskosia90,23,Male
...,...,...,...
773,Hala31,21,Male
774,Jiskjask80,11,Male
775,Aethedru70,21,Female
777,Yathecal72,20,Male


In [316]:
revised_age_data["Age Range"] = pd.cut(revised_age_data["Age"], bins, labels=age_range, include_lowest=True)
revised_age_data

Unnamed: 0,SN,Age,Gender,Age Range
0,Lisim78,20,Male,20-24
1,Lisovynya38,40,Male,40 >
2,Ithergue48,24,Male,20-24
3,Chamassasya86,24,Male,20-24
4,Iskosia90,23,Male,20-24
...,...,...,...,...
773,Hala31,21,Male,20-24
774,Jiskjask80,11,Male,10-14
775,Aethedru70,21,Female,20-24
777,Yathecal72,20,Male,20-24


In [317]:
new_age_data = revised_age_data[["Age","Age Range"]]
new_age_data

Unnamed: 0,Age,Age Range
0,20,20-24
1,40,40 >
2,24,20-24
3,24,20-24
4,23,20-24
...,...,...
773,21,20-24
774,11,10-14
775,21,20-24
777,20,20-24


In [318]:
sort_age_data = new_age_data.sort_values("Age Range")
sort_age_data

Unnamed: 0,Age,Age Range
778,7,<10
446,7,<10
81,8,<10
33,7,<10
37,8,<10
...,...,...
667,40,40 >
477,41,40 >
674,43,40 >
761,45,40 >


In [319]:
count_age_data = sort_age_data['Age Range'].value_counts()
count_age_data

20-24    258
15-19    107
25-29     77
30-34     52
35-39     31
10-14     22
<10       17
40 >      12
Name: Age Range, dtype: int64

In [305]:
revised_age_data["Total Count"] = pd.cut(revised_age_data["Age"], bins, labels=age_range)
age_count = revised_age_data['Total Count'].value_counts()
age_count

20-24    258
15-19    107
25-29     77
30-34     52
35-39     31
10-14     22
<10       17
40>       12
Name: Total Count, dtype: int64

In [307]:
sort_age_count = age_count.sort_values(ascending=True)
sort_age_count

40>       12
<10       17
10-14     22
35-39     31
30-34     52
25-29     77
15-19    107
20-24    258
Name: Total Count, dtype: int64

In [265]:
age_purchase_analysis = pd.DataFrame(age_count)
age_purchase_analysis = age_purchase_analysis.rename(
    columns={"Age Count": "Total Count"})
age_purchase_analysis

Unnamed: 0,Total Count
20-24,258
15-19,107
25-29,77
30-34,52
35-39,31
10-14,22
<10,17
40+,12


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
