In [None]:
### Note
* Instructions have been included for each segment.
You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

In [2]:
# File to Load (Remember to Change These)
file = "Resources/purchase_data.csv"

In [3]:
# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [92]:
total_players=len(purchase_data.SN.unique())
total_players_df= pd.DataFrame({"Total Players":[total_players]})
total_players_df

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [5]:
unique_items = len(purchase_data["Item ID"].unique())
unique_items

183

In [6]:
Average_Price =round(purchase_data["Price"].mean(),2)
Average_Price

3.05

In [7]:
Total_price = purchase_data["Price"].sum()
Total_price

2379.77

In [8]:
Total_purchase = purchase_data["Purchase ID"].count()
Total_purchase

780

In [9]:
Max_Price =round(purchase_data["Price"].max(),2)
Max_Price

4.99

In [10]:
Min_Price =round(purchase_data["Price"].min(),2)
Min_Price

1.0

In [96]:
summary_of_data = pd.DataFrame({
    "Number of unique items":[unique_items],
    "Average price":[Average_Price],
    "Maximum price":[Max_Price],
    "Minimum price":[Min_Price],
    "Total purchase":[Total_purchase],
    "Revenue":[Total_price] })

summary_of_data["Average price"] = summary_of_data["Average price"].map("${:.2f}".format)
summary_of_data["Revenue"] = summary_of_data["Revenue"].map("${:.2f}".format)
summary_of_data["Maximum price"] = summary_of_data["Maximum price"].map("${:.2f}".format)
summary_of_data["Minimum price"] = summary_of_data["Minimum price"].map("${:.2f}".format)

summary_of_data

Unnamed: 0,Number of unique items,Average price,Maximum price,Minimum price,Total purchase,Revenue
0,183,$3.05,$4.99,$1.00,780,$2379.77


In [12]:
#another way of showing the summary of the purchasing analysis
purchase_analysis= pd.DataFrame(round(purchase_data.describe(),2))
purchase_analysis

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.71,92.11,3.05
std,225.31,6.66,52.78,1.17
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,48.0,1.98
50%,389.5,22.0,93.0,3.15
75%,584.25,25.0,139.0,4.08
max,779.0,45.0,183.0,4.99


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [17]:
gender = pd.DataFrame(purchase_data[["SN","Gender"]].drop_duplicates("SN"))
gender = pd.DataFrame(gender.groupby("Gender")["Gender"].count())
gender = gender.rename(columns={"Gender":"Players by Gender"})


players = pd.DataFrame(round((gender["Players by Gender"] / total_players)*100,2).map("{0:.2f}%".format))
players= players.rename(columns={"Players by Gender":"Percentage of players by Gender"})

demographic= [gender,players]
gender_demographic= pd.concat(demographic,axis=1,join='inner')
gender_demographic

Unnamed: 0_level_0,Players by Gender,Percentage of players by Gender
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06%
Male,484,84.03%
Other / Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [33]:
#calculating the total purchase value
totalval_bygender= pd.DataFrame(purchase_data.groupby("Gender")["Price"].sum())
totalval_bygender = totalval_bygender.rename(columns = {"Price":"Total Purchase Value"})
totalval_bygender["Total Purchase Value"] = totalval_bygender["Total Purchase Value"].map("${:.2f}".format)

#calculating the total purchase count
itemct_bygender= pd.DataFrame(purchase_data.groupby("Gender")["Item Name"].count()).rename(columns={"Item Name":"Purchase Count"})

#calculating the total average puchase price
Avg_Price_bygender = pd.DataFrame(purchase_data.groupby("Gender")["Price"].mean())
Avg_Price_bygender["Price"] = Avg_Price_bygender["Price"].map("${:.2f}".format)
Avg_Price_bygender = Avg_Price_bygender.rename(columns={"Price":"Avgerage Purchase Price"})

#Avg_purchase_perperson = pd.DataFrame(totalval_bygender/itemct_bygender)


#Display the summary data frame
purchasing_analysis= [itemct_bygender,Avg_Price_bygender,totalval_bygender]
purchasing_analysis= pd.concat(purchasing_analysis,axis=1,join='inner')
purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Avgerage Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,$3.20,$361.94
Male,652,$3.02,$1967.64
Other / Non-Disclosed,15,$3.35,$50.19


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [39]:
age_list = purchase_data[["SN","Age","Price"]]
age_list.head()

Unnamed: 0,SN,Age,Price
0,Lisim78,20,3.53
1,Lisovynya38,40,1.56
2,Ithergue48,24,4.88
3,Chamassasya86,24,3.27
4,Iskosia90,23,1.44


In [41]:
# Bins for ages and group names
Ages= [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
age_groups= ["under 10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "over 40"]

age_list["Age Group"] = pd.cut(age_list["Age"], Ages, labels = age_groups)
age_list.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,SN,Age,Price,Age Group
0,Lisim78,20,3.53,20-24
1,Lisovynya38,40,1.56,over 40
2,Ithergue48,24,4.88,20-24
3,Chamassasya86,24,3.27,20-24
4,Iskosia90,23,1.44,20-24


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [50]:
avgprice_byage = pd.DataFrame(age_list.groupby("Age Group")["Price"].mean())
avgprice_byage = avgprice_byage.rename(columns={"Price":"Average Purchase Price"})
avgprice_byage["Average Purchase Price"] = avgprice_byage["Average Purchase Price"].map("${:.2f}".format)

purchasecnt_byage= pd.DataFrame(age_list.groupby("Age Group")["SN"].count())
purchasecnt_byage = purchasecnt_byage.rename(columns={"SN":"Purchases Count"})


total_purchase = pd.DataFrame(age_list.groupby("Age Group")["Price"].sum())
total_purchase = pd.DataFrame(total_purchase.rename(columns={"Price":"Total Purchase Value"}))
total_purchase = pd.DataFrame(total_purchase["Total Purchase Value"].map("${:.2f}".format))


purchasing_analysis_byage= [avgprice_byage ,purchasecnt_byage,total_purchase]
purchasing_analysis_byage= pd.concat(purchasing_analysis_byage,axis=1,join='inner')
purchasing_analysis_byage

Unnamed: 0_level_0,Average Purchase Price,Purchases Count,Total Purchase Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
under 10,$3.35,23,$77.13
10-14,$2.96,28,$82.78
15-19,$3.04,136,$412.89
20-24,$3.05,365,$1114.06
25-29,$2.90,101,$293.00
30-34,$2.93,73,$214.00
35-39,$3.60,41,$147.67
over 40,$2.94,13,$38.24


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [63]:
# Data Manipulation
top_spenders = pd.DataFrame(purchase_data.groupby("SN")["Price"].sum())
top_spenders = pd.DataFrame(top_spenders.sort_values(by="Price", ascending=False))
top_spenders = pd.DataFrame(top_spenders["Price"].map("${:.2f}".format))
top_spenders = pd.DataFrame(top_spenders.rename(columns = {"Price":"Total Purchase Value"}))


top_spender_ct = pd.DataFrame(purchase_data.groupby("SN")["Item Name"].count())
top_spender_ct = top_spender_ct.sort_values(by="Item Name", ascending=False)
top_spender_ct = pd.DataFrame(top_spender_ct.rename(columns = {"Item Name":"Purchase Count"}))

#building our data frame based on the manipulation before
top_spender_avgprice = pd.DataFrame(purchase_data.groupby("SN")["Price"].mean())
top_spender_avgprice = pd.DataFrame(top_spender_avgprice["Price"].map("${:.2f}".format))
top_spender_avgprice = pd.DataFrame(top_spender_avgprice.rename(columns = {"Price":"Average Purchase Price"}))

Top_spender_list= [top_spender_ct,top_spenders,top_spender_avgprice]
Top_spender_list= pd.concat(Top_spender_list,axis=1,join='inner')
Top_spender_list.head()

Unnamed: 0_level_0,Purchase Count,Total Purchase Value,Average Purchase Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$18.96,$3.79
Iral74,4,$13.62,$3.40
Idastidru52,4,$15.45,$3.86
Asur53,3,$7.44,$2.48
Inguron55,3,$11.11,$3.70


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [84]:

#first step is manipulating the data
item_group= purchase_data.groupby(["Item ID", "Item Name"])
item_group_count = item_group["SN"].count()
item_group_total = item_group["Price"].sum()
item_price = (item_group_total / item_group_count)
item_value = (item_price * item_group_count)

#Formatting our data fram after data manipulation 
Popular_Items = pd.DataFrame({
    "Purchase Count": item_group_count,
    "Item Price": item_price ,
    "Total Purchase Value": item_value})

Popular_Items = Popular_Items.sort_values("Purchase Count", ascending=False) 
Popular_Items["Item Price"] = Popular_Items["Item Price"].map("${:.2f}".format)
Popular_Items["Total Purchase Value"] = Popular_Items["Total Purchase Value"].map("${:.2f}".format)
Popular_Items = Popular_Items[["Purchase Count", "Item Price", "Total Purchase Value"]]
Popular_Items.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [83]:

item_group= purchase_data.groupby(["Item ID", "Item Name"])
item_group_countbygender = item_group["Gender"].count()
item_group_total = item_group["Price"].sum()
item_price = (item_group_total / item_group_countbygender)

#New Data Frame
profitable_items = pd.DataFrame({"Purchase Count": item_group_countbygender,
                          "Item Price": item_price,
                          "Total Purchase Value": item_group_total})

#Sort the above table by total purchase value in descending order
profitable_items = profitable_items.sort_values("Total Purchase Value", ascending=False) 
profitable_items["Item Price"] = profitable_items["Item Price"].map("${:.2f}".format)
profitable_items["Total Purchase Value"] = profitable_items["Total Purchase Value"].map("${:.2f}".format)
profitable_items = profitable_items[["Purchase Count", "Item Price", "Total Purchase Value"]]
profitable_items.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
92,Final Critic,8,$4.88,$39.04
103,Singed Scalpel,8,$4.35,$34.80
