### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

## Player Count

* Display the total number of players


In [2]:
playcount = purchase_data["SN"].nunique()

print ("Total players: "+ str(playcount))


Total players: 576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
#unique items calculator
uniquei = purchase_data["Item ID"].nunique()
# average price calculator
avgprice = purchase_data["Price"].mean()
#number purchases
purnum = purchase_data["Purchase ID"].count()
#total revenue
totalrev = purchase_data["Price"].sum()
puranalysis = [{
    'unique items' : uniquei,
    'average price' : avgprice,
    'number of purchases' : purnum,
    'total revenue' : totalrev,
}]
purchase_df = pd.DataFrame(puranalysis) 

purchase_df

Unnamed: 0,unique items,average price,number of purchases,total revenue
0,179,3.050987,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [4]:
gender_df = purchase_data[["Gender"]]

gender_m = gender_df.loc[purchase_data["Gender"] =="Male" ]
gender_f =gender_df.loc[purchase_data["Gender"] =="Female" ]
gender_o = gender_df.loc[purchase_data["Gender"] =="Other" ]



gendermcount = gender_m.count(axis =0)
genderfcount = gender_f.count(axis =0)
genderocount = gender_o.count(axis =0)
total = gender_df.count()

#calc
gm_calc = gendermcount/total * 100
gf_calc = genderfcount/total * 100
go_calc = genderocount/total * 100


print("The number of Male players is: " +  str(gendermcount) + " making up " + str(gm_calc) + "%")

#print(gender_f)
print("The number of Female players is: " +  str(genderfcount) + " making up " + str(gf_calc) + "%")
#print(gender_o)
print("The number of Other players is: " +  str(genderocount) + " making up " + str(go_calc) + "%")

The number of Male players is: Gender    652
dtype: int64 making up Gender    83.589744
dtype: float64%
The number of Female players is: Gender    113
dtype: int64 making up Gender    14.487179
dtype: float64%
The number of Other players is: Gender    0
dtype: int64 making up Gender    0.0
dtype: float64%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [25]:

puranalysiscount = purchase_data.groupby("Gender")['Price'].count()
puranalysismean = purchase_data.groupby("Gender")['Price'].mean()
puranalysistot = purchase_data.groupby("Gender")['Price'].sum()




sumframe = {
    'Number of Purchases' :  puranalysiscount,
    'Average Price of Purchase' : puranalysismean,
    'total $ amount' : puranalysistot,
}
sumframedf = pd.DataFrame(sumframe)
sumframedf


Unnamed: 0_level_0,Number of Purchases,Average Price of Purchase,total $ amount
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,3.203009,361.94
Male,652,3.017853,1967.64
Other / Non-Disclosed,15,3.346,50.19


## Age Demographics

In [24]:
#bin names and labels
agebins = [0,9.9,13.9,18.9,23.9,28.9,33.9,38.9, 99]
binnames = ['<10', '10-14', '15-19','20-24', '25-29', '30-34','35-39','40+']
#bin dataframe


age_df = pd.DataFrame(purchase_data)
age_df["Number of players by Age"] = pd.cut(age_df['Age'], agebins, labels = binnames, include_lowest=True)
age_df = age_df[['Number of players by Age', 'Age']]
age_df = age_df.groupby(age_df['Number of players by Age']).count()
age_df['Percent'] = age_df['Age']/playcount * 100


age_df


Unnamed: 0_level_0,Age,Percent
Number of players by Age,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,23,3.993056
10-14,26,4.513889
15-19,115,19.965278
20-24,321,55.729167
25-29,155,26.909722
30-34,77,13.368056
35-39,44,7.638889
40+,19,3.298611


* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [26]:
purage_df= []
purage_df = pd.DataFrame(purchase_data)
purage_df["Players Age"] = pd.cut(purage_df['Age'], agebins, labels = binnames, include_lowest=True)



playanalysiscount = purchase_data.groupby("SN")['Price'].count()
playanalysismean = purchase_data.groupby("SN")['Price'].mean()
playanalysistot = purchase_data.groupby("SN")['Price'].sum()

purchaseframe = {
    'Number of Purchases' :  playanalysiscount,
    'Average Price of Purchase' : playanalysismean,
    'Total  $ Amount' : playanalysistot,
}

purchaseframe_df = pd.DataFrame(purchaseframe)
purchaseframe_df = purchaseframe_df.sort_values('Total  $ Amount', ascending=False)
purchaseframe_df.head()

Unnamed: 0_level_0,Number of Purchases,Average Price of Purchase,Total $ Amount
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Idastidru52,4,3.8625,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.405,13.62
Iskadarya95,3,4.366667,13.1


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [15]:
#groupby 'SN'
top_spend = purchase_data.set_index("SN")
top_spend = top_spend.groupby("SN")

purchase_count = pd.Series(top_spend['Purchase ID'].count())
#purchase_count
average_price = pd.Series(top_spend['Price'].mean())
#print (average_price)
total_price= pd.Series(top_spend['Price'].sum())
#total_price

topspend_dict = {
    "Purchase Count" : purchase_count,
    "Average Price" : average_price,
    "Total Price" : total_price
    }

topspend_df = pd.DataFrame(topspend_dict)
topspend_df = topspend_df.sort_values('Total Price', ascending=False)

topspend_df.head()

Unnamed: 0_level_0,Purchase Count,Average Price,Total Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Idastidru52,4,3.8625,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.405,13.62
Iskadarya95,3,4.366667,13.1


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [9]:

popitem = purchase_data.set_index("Item ID")
popitem = popitem.groupby("Item ID")["Item Name"]

popitem_df = pd.DataFrame(topspend_dict)

popitem_df = popitem_df.sort_values('Purchase Count', ascending=False)
popitem_df.head()


Unnamed: 0_level_0,Purchase Count,Average Price,Total Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Iral74,4,3.405,13.62
Idastidru52,4,3.8625,15.45
Asur53,3,2.48,7.44
Inguron55,3,3.703333,11.11


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [10]:
popitem_df = popitem_df.sort_values('Total Price', ascending=False)
popitem_df.head()


Unnamed: 0_level_0,Purchase Count,Average Price,Total Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Idastidru52,4,3.8625,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.405,13.62
Iskadarya95,3,4.366667,13.1
