### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [74]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Location of the purchase CSV, relative to the notebook (adjust if the data moves)
file_to_load = "Resources/purchase_data.csv"

# Load the purchase records into a pandas DataFrame
purchase_data = pd.read_csv(filepath_or_buffer=file_to_load)

In [75]:
# Preview the first five rows of the loaded purchase data
purchase_data.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [76]:
# List the column names of the dataset
list(purchase_data.columns)

['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price']

In [77]:
# An SN (Screen Name) appears once per purchase row, so the same SN can occur
# several times. Counting the distinct SN values gives the number of players;
# .nunique() does that directly (missing values are ignored, exactly as
# len(.value_counts()) would).
TotalPlayers = purchase_data["SN"].nunique()

print(TotalPlayers)


576


## Player Count

* Display the total number of players


In [78]:
# Wrap the TotalPlayers count in a one-row data frame so it displays as a table
TotalPlayers_df = pd.DataFrame([{"Total Players": TotalPlayers}])
TotalPlayers_df

Unnamed: 0,Total Players
0,576


In [79]:
# Collect the distinct Item ID values (in order of first appearance) to see
# which items occur in the dataset
UniqueItems = pd.unique(purchase_data["Item ID"])
UniqueItems

array([108, 143,  92, 100, 131,  81, 169, 162,  21, 136,  95, 116,   4,
       165,  98,  40, 161,  82,  89,  57, 168,  24, 151, 132, 178,  71,
        96, 119,  37, 140, 179, 133,  44, 160,  53,  76,  73, 172,   7,
        72,   9, 181, 102, 170, 138, 110,  22,  15,  60, 176,  25,  84,
        80, 152, 105, 125,  56,  34,   6,  27,  29,  68, 124,  88,  20,
        50, 174,  12,  33, 129,  17,  59,  39, 164, 134, 109,  10,  54,
        99,  85, 139,  41, 150,  13, 117,  78,  87,   1,  62,  75,  58,
        74, 120, 145,   0,   2, 146, 158,   8, 175,  46, 148, 111,   3,
        65, 183,  30, 157, 123,   5, 144, 103, 135,  51, 154,  32,  77,
        11,  63,  69, 113, 153, 114, 149, 159, 137, 142,  66,  97, 163,
       107,  16,  42,  19,  83,  28,  94, 182,  70,  18, 166, 173, 167,
       155, 128, 156, 147,  35, 171,  52, 106,  38,  91, 118,  67,  45,
        49, 121,  79,  14,  43,  31,  64,  86,  93,  26, 122,  61,  48,
       112, 126, 115,  55, 127,  47, 130,  90, 177, 104], dtype=

In [80]:
# The number of entries in UniqueItems is the number of distinct items
# that were purchased
NumberOfUniqueItems = UniqueItems.size
NumberOfUniqueItems

179

In [81]:
# Distinct Purchase ID values present in the dataset
UniquePurchaseItems = purchase_data.loc[:, "Purchase ID"].unique()
UniquePurchaseItems

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [86]:
# Number of distinct Purchase IDs, used below as the number of purchases
NumberOfUniquePurchaseItems = UniquePurchaseItems.shape[0]
NumberOfUniquePurchaseItems

780

In [91]:
# Total revenue: add up every value in the Price column
SumItems = purchase_data.loc[:, "Price"].sum()
SumItems

2379.77

In [92]:
# Average price per purchase: the sum of all prices (SumItems) divided by the
# number of distinct Purchase IDs (NumberOfUniquePurchaseItems) -- i.e. the
# purchase count, not the number of distinct items (NumberOfUniqueItems).
AveragePrice = SumItems / NumberOfUniquePurchaseItems
AveragePrice

3.0509871794871795

In [125]:
def format(x):
    """Return ``x`` as a dollar string with thousands separators and 2 decimals.

    Example: 2379.77 -> "$2,379.77".

    NOTE(review): defining this at notebook level under the name ``format``
    replaces the built-in ``format()`` for every cell run afterwards; consider
    renaming it (and its call sites) in a follow-up.
    """
    template = "${0:,.2f}"
    return template.format(x)
    
# Build the one-row purchasing summary, then replace the two money columns
# with their dollar-formatted strings. .assign() overwrites existing columns
# in place, so the column order stays as declared in the dictionary.
purchasing_analysis_df = pd.DataFrame(
    {
        "Number of Unique Items": [NumberOfUniqueItems],
        "Average Price": [AveragePrice],
        "Number of Purchases": [NumberOfUniquePurchaseItems],
        "Total Revenue": [SumItems],
    }
).assign(
    **{
        "Average Price": lambda frame: frame["Average Price"].apply(format),
        "Total Revenue": lambda frame: frame["Total Revenue"].apply(format),
    }
)

purchasing_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed





## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

