### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import pandas as pd
import os

# Location of the purchase records, given relative to the working directory
# (adjust the two components below if the CSV is stored elsewhere)
resource_dir = "Resources"
csv_name = "purchase_data.csv"
file_to_load = os.path.join(resource_dir, csv_name)

# Load the CSV into a DataFrame and preview its first rows
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

## Player Count

* Display the total number of players


In [None]:
# Each row of purchase_data is one purchase and a player (identified by "SN")
# can appear on several rows, so count distinct screen names instead of rows.
total_players = purchase_data["SN"].nunique()

total_number_df = pd.DataFrame([{"Total Players": total_players}])
total_number_df

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [None]:
# Headline purchasing figures for the whole data set, one summary row.
item_prices = purchase_data["Price"]

purchasing_analysis = [{
    # Distinct items are identified by their "Item ID"
    "Number of Unique Items": purchase_data["Item ID"].nunique(),
    "Average Price": '${:,.2f}'.format(item_prices.mean()),
    # One row per purchase
    "Number of Purchases": len(purchase_data),
    "Total Revenue": '${:,.2f}'.format(item_prices.sum()),
}]
purchasing_analysis_df = pd.DataFrame(purchasing_analysis)
purchasing_analysis_df

## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [None]:
# Keep a single row per screen name (the first one encountered) so that every
# player is counted exactly once in the demographic tables.
purchase_data_unique_df = purchase_data.drop_duplicates(subset=["SN"], keep="first")

# Number of unique players per gender
gender_counts = purchase_data_unique_df["Gender"].value_counts()
gender_demographics = gender_counts.to_frame(name="Total Count")

# Share of all unique players, rounded to two decimals and shown with a "%" suffix
total_users = gender_demographics["Total Count"].sum()
gender_share = gender_demographics["Total Count"].map(
    lambda count: round((count / total_users) * 100, 2)
)
gender_demographics["Percentage"] = gender_share.astype(str) + "%"
gender_demographics


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# Purchasing summary per gender.
#
# Every figure is computed with a groupby on "Gender", so each value is tied to
# its gender label by the index rather than by the position of a hand-built
# list. The table is built as a new DataFrame; gender_demographics is left
# untouched.
purchases_by_gender = purchase_data.groupby("Gender")

gender_summary = pd.DataFrame({
    "Purchase Count": purchases_by_gender["Price"].count(),
    "Average Purchase Price": purchases_by_gender["Price"].mean(),
    "Total Purchase Value": purchases_by_gender["Price"].sum(),
    # Distinct players per gender; only needed as the per-person denominator
    "Total Count": purchases_by_gender["SN"].nunique(),
})

# Total spend divided by the number of distinct players of that gender
gender_summary["Avg Total Purchase per Person"] = (
    gender_summary["Total Purchase Value"] / gender_summary["Total Count"]
)

# Largest player group first, then drop the helper column
gender_summary = gender_summary.sort_values("Total Count", ascending=False)
gender_summary = gender_summary[[
    "Purchase Count",
    "Average Purchase Price",
    "Total Purchase Value",
    "Avg Total Purchase per Person",
]].copy()

# Formatting: fixed two-decimal currency, applied last so all arithmetic above
# runs on numbers rather than strings
for money_column in (
    "Average Purchase Price",
    "Total Purchase Value",
    "Avg Total Purchase per Person",
):
    gender_summary[money_column] = gender_summary[money_column].map("${:,.2f}".format)

gender_summary

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [None]:
# Bin edges for pd.cut. Bins are right-closed, i.e. (9, 14], (14, 19], ...
# Fixed outer edges (0 and infinity) keep the edges strictly increasing for any
# data set; include_lowest=True below also closes the first bin on the left so
# the lowest edge itself is not dropped.
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, float("inf")]

# One label per bin, in the same order as the bins
bin_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Attach each unique player's age group. assign() returns a new frame, so the
# column is not written into the result of drop_duplicates in place.
purchase_data_unique_df = purchase_data_unique_df.assign(**{
    "Age Group": pd.cut(
        purchase_data_unique_df["Age"],
        age_bins,
        labels=bin_labels,
        include_lowest=True,
    )
})

# Group the unique players by age group; observed=False keeps empty bins in the table
age_in_bin = purchase_data_unique_df.groupby("Age Group", observed=False)

# Player count per age group
age_in_bin_df = pd.DataFrame(age_in_bin["Age"].count())
age_in_bin_df.columns = ["Total Count"]

# Share of all unique players, shown with two decimals
age_in_bin_df["Percentage"] = (
    age_in_bin_df["Total Count"] / len(purchase_data_unique_df) * 100
).map("{:.2f}%".format)

age_in_bin_df

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# Purchasing summary per age group.
#
# Bin the full purchase_data frame (one row per purchase), not the
# one-row-per-player frame, so that every purchase is counted.
# include_lowest=True closes the first bin on the left so rows whose age equals
# the lowest edge are kept.
purchase_age_group = pd.cut(
    purchase_data["Age"],
    age_bins,
    labels=bin_labels,
    include_lowest=True,
).rename("Age Group")
purchases_by_age = purchase_data.groupby(purchase_age_group, observed=False)

# Purchase count, average price and total spend per age group
merged_bins = pd.DataFrame({
    "Purchase Count": purchases_by_age["Price"].count(),
    "Average Purchase Price": purchases_by_age["Price"].mean(),
    "Total Purchase Value": purchases_by_age["Price"].sum(),
})

# Total spend divided by the number of distinct players in the group. Both
# sides are indexed by "Age Group", so the division lines up label by label.
players_per_group = purchases_by_age["SN"].nunique()
merged_bins["Avg Total Purchase per Person"] = (
    merged_bins["Total Purchase Value"] / players_per_group
)

# Expose "Age Group" as a regular column
merged_bins = merged_bins.reset_index()

# Formatting: fixed two-decimal currency, applied after all arithmetic
for money_column in (
    "Average Purchase Price",
    "Total Purchase Value",
    "Avg Total Purchase per Person",
):
    merged_bins[money_column] = merged_bins[money_column].map("${:,.2f}".format)

merged_bins

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
# Top spenders: one row per screen name with purchase count, average price and
# total spend, computed in a single aggregation over "Price".
top_spenders_reordered = purchase_data.groupby("SN")["Price"].agg(["count", "mean", "sum"])
top_spenders_reordered.columns = [
    "Purchase Count",
    "Average Purchase Price",
    "Total Purchase Value",
]

# Rank by total spend while the values are still numeric, then turn "SN" back
# into a regular column with a fresh 0..n-1 index
top_spenders_reordered = (
    top_spenders_reordered
    .sort_values("Total Purchase Value", ascending=False)
    .reset_index()
)

# Formatting: fixed two-decimal currency (e.g. "$4.60" rather than "$4.6")
for money_column in ("Average Purchase Price", "Total Purchase Value"):
    top_spenders_reordered[money_column] = (
        top_spenders_reordered[money_column].map("${:,.2f}".format)
    )

top_spenders_reordered.head()

## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
# Group by Item ID and Item Name together, so two different items that happen
# to share a name are reported separately instead of being merged into one row.
by_item = purchase_data.groupby(["Item ID", "Item Name"])

# Purchase count, mean price (some items are sold at different prices) and
# total revenue per item
merged_byitem = by_item["Price"].agg(["count", "mean", "sum"])
merged_byitem.columns = ["Purchase Count", "Item Price", "Total Purchase Value"]

# Expose "Item ID" and "Item Name" as regular columns, in that order
merged_byitem = merged_byitem.reset_index()

# Sorting by Purchase Count with Total Purchase Value as tie breaker
merged_byitem = merged_byitem.sort_values(
    by=["Purchase Count", "Total Purchase Value"], ascending=False
)

# Formatting
# NOTE: merged_byitem_formatted is the same object as merged_byitem, not a
# copy, so code that reads merged_byitem after this point sees the "$"-prefixed
# strings written below.
merged_byitem_formatted = merged_byitem
for money_column in ("Item Price", "Total Purchase Value"):
    merged_byitem_formatted[money_column] = (
        merged_byitem_formatted[money_column].map("${:.2f}".format)
    )

merged_byitem_formatted.head()

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [None]:
# "Total Purchase Value" arrives here as display text such as "$50.76".
# Convert it back to numbers so the sort below orders by amount, not by text.
# regex=False makes "$" a literal character; interpreted as a regular
# expression it is the end-of-string anchor (pandas 2.0+ does so for
# single-character patterns), which would leave the symbol in place and make
# the float conversion fail.
total_purchase_value = (
    merged_byitem["Total Purchase Value"]
    .str.replace("$", "", regex=False)
    .astype(float)
)
merged_byitem = merged_byitem.assign(**{"Total Purchase Value": total_purchase_value})

# Most profitable first: sort by Total Purchase Value, descending
merged_byitem = merged_byitem.sort_values(by=["Total Purchase Value"], ascending=False)

# Formatting: restore the currency display with fixed two decimals
merged_byitem["Total Purchase Value"] = (
    merged_byitem["Total Purchase Value"].map("${:.2f}".format)
)

merged_byitem.head()