### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
data_file = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(data_file)
purchase_data.head()

purchase_data.groupby('Gender')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7ff15071abe0>

## Player Count

* Display the total number of players


In [2]:
total_players = purchase_data["SN"].nunique()
total_players
total_players_df = pd.DataFrame({"Total Player": [total_players]})
total_players_df

Unnamed: 0,Total Player
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
# Number of unique items

unique_items = len(purchase_data["Item Name"].unique())
unique_items


179

In [4]:
# Average purchase price (mean of price)

avg_price = purchase_data["Price"].mean()
avg_price

3.050987179487176

In [5]:
# Number of purchases

purchases = purchase_data["Purchase ID"].count()
purchases

780

In [6]:
# Total Revenue (sum of price)

total_revenue = purchase_data["Price"].sum()
total_revenue

2379.77

In [7]:
# Purchasing analysis dataframe

purchasing_analysis = pd.DataFrame({"Number of Unique Items": [unique_items],
                                    "Average Price": [avg_price],
                                    "Number of Purchases": [purchases],
                                    "Total Revenue": [total_revenue]
                                   })

# Format average price and total revenue as currency
purchasing_analysis["Average Price"] = purchasing_analysis["Average Price"].map("${:.2f}".format)
purchasing_analysis["Total Revenue"] = purchasing_analysis["Total Revenue"].map("${:.2f}".format)
purchasing_analysis

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,$2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [8]:
# Create dataframe without duplicate playrs

gender_df = purchase_data.loc[:,["SN","Gender"]]
gender_reduced = gender_df.drop_duplicates(subset=["SN"])
gender_reduced.head()

Unnamed: 0,SN,Gender
0,Lisim78,Male
1,Lisovynya38,Male
2,Ithergue48,Male
3,Chamassasya86,Male
4,Iskosia90,Male


In [9]:
# Count of each gender

gender_only_count = gender_reduced["Gender"].value_counts()
gender_only_count


Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [10]:
# Percentage of players by gender

gender_percentage = gender_df["Gender"].value_counts(1)
gender_percentage


Male                     0.835897
Female                   0.144872
Other / Non-Disclosed    0.019231
Name: Gender, dtype: float64

In [11]:
# Dataframe with total count and percentage of players by gender

gender_analysis_df = pd.DataFrame({"Total Count": gender_only_count,
                                  "Percentage of Players": gender_percentage})

gender_analysis_df["Percentage of Players"] = gender_analysis_df["Percentage of Players"].map("{:.2f}%".format)
gender_analysis_df

Unnamed: 0,Total Count,Percentage of Players
Male,484,0.84%
Female,81,0.14%
Other / Non-Disclosed,11,0.02%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [12]:
# # Create list to only show Gender and Price columns
gender_prices_list = ['Gender', 'Price']

# only_gender = purchase_data[gender_prices_list]
# only_gender.head()

In [13]:
# female = only_gender.loc[only_gender["Gender"] == "Female", ["Price"]].count()
# female


In [14]:
# Purchase count

gender_purchase_count = purchase_data[gender_prices_list].groupby(['Gender']).count()
gender_purchase_count = gender_purchase_count.rename(columns={"Price": "Purchase Count"})
gender_purchase_count

Unnamed: 0_level_0,Purchase Count
Gender,Unnamed: 1_level_1
Female,113
Male,652
Other / Non-Disclosed,15


In [15]:
# Average Purchase Price

avg_gender_price = purchase_data[gender_prices_list].groupby(['Gender']).mean()
avg_gender_price = avg_gender_price.rename(columns={"Price": "Average Purchase Price"})
avg_gender_price


Unnamed: 0_level_0,Average Purchase Price
Gender,Unnamed: 1_level_1
Female,3.203009
Male,3.017853
Other / Non-Disclosed,3.346


In [16]:
# Total Purchase Value
total_gender_value = purchase_data[gender_prices_list].groupby(['Gender']).sum()
total_gender_value = total_gender_value.rename(columns={"Price": "Total Purchase Value"})
total_gender_value

Unnamed: 0_level_0,Total Purchase Value
Gender,Unnamed: 1_level_1
Female,361.94
Male,1967.64
Other / Non-Disclosed,50.19


In [17]:
# Average Total Purchase per Person



In [18]:
# Merge dataframes
# gender_purchase_count
# avg_gender_price
# total_gender_value

merged1 = pd.merge(gender_purchase_count, avg_gender_price, on="Gender", how="left")
gender_analytics_df = pd.merge(merged1, total_gender_value, on="Gender", how="left")
gender_analytics_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,3.203009,361.94
Male,652,3.017853,1967.64
Other / Non-Disclosed,15,3.346,50.19


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

