### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# Relative path to the purchase records CSV (adjust if the data lives elsewhere)
data_file = "Resources/purchase_data.csv"

# Load all purchase records into a DataFrame, then preview the first five rows
purchase_data = pd.read_csv(filepath_or_buffer=data_file)
purchase_data.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [2]:
# A player may appear on several purchase rows, so count distinct screen
# names (SN) rather than rows.
total_players = purchase_data.loc[:, "SN"].nunique()

# One-row summary table for display
total_players_df = pd.DataFrame([{"Total Player": total_players}])
total_players_df

Unnamed: 0,Total Player
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
# Number of distinct items, identified by item name (a missing name, if any,
# counts as one distinct value).
# NOTE(review): two different Item IDs that share a name are counted once
# here — confirm that counting by name rather than by Item ID is intended.
unique_items = purchase_data["Item Name"].nunique(dropna=False)
unique_items


179

In [4]:
# Mean price paid across all purchase rows
avg_price = purchase_data.loc[:, "Price"].mean()
avg_price

3.050987179487176

In [5]:
# Total number of purchases: non-null entries in the Purchase ID column
purchases = purchase_data.loc[:, "Purchase ID"].count()
purchases

780

In [6]:
# Total revenue: the sum of the price of every purchase row
total_revenue = purchase_data.loc[:, "Price"].sum()
total_revenue

2379.77

In [7]:
# Purchasing analysis summary: a single row holding the four headline figures
purchasing_analysis = pd.DataFrame(
    {
        "Number of Unique Items": [unique_items],
        "Average Price": [avg_price],
        "Number of Purchases": [purchases],
        "Total Revenue": [total_revenue],
    }
)

# Render the two monetary columns as dollar strings with two decimals.
# After this step those columns hold text, not numbers.
for money_column in ("Average Price", "Total Revenue"):
    purchasing_analysis[money_column] = purchasing_analysis[money_column].map("${:.2f}".format)
purchasing_analysis

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,$2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [29]:
# Reduce the purchase log to one row per player so that gender statistics
# count each player once, no matter how many purchases they made.
# drop_duplicates keeps the first row seen for each screen name (SN).
gender_df = purchase_data[["SN", "Gender"]]
gender_reduced = gender_df.drop_duplicates(subset="SN")
gender_reduced

Unnamed: 0,SN,Gender
0,Lisim78,Male
1,Lisovynya38,Male
2,Ithergue48,Male
3,Chamassasya86,Male
4,Iskosia90,Male
...,...,...
773,Hala31,Male
774,Jiskjask80,Male
775,Aethedru70,Female
777,Yathecal72,Male


In [30]:
# Gender of each unique player (the first purchase row per screen name is kept).
# Note: this rebinds gender_df to the de-duplicated frame with all columns.
gender_df = purchase_data.drop_duplicates(subset="SN")
gender_only = gender_df.loc[:, "Gender"]
gender_only.head(n=5)


0    Male
1    Male
2    Male
3    Male
4    Male
Name: Gender, dtype: object

In [44]:
# Total number of unique players with a recorded gender. This is one overall
# figure (the denominator for the gender shares), not a per-gender breakdown.
gender_count = gender_only.notna().sum()
gender_count

576

In [51]:
# Male players: number of unique players and their share of all players.
# Selecting the column as a list (["Gender"]) yields a DataFrame, so count()
# returns a one-element Series labelled "Gender" rather than a plain integer.
male_mask = gender_reduced["Gender"].eq("Male")
male_count = gender_reduced.loc[male_mask, ["Gender"]].count()

# The share is a fraction (e.g. 0.84), not a value scaled to 100.
male_percentage = male_count.div(gender_count)
male_percentage


Gender    0.840278
dtype: float64

In [48]:
# Female players: number of unique players and their share of all players.
# Selecting the column as a list (["Gender"]) yields a DataFrame, so count()
# returns a one-element Series labelled "Gender" rather than a plain integer.
female_mask = gender_reduced["Gender"].eq("Female")
female_count = gender_reduced.loc[female_mask, ["Gender"]].count()

# The share is a fraction (e.g. 0.14), not a value scaled to 100.
female_percentage = female_count.div(gender_count)
female_percentage

Gender    0.140625
dtype: float64

In [52]:
# Other / Non-Disclosed players: number of unique players and their share.
# Selecting the column as a list (["Gender"]) yields a DataFrame, so count()
# returns a one-element Series labelled "Gender" rather than a plain integer.
other_mask = gender_reduced["Gender"].eq("Other / Non-Disclosed")
other_count = gender_reduced.loc[other_mask, ["Gender"]].count()

# The share is a fraction (e.g. 0.019), not a value scaled to 100.
other_percentage = other_count.div(gender_count)
other_percentage

Gender    0.019097
dtype: float64

In [36]:
# Gender demographics summary: one row per gender, counted over unique players
# (gender_reduced holds one row per screen name).
columns = ['Total Count', 'Percentage of Players']
index = ['Male', 'Female', 'Other / Non-Disclosed']

# Players per gender; a gender with no players is reported as 0 instead of
# being dropped from the table.
player_genders = gender_reduced["Gender"]
gender_totals = player_genders.value_counts().reindex(index, fill_value=0)

# Percentages are relative to all players with a recorded gender and are
# scaled to 0-100, rounded to two decimals.
percentage_players = pd.DataFrame(
    {
        "Total Count": gender_totals,
        "Percentage of Players": (gender_totals / player_genders.count() * 100).round(2),
    },
    columns=columns,
)
percentage_players


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# Columns used by the per-gender price aggregations: the grouping key
# followed by the value being aggregated.
gender_prices_list = ["Gender", "Price"]

In [None]:
# female = only_gender.loc[only_gender["Gender"] == "Female", ["Price"]].count()
# female


In [None]:
# Purchase count: number of purchase rows (non-null prices) per gender
gender_purchase_count = (
    purchase_data.loc[:, gender_prices_list]
    .groupby(by=["Gender"])
    .count()
    .rename(columns={"Price": "Purchase Count"})
)
gender_purchase_count

In [None]:
# Average purchase price: mean price of the purchase rows for each gender
avg_gender_price = (
    purchase_data.loc[:, gender_prices_list]
    .groupby(by=["Gender"])
    .mean()
    .rename(columns={"Price": "Average Purchase Price"})
)
avg_gender_price


In [None]:
# Total purchase value: sum of prices over the purchase rows for each gender
total_gender_value = (
    purchase_data.loc[:, gender_prices_list]
    .groupby(by=["Gender"])
    .sum()
    .rename(columns={"Price": "Total Purchase Value"})
)
total_gender_value

In [None]:
# Average Total Purchase per Person
# Total spend per gender divided by the number of distinct players (SN) of
# that gender, so a player with several purchases is counted once.
purchases_by_gender = purchase_data.groupby("Gender")
avg_total_per_person = (
    purchases_by_gender["Price"].sum() / purchases_by_gender["SN"].nunique()
).to_frame("Avg Total Purchase per Person")
avg_total_per_person



In [None]:
# Combine the three per-gender tables (purchase count, average price, total
# value) into one summary. Each merge keeps the rows of the left-hand table
# and matches on "Gender".
merged1 = gender_purchase_count.merge(avg_gender_price, how="left", on="Gender")
gender_analytics_df = merged1.merge(total_gender_value, how="left", on="Gender")
gender_analytics_df

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

