### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd


# Source file: resources/purchase_data.csv (a relative path, so it is resolved
# against the notebook's working directory).

# Load the purchase records into a pandas DataFrame.
purchase_data = pd.read_csv(filepath_or_buffer="resources/purchase_data.csv")

# Quick sanity checks to run by hand after loading:
#   purchase_data.head()
#   purchase_data.shape


In [2]:
# Wrap the loaded purchase data in a DataFrame and display it.
purchase_data_df = pd.DataFrame(data=purchase_data)
purchase_data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


## Player Count

* Display the total number of players


In [3]:
# Count the non-null screen names (SN) and report the figure.
# NOTE(review): count() tallies rows, so an SN that appears on several rows is
# counted once per row; nunique() would give the number of distinct players.
# Later cells divide by total_players, so confirm the intended meaning before
# changing it.
total_players = purchase_data_df["SN"].count()
print(f'The Total Numbers of Players:  {total_players}')

The Total Numbers of Players:  780


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [4]:
# Number of distinct values in the "Item ID" column.
unique_items = purchase_data_df.loc[:, "Item ID"].nunique()
unique_items


179

In [5]:
# Mean of the Price column, rendered as a dollar amount with two decimals.
# avg_data holds the formatted string (e.g. "$3.05"), not the numeric mean.
avg_data = f"${purchase_data_df['Price'].mean():.2f}"
print(f'Average Price: {avg_data}')

Average Price: $3.05


In [6]:
# Number of purchases, taken as the count of non-null Price entries.
items_purchased = purchase_data_df["Price"].count()
items_purchased

780

In [7]:
# Total revenue: the sum of the Price column over all purchase rows.
total_revenue = purchase_data_df["Price"].sum()
total_revenue

2379.77

In [8]:
# One-row summary table built from the totals computed in the cells above.
# "Average Price" receives the pre-formatted string held in avg_data.
summary_df = pd.DataFrame(
    [
        {
            "Unique Items": unique_items,
            "Average Price": avg_data,
            "Total Items Purchased": items_purchased,
            "Total Revenue": total_revenue,
        }
    ]
)

summary_df

Unnamed: 0,Unique Items,Average Price,Total Items Purchased,Total Revenue
0,179,$3.05,780,2379.77


## Gender Demographics

In [9]:
# Start the gender summary: mean Age and row count for each Gender value.
# The list-valued spec produces two-level column labels, e.g. ("Age", "mean").
by_gender = purchase_data_df.groupby(by=["Gender"]).aggregate(
    {"Age": ["mean"], "Gender": ["count"]}
)
by_gender

Unnamed: 0_level_0,Age,Gender
Unnamed: 0_level_1,mean,count
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2
Female,21.345133,113
Male,22.917178,652
Other / Non-Disclosed,24.2,15


In [10]:
# Add each gender's share of the total row count, expressed as a percentage.
# by_gender['Gender'] selects the sub-frame under the top-level "Gender" label
# (its only column is "count"); dividing by its column sum and scaling by 100
# gives the percentage stored under 'Percent'.
by_gender['Percent'] = (
    by_gender['Gender'].div(by_gender['Gender'].sum()).mul(100)
)
by_gender

Unnamed: 0_level_0,Age,Gender,Percent
Unnamed: 0_level_1,mean,count,Unnamed: 3_level_1
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Female,21.345133,113,14.487179
Male,22.917178,652,83.589744
Other / Non-Disclosed,24.2,15,1.923077


In [11]:
# Purchase count (non-null Item ID entries) and mean Price for each gender.
by_gender_purchases = purchase_data_df.groupby(by=["Gender"]).aggregate(
    {"Item ID": ["count"], "Price": ["mean"]}
)
# Keep a DataFrame-wrapped handle under the *_df name used by the merge cell.
by_gender_purchases_df = pd.DataFrame(data=by_gender_purchases)
by_gender_purchases_df

Unnamed: 0_level_0,Item ID,Price
Unnamed: 0_level_1,count,mean
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2
Female,113,3.203009
Male,652,3.017853
Other / Non-Disclosed,15,3.346


In [12]:
# Total purchase value per gender, kept as a one-column ("Price") DataFrame so
# it can be combined with the count/mean table.
# NOTE(review): sum() is applied to every column of the grouped frame before
# "Price" is selected; only the Price totals are used afterwards.
sum_purchases_group = purchase_data_df.groupby(by=["Gender"]).sum()
sum_purchases_group_df = sum_purchases_group["Price"].to_frame()
sum_purchases_group_df

Unnamed: 0_level_0,Price
Gender,Unnamed: 1_level_1
Female,361.94
Male,1967.64
Other / Non-Disclosed,50.19


In [16]:
# Combine the per-gender purchase count / average price with the per-gender
# total purchase value, matching rows on the shared "Gender" index level.
#
# by_gender_purchases_df has two-level column labels while
# sum_purchases_group_df has a single level. pandas >= 2.0 raises MergeError
# when the column levels of the two frames differ (older releases only warned
# and flattened the labels to tuples). Flattening explicitly with
# to_flat_index() yields the same tuple labels -- ("Item ID", "count") and
# ("Price", "mean") -- on every pandas version.
merge_df = pd.merge(
    by_gender_purchases_df.set_axis(
        by_gender_purchases_df.columns.to_flat_index(), axis="columns"
    ),
    sum_purchases_group_df,
    on="Gender",
)
merge_df

Unnamed: 0_level_0,"(Item ID, count)","(Price, mean)",Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,3.203009,361.94
Male,652,3.017853,1967.64
Other / Non-Disclosed,15,3.346,50.19


In [None]:
# Average Age by Gender
grouped_gender_age = purchase_data_df.groupby("Gender")
# Select "Age" before averaging so only that numeric column is aggregated;
# calling mean() on the whole grouped frame would also try to average the
# text columns (SN, Item Name).
grouped_gender_age["Age"].mean()

In [None]:
# Number of non-null SN entries for each Gender value.
grouped_player_count = purchase_data_df.groupby(by="Gender")
grouped_player_count["SN"].count()


In [None]:
# Per-gender summary of the mean purchase price and mean age.
# Computed directly from purchase_data_df so the cell does not depend on
# grouped objects created in other cells.
# NOTE(review): the second column is labelled "Average Age" here; confirm the
# label that was intended.
summary_bygender_df = (
    purchase_data_df.groupby("Gender")[["Price", "Age"]]
    .mean()
    .rename(columns={"Price": "Average Price", "Age": "Average Age"})
)
summary_bygender_df

In [None]:
# Count of non-null SN entries per Gender (counts only; no percentage is
# computed in this cell).
grouped_age = purchase_data_df.groupby(by="Gender")
grouped_age["SN"].count()

In [None]:
# Number of rows whose Gender is "Male".
male_players = purchase_data_df["Gender"].value_counts().loc["Male"]
male_players

In [None]:
# Percentage of male players.
# Stored as a one-element list; the ratio is multiplied by 100 so the value is
# a percentage rather than a 0-1 fraction, consistent with the "Percent"
# column computed for by_gender.
percent_males = [male_players / total_players * 100]
percent_males

In [None]:
# Number of rows whose Gender is "Female".
female_players = purchase_data_df["Gender"].value_counts().loc["Female"]
female_players

In [None]:
# Percentage of female players.
# Stored as a one-element list; the ratio is multiplied by 100 so the value is
# a percentage rather than a 0-1 fraction, consistent with the "Percent"
# column computed for by_gender.
percent_females = [female_players / total_players * 100]
percent_females

In [None]:
# Tip: purchase_data_df['Gender'].value_counts() lists the exact labels
# present in the Gender column.

# Number of rows whose Gender is "Other / Non-Disclosed".
other_players = purchase_data_df["Gender"].value_counts().loc["Other / Non-Disclosed"]
other_players

In [None]:
# Percentage of players whose gender is "Other / Non-Disclosed".
# Stored as a one-element list; the ratio is multiplied by 100 so the value is
# a percentage rather than a 0-1 fraction, consistent with the "Percent"
# column computed for by_gender.
percent_other = [other_players / total_players * 100]
percent_other



## Purchasing Analysis (Gender)

* Run basic calculations to obtain the purchase count, average purchase price, average purchase total per person, etc., by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# Purchasing summary by gender: purchase count, average purchase price, total
# purchase value, and average total spent per distinct player (SN).
purchases_by_gender = purchase_data_df.groupby("Gender")
gender_summary = pd.DataFrame(
    {
        "Purchase Count": purchases_by_gender["Price"].count(),
        "Average Purchase Price": purchases_by_gender["Price"].mean(),
        "Total Purchase Value": purchases_by_gender["Price"].sum(),
    }
)
# Divide by the number of distinct screen names so that a player with several
# purchases is counted once in the per-person average.
gender_summary["Avg Total Purchase per Person"] = (
    gender_summary["Total Purchase Value"] / purchases_by_gender["SN"].nunique()
)
gender_summary

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain the purchase count, average purchase price, average purchase total per person, etc., for the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

