### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

## Player Count

* Display the total number of players


In [11]:
unique_players = purchase_data.loc[:, ['SN']]
unique_players = unique_players.drop_duplicates()
total_player = unique_players.count()[0]  #get all the unique values first, using SN at this point

pd.DataFrame({'Total Players': [total_player]})

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [12]:
item_id = purchase_data['Item ID'].value_counts() #get Number of Unique items via "Item ID"
#print(item_id.shape[0])

total_revenue = purchase_data['Price'].sum() #sum of price to get revenue
#print(total_revenue)
average_price = round(purchase_data['Price'].mean() , 2) #mean of price to get average price
#print(average_price)

num_purchases = purchase_data['Purchase ID'].value_counts() #get number of purchases via "Purchase ID"
#print(num_purchases.shape[0])

purchase_analysis = {'Number of Unique Items':[item_id.shape[0]], 'Average Price': [average_price],
                     'Number of Purchases':[num_purchases.shape[0]], 'Total Revenue': [total_revenue] }


purchasing_analysis_df = pd.DataFrame(purchase_analysis, index=[0]) #creating a new DF on purchase_analysis dictionary

purchasing_analysis_df['Average Price'] = purchasing_analysis_df['Average Price'].map('${:.2f}'.format)
purchasing_analysis_df['Total Revenue'] = purchasing_analysis_df['Total Revenue'].map('${:.2f}'.format)
# pd.options.display.float_format = '${:,.2f}'.format - cannot be used in this case
# as it will make all float with the same format

purchasing_analysis_df




Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,$2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [13]:
gender_count = purchase_data.groupby(['Gender'])
gender_display = gender_count['Gender'].count()
# print(gender_display) to identify the different type of genders

male_player = (purchase_data['Gender'] == 'Male').sum()
female_player = (purchase_data['Gender'] == 'Female').sum()
nondis_player = (purchase_data['Gender'] == 'Other / Non-Disclosed').sum()
#get all the number of players according to gender, can also use the groupby function

all_player = male_player + female_player + nondis_player #get the total for all players and their respective percentages
male_perc = ((male_player/all_player)*100)
female_perc = (female_player/all_player)*100
nondis_perc = (nondis_player/all_player)*100
combined_gender = {'Total Players': [male_player, female_player, nondis_player],  
               'Percentage of Players':[male_perc, female_perc, nondis_perc]} 

gender_demo_df = pd.DataFrame(combined_gender, index=['Male','Female','Other / Non-Disclosed'], columns=['Total Players', 'Percentage of Players'])
gender_demo_df['Percentage of Players'] = gender_demo_df['Percentage of Players'].map('{:.2f}%'.format)
#formatting to have Percentage in 2 decimals and % at the end. 
gender_demo_df


Unnamed: 0,Total Players,Percentage of Players
Male,652,83.59%
Female,113,14.49%
Other / Non-Disclosed,15,1.92%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [17]:
#gender_display is unique and can also be used as purchase count, or can also use Purchase ID.
#alternatively - purchase_analysis_gender = purchase_data.groupby(['Gender'])['Price'].count()

#getting average purchase price for each gender, initially using the following however its too repetitive and long winded
# average_purchase_female= purchase_data.loc[(purchase_data.Gender == 'Female'), 'Price'].mean()
# average_purchase_male = purchase_data.loc[(purchase_data.Gender == 'Male'), 'Price'].mean()
# average_purchase_nondis = purchase_data.loc[(purchase_data.Gender == 'Other / Non-Disclosed'), 'Price'].mean()
average_purchase_gender = purchase_data.groupby(['Gender'])['Price'].mean()

#getting total purchase value for each gender, similarly with the above, using groupby method, much simpler
# total_purchase_female = purchase_data.loc[(purchase_data.Gender == 'Female'), 'Price'].sum()
# total_purchase_male = purchase_data.loc[(purchase_data.Gender == 'Male'), 'Price'].sum()
# total_purchase_nondis = purchase_data.loc[(purchase_data.Gender == 'Other / Non-Disclosed'), 'Price'].sum()
total_purchase_gender = purchase_data.groupby(['Gender'])['Price'].sum()

#getting average total purchase per gender with total purchase value divided by each gender demographics
average_totpur_gender = total_purchase_gender/gender_demo_df['Total Players']


#let's create the DataFrame
purchase_analysis_gender_df = pd.DataFrame({'Purchase Count' : gender_display,
                                           'Average Purchase Price': average_purchase_gender,
                                           'Total Purchase Value': total_purchase_gender,
                                           'AVG Total Purchase per Person': average_totpur_gender })
#format money with $
purchase_analysis_gender_df['Average Purchase Price'] = purchase_analysis_gender_df['Average Purchase Price'].map('${:.2f}'.format)
purchase_analysis_gender_df['Total Purchase Value'] = purchase_analysis_gender_df['Total Purchase Value'].map('${:.2f}'.format)
purchase_analysis_gender_df['AVG Total Purchase per Person'] = purchase_analysis_gender_df['AVG Total Purchase per Person'].map('${:.2f}'.format)

purchase_analysis_gender_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,AVG Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$3.20
Male,652,$3.02,$1967.64,$3.02
Other / Non-Disclosed,15,$3.35,$50.19,$3.35


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [23]:
# Create the bins in which Data will be held 
bins = [0, 9.99, 14.99, 19.99, 24.99, 29.99, 34.99, 39.99, 999]

# Create the names for the five bins
age_bin_labels = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']

#using pd.cut to create a new column 'Age Range' in the original purchase_data
purchase_data["Age Range"] = pd.cut(purchase_data["Age"], bins, labels= age_bin_labels, include_lowest=True)
age_range_total = purchase_data['Age Range'].value_counts() #to get individual count for each age range
age_perc = (age_range_total / total_player)* 100 #to get percentage of age group

#let's create the DataFrame
age_demo_analysis_df = pd.DataFrame({'Total Count': age_range_total,
                                   'Percentage of Players': age_perc})

#formatting, to have Percentage of Players with % and rearrange the index
age_demo_analysis_df['Percentage of Players'] = age_demo_analysis_df['Percentage of Players'].map('{:.2f}%'.format)
age_demo_analysis_df.sort_index() # this is required as DataFrame is showing Total count as descending 


Unnamed: 0,Total Count,Percentage of Players
<10,23,3.99%
10-14,28,4.86%
15-19,136,23.61%
20-24,365,63.37%
25-29,101,17.53%
30-34,73,12.67%
35-39,41,7.12%
40+,13,2.26%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

