# Heroes Of Pymoli Data Analysis
 
-----

1) The game is most popular with male players who make up 84.03% of all players. Female players made up 14.06%  of total players and 1.91% of players did not identify as either male nor female, or else chose not to disclose their gender.

2) The majority of purchases were made by male players (652). Female players were the gender with the next highest number of purchases (113). Players who identified as other / non-dislosed made the smallest number of purchases (15). 

3) The percent of total purchases by gender (male: 83.59%, female 14.49%, other: 1.92%) closely matches the percent of total players for each gender. This suggests that players make purchases at roughly the same rate regardless of gender. 

### IMPORT DEPENDENCIES AND READ CSV FILE

In [1]:
# Dependencies and Setup
import pandas as pd

# Assign the file path to a variable
file_path = "Resources/purchase_data.csv"

# Read the csv file and store the contents in a variable
purchase_data_df = pd.read_csv(file_path)


### PLAYER COUNT

In [2]:
#Count the number of unique names in the 'SN' column
player_count = purchase_data_df['SN'].nunique()

#Create a summary dataframe to display the total number of players
player_count = {'Total Players': [player_count]}
player_count_summary_df = pd.DataFrame(player_count)

#Display the summary data frame
player_count_summary_df


Unnamed: 0,Total Players
0,576


### PURCHASING ANALYSIS (TOTAL)

In [3]:
#Count the number of unique items in the 'Item Name' column and store the result
unique_items = purchase_data_df['Item Name'].nunique()

#Calculate the mean revenue and store the result
mean_price = round(purchase_data_df['Price'].mean(), 2)

#Count the total number of purchases and store the result
purchases_number = purchase_data_df['Purchase ID'].count()

#Calculate the total revenue and store the result
total_revenue = purchase_data_df['Price'].sum()

#Create a dictionary to store the purchasing analysis values
summary_dict = {'Number of Unique items': [unique_items],
               'Average Price': [mean_price],
               'Number of Purchases': [purchases_number],
               'Total Revenue': [total_revenue]}

#Create a summary data frame
purchasing_total_df = pd.DataFrame(summary_dict)

#Display the summary data frame
purchasing_total_df

Unnamed: 0,Number of Unique items,Average Price,Number of Purchases,Total Revenue
0,179,3.05,780,2379.77


### GENDER DEMOGRAPHICS

In [4]:
# Create a new data frame grouped by 'Gender' and store the result
grouped_gender_df = purchase_data_df.groupby('Gender')

grouped_gender_df.head()

#Count the number of unique screen names per gender and store the result
unique_gender_counts = grouped_gender_df['SN'].nunique()

#Count the number of male players and store the result
male_count = unique_gender_counts['Male']

#Count the number of female players and store the result
female_count = unique_gender_counts['Female']

#Count the number of 'Other/Non-Disclosed' players and store the result
other_count = unique_gender_counts['Other / Non-Disclosed']

#Calculate the percent of players the are 'Male' and store the result
male_percent = male_count/(male_count + female_count + other_count) * 100

#Calculate the percent of players the are 'Female' and store the result
female_percent = female_count/(male_count + female_count + other_count) * 100

#Calculate the percent of players that are 'Other/Non-Disclosed' and store the result
other_percent = other_count/(male_count + female_count + other_count) * 100

#Create a summary data frame
#Create a dictionary of dictionaries to store the count and percent of players for each gender
gender_demographics = {
    'Total Count': {'Male': male_count,
                    'Female': female_count,
                    'Other / Non-Disclosed': other_count},
    
    'Percentage of Players': {'Male': round(male_percent, 2),
                              'Female': round(female_percent, 2),
                              'Other / Non-Disclosed': round(other_percent, 2)}
}

#Create a summary data frame for the count and percent of players for each gender
gender_summary_df = pd.DataFrame(gender_demographics)

#Display the summary data frame
gender_summary_df

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


### PURCHASING ANALYSIS (GENDER)

In [5]:
#Create a data frame that is grouped by gender
gender_group_df = purchase_data_df.groupby('Gender')

#Calculate the number of purchases per gender
gender_purch_count = gender_group_df['Purchase ID'].count()
gender_purch_count

#Calculate the average puchase price per gender
gender_purch_mean = gender_group_df['Price'].mean()
gender_purch_mean

#Calculate the total purchase value per gender
gender_purch_total = gender_group_df['Price'].sum()
gender_purch_total

#Calculate the number of players per gender
gender_player_count = gender_group_df['SN'].nunique()
gender_player_count

#Calculate the average purchase total per person per gender
gender_purch_per_person = gender_purch_total/gender_player_count
gender_purch_per_person

#Create a summary dataframe
gender_purch_summary_df = pd.DataFrame({
    'Purchase Count': gender_purch_count,
    'Average Purchase Price': gender_purch_mean.map('${:,.2f}'.format),
    'Total Purchase Value': gender_purch_total.map('${:,.2f}'.format),
    'Average Total Per Person': gender_purch_per_person.map('${:,.2f}'.format)
})

#Display the summary dataframe
gender_purch_summary_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


### AGE DEMOGRAPHICS


In [6]:
#Copy the original data to a new dataframe
df_copy = purchase_data_df.copy()

#Find the maximum age included in the dataset
# print(df_copy['Age'].max())

#Create bins for ages
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]

#Create bin labels
bin_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

#Add a new column that groups the players ages into bins
df_copy['Age Groups'] = pd.cut(df_copy['Age'], bins, labels = bin_names, include_lowest = False)

#Group the dataframe based on 'Age Groups'
grouped_ages = df_copy.groupby('Age Groups')

#Count the number of purchases for each age group and store the result
player_counts = grouped_ages['SN'].nunique()

#Calculate the total number of players for all age groups and store the result
player_total = player_counts.sum() 

#Calculate the percent of total players per age group and store the result
player_percents = round((player_counts/player_total) * 100, 2)

#Merge the count and percent data frames
age_summary_df = pd.merge(player_counts, player_percents, on='Age Groups')
renamed_age_summary = age_summary_df.rename(columns={'SN_x': 'Player Count', 'SN_y': 'Percent of Players'})
renamed_age_summary

Unnamed: 0_level_0,Player Count,Percent of Players
Age Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
40+,12,2.08


### PURCHASING ANALYSIS (AGE)

In [7]:
#Copy the original data frame
age_purch_data_df = purchase_data_df.copy()

#Create bins for ages
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]

#Create bin labels
bin_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

#Add a new column that contains bins for player ages
age_purch_data_df['Age Groups'] = pd.cut(age_purch_data_df['Age'], bins, labels = bin_names, include_lowest = False)

#Group the data frame by age groups
age_purch_grouped = age_purch_data_df.groupby('Age Groups')


#Calculate the number of purchases per age group
purchase_count_age = age_purch_grouped['Price'].count()

#Calculate the average purchase price per age group
mean_price = age_purch_grouped['Price'].mean()

#Calculate the total purchase value per age group
total_purchase_value = grouped_ages['Price'].sum()

#Calculate the average total purchase per person
total_per_person = total_purchase_value/age_purch_grouped['SN'].nunique()

#Create a summary data frame
age_purch_summary_df = pd.DataFrame({
    'Purchase Count': purchase_count_age,
    'Average Purchase Price': mean_price.map('${:,.2f}'.format),
    'Total Purchase Value': total_purchase_value.map('${:,.2f}'.format),
    'Average Total Purchase Per Person': total_per_person.map('${:,.2f}'.format)
})
#Display the summary data frame
age_purch_summary_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase Per Person
Age Groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


## Top Spenders

### TOP SPENDERS

In [8]:
#Copy the origional data frame
spenders_copy_df = purchase_data_df.copy()

#Add a column that contains the value counts for values in the 'Purchase ID' column
spenders_copy_df['Purchase Count'] = spenders_copy_df['Purchase ID'].value_counts()

#Group the data by player screen names ('SN') 
grouped_sn = spenders_copy_df.groupby('SN')

#Sum the values for each player
player_total_values = grouped_sn.sum()

#Rename the Price column to 'Average Purchase Price'
renamed_player_totals = player_total_values.rename(columns={'Price':'Total Purchase Value'})

#Add a column that calculates the average purchase price for each player 
renamed_player_totals['Average Purchase Price'] = round(renamed_player_totals['Total Purchase Value'] / renamed_player_totals['Purchase Count'], 2)

#Sort the grouped data based on the total spent per player
sorted_total_spent = renamed_player_totals.sort_values('Total Purchase Value', ascending=False)

top_5_spenders = sorted_total_spent.head()

top_5_summary = top_5_spenders[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']]
top_5_summary

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.79,18.96
Idastidru52,4,3.86,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.4,13.62
Iskadarya95,3,4.37,13.1


### MOST POPULAR ITEMS


In [9]:
#Retrieve the columns for 'Item ID', 'Item Name', and 'Item Price'
popular_items_df = purchase_data_df[['Item ID', 'Item Name', 'Price']]

#Group the data by Item ID and Item Name
grouped_items = popular_items_df.groupby(['Item ID', 'Item Name'])

#Count the number of times each item was purchased
grouped_items_count = grouped_items.count()
#Rename Price to Purchase Count before merging dataframes
renamed_grouped_count = grouped_items_count.rename(columns={'Price':'Purchase Count'})

#Calculate the total purchase value for each item
grouped_items_sum = grouped_items.sum()
#Rename Price to Total Purchase Value before merging dataframes
renamed_grouped_sum = grouped_items_sum.rename(columns={'Price':'Total Purchase Value'})

#Merge the dataframes to combine total purchase value and purchase count into 1 dataframe
merged_group = renamed_grouped_count.merge(renamed_grouped_sum, on=['Item ID', 'Item Name'])

#Calculate the item price for each item and add the values to a new column 
merged_group['Item Price'] = round((merged_group['Total Purchase Value']/merged_group['Purchase Count']) , 2)




#Sort the data to find the best-selling items by count
sorted_merged_group = merged_group.sort_values('Purchase Count', ascending=False)
#Store the 5 best-selling items
best_sellers = sorted_merged_group.head()
best_sellers


Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Total Purchase Value,Item Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,59.99,4.61
178,"Oathbreaker, Last Hope of the Breaking Storm",12,50.76,4.23
145,Fiery Glass Crusader,9,41.22,4.58
132,Persuasion,9,28.99,3.22
108,"Extraction, Quickblade Of Trembling Hands",9,31.77,3.53


### Most Profitable Items

In [10]:
#Sort the table from the previous section by Total Purchase Value in descending order
most_profitable = merged_group.sort_values('Total Purchase Value', ascending=False)
#Take the first 5 values from the sorted list
most_profitable_5 = most_profitable.head() 
#Display the 5 most profitable items
most_profitable_5

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Total Purchase Value,Item Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,59.99,4.61
178,"Oathbreaker, Last Hope of the Breaking Storm",12,50.76,4.23
82,Nirvana,9,44.1,4.9
145,Fiery Glass Crusader,9,41.22,4.58
103,Singed Scalpel,8,34.8,4.35
