In [None]:
#Importing Dependencies 
import pandas as pd
import numpy as np

In [None]:
# Relative path to the game purchase CSV; passed to pd.read_csv in the loading cell
data_file = "Resources/purchase_data.csv"

In [None]:
# Load the purchase data into a DataFrame and preview its first five rows.
# (The preview previously used .tail(), which shows the bottom of the file
# rather than the top portion this cell is meant to display.)
game_data = pd.read_csv(data_file)
game_data.head()

In [None]:
# Number of distinct players: every distinct "SN" value is counted once,
# no matter how many purchase rows it appears in
total_players = game_data["SN"].nunique()

# One-row summary table holding the player count, shown as the cell output
total_players_df = pd.DataFrame({"Total Players": [total_players]})
total_players_df

In [None]:
# Headline purchasing figures computed over the whole data set
unique_items = game_data["Item ID"].unique().size   # distinct item ids
mean_price = game_data["Price"].mean()              # average price per purchase row
purchase_num = game_data["Purchase ID"].size        # one row per purchase
revenue_sum = game_data["Price"].sum()              # total of all prices

# Assemble the figures into a one-row summary table
purchasing_analysis_df = pd.DataFrame({
    "Number of Unique Items": [unique_items],
    "Average Price": [mean_price],
    "Number of Purchases": [purchase_num],
    "Total Revenue": [revenue_sum],
})

# Render the two money columns as dollar strings, e.g. "$1,234.56"
for money_col in ("Average Price", "Total Revenue"):
    purchasing_analysis_df[money_col] = (
        purchasing_analysis_df[money_col].astype(float).map("${:,.2f}".format)
    )

# Show the summary table
purchasing_analysis_df

In [None]:
# Group the purchase rows by gender
gender_info = game_data.groupby("Gender")
# Distinct players per gender: count unique "SN" values only, instead of
# computing nunique() for every column and then discarding all but "SN"
gender_count = gender_info["SN"].nunique()
# Share of all players (total_players comes from the player-count cell), in percent
gender_percent = gender_count / total_players * 100

# Demographics table: percentage and head count per gender
gender_df = pd.DataFrame({"Percent of Players": gender_percent, "Total Count": gender_count})

# Largest group first; show the percentage with two decimals.
# The format key must match the column name exactly: a key naming a missing
# column is silently ignored by Styler.format, which left the percentages unformatted.
gender_df.sort_values("Total Count", ascending=False).style.format({"Percent of Players": "{:.2f}"})

In [None]:
# Number of purchases made by each gender
gender_analysis_count = gender_info["Purchase ID"].count()
# Average price paid per purchase, by gender
gender_mean_purchase_price = gender_info["Price"].mean()
# Total amount spent by each gender (despite the name, this is a sum, not a mean)
gender_mean_purchase_sum = gender_info["Price"].sum()
# Average total spend per distinct player of each gender
# (gender_count is the per-gender unique-"SN" count from the demographics cell)
gender_purchase_per_person = gender_mean_purchase_sum / gender_count

# Purchasing summary by gender. The summed-price column is labelled
# "Total Purchase Value" because it holds totals, not averages; this also
# matches the label used in the age-group purchasing table.
gender_df = pd.DataFrame({
    "Purchase Count": gender_analysis_count,
    "Average Purchase Price": gender_mean_purchase_price,
    "Total Purchase Value": gender_mean_purchase_sum,
    "Average Purchase Total Per Person": gender_purchase_per_person,
})
# Label the index explicitly
gender_df.index.name = "Gender"
# Show the three money columns as dollar amounts
gender_df.style.format({
    "Average Purchase Price": "${:,.2f}",
    "Total Purchase Value": "${:,.2f}",
    "Average Purchase Total Per Person": "${:,.2f}",
})

In [None]:
# Lower edge of each age bracket; used with right=False so every bin is
# [lower, next lower). Whole-number edges replace the former 9.99/14.99/...
# values, which misfiled fractional ages (e.g. 9.995) and excluded age 0.
# The open-ended last edge puts every age of 40 or more into "40+".
bin_for_age = [0, 10, 15, 20, 25, 30, 35, 40, float("inf")]
# Display label for each bracket, in the same order as the bins
bin_for_years = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Tag every purchase row with its age bracket (adds/overwrites the "Age Group" column)
game_data["Age Group"] = pd.cut(game_data["Age"], bins=bin_for_age, labels=bin_for_years, right=False)

# Group by bracket. "Age Group" is categorical, so observed=False is passed
# explicitly to keep every bracket in the result (even an empty one) without
# relying on the pandas default for this option.
age_group = game_data.groupby("Age Group", observed=False)
# Distinct players ("SN" values) per bracket
age_count = age_group["SN"].nunique()
# Share of all players in each bracket, in percent
age_percent = (age_count / total_players) * 100

# Demographics table: percentage and head count per age bracket
age_full_group = pd.DataFrame({"Percentage of Players": age_percent, "Total Count": age_count})
# Show the percentage with two decimals
age_full_group.style.format({"Percentage of Players": "{:,.2f}"})

In [None]:
# Per-bracket purchase statistics, built on the age_group grouping and the
# age_count player counts from the age demographics cell
age_purchase = age_group["Purchase ID"].count()        # purchases per bracket
age_mean_purchase_price = age_group["Price"].mean()    # average price per purchase
age_sum_value = age_group["Price"].sum()               # total spent per bracket
# Total spent in a bracket divided by its number of distinct players
average_purchase_per_person = age_sum_value / age_count

# Purchasing summary by age bracket
age_value_df = pd.DataFrame(
    {
        "Purchase Count": age_purchase,
        "Average Purchase Price": age_mean_purchase_price,
        "Total Purchase Value": age_sum_value,
        "Average Purchase Total Per Person": average_purchase_per_person,
    }
)
# Drop the index title so only the bracket labels are shown
age_value_df.index.name = None

# Show the three money columns as dollar amounts
age_value_df.style.format(
    {
        column: "${:,.2f}"
        for column in (
            "Average Purchase Price",
            "Total Purchase Value",
            "Average Purchase Total Per Person",
        )
    }
)

In [None]:
# Group the purchase rows by player ("SN")
big_spender = game_data.groupby("SN")
# Number of purchases per player
spender_purchase_count = big_spender["Purchase ID"].count()
# Average price per purchase for each player
spender_purchase_mean = big_spender["Price"].mean()
# Total amount spent by each player
spender_purchase_sum = big_spender["Price"].sum()

# Per-player spending summary
spender_df = pd.DataFrame({
    "Purchase Count": spender_purchase_count,
    "Average Purchase Price": spender_purchase_mean,
    "Total Purchase Value": spender_purchase_sum,
})

# Five players with the highest total spend, largest first
sort_spenders = spender_df.sort_values("Total Purchase Value", ascending=False).head()
# Show the money columns as dollar amounts. Only columns that exist in the
# table are listed; the former "Average Purchase Total" key matched no column.
sort_spenders.style.format({"Average Purchase Price": "${:,.2f}", "Total Purchase Value": "${:,.2f}"})

In [None]:
# Keep only the columns needed for the per-item statistics
pop_items = game_data.loc[:, ["Item ID", "Item Name", "Price"]]
# One group per (Item ID, Item Name) pair
pop_items_group = pop_items.groupby(by=["Item ID", "Item Name"])

# Times each item was purchased (non-null "Price" rows per group)
pop_purchase_count = pop_items_group["Price"].count()
# Total revenue generated by each item
pop_value_sum = pop_items_group["Price"].sum()
# Price per purchase of the item: total revenue divided by purchase count
pop_individual_price = pop_value_sum / pop_purchase_count

# Per-item summary table
pop_items_df = pd.DataFrame(
    {
        "Purchase Count": pop_purchase_count,
        "Item Price": pop_individual_price,
        "Total Purchase Value": pop_value_sum,
    }
)

# Five most frequently purchased items, most popular first
pop_items_df_sorted = pop_items_df.sort_values(by="Purchase Count", ascending=False).head(5)
# Show the money columns as dollar amounts
pop_items_df_sorted.style.format(
    {column: "${:,.2f}" for column in ("Item Price", "Total Purchase Value")}
)

In [None]:
# Re-rank the full per-item table (pop_items_df, not the popularity top five)
# by revenue and keep the five highest-grossing items
most_profitable_items = pop_items_df.sort_values(by="Total Purchase Value", ascending=False).head(5)
# Show "Item Price" and "Total Purchase Value" as dollar amounts
most_profitable_items.style.format(
    {column: "${:,.2f}" for column in ("Item Price", "Total Purchase Value")}
)