In [None]:
# Turn on IPython's "greedy" tab completion for this kernel session.
# Interactive convenience only: nothing else in this notebook reads the setting.
%config IPCompleter.greedy=True

In [None]:
#Dependencies and Setup
import pandas as pd
import numpy as np

In [None]:
# Path to the purchase data, relative to the notebook's working directory
csv_file = 'Resources/HeroesOfPymoli.csv'

# Load the CSV into a DataFrame, decoding the file as UTF-8
heroes_df = pd.read_csv(filepath_or_buffer=csv_file, encoding="UTF-8")

In [None]:
# value_counts() yields one entry per distinct screen name (SN),
# so its length is the number of distinct players.
sn_purchase_counts = heroes_df["SN"].value_counts()
total_players = len(sn_purchase_counts)

In [None]:
# PLAYER COUNT
# Wrap the player total in a single record so it renders as a one-row table
total_players_dict = [
    {"Total Players": total_players},
]

# Build the DataFrame from that record and display it
total_players_df = pd.DataFrame(data=total_players_dict)
total_players_df

In [None]:
#PURCHASING ANALYSIS (TOTAL):
#Unique Items
# Array of the distinct Item ID values (a NumPy array, despite the _df suffix)
unique_df = heroes_df['Item ID'].unique()

# The number of distinct items is the length of that array; no counting loop needed
unique_count = len(unique_df)
    


In [None]:
# Mean of the Price column over every purchase row
average_price = heroes_df.loc[:, "Price"].mean()

In [None]:
# Count of non-null Purchase ID values, i.e. the number of recorded purchases
purchases = heroes_df.loc[:, 'Purchase ID'].count()

In [None]:
# Sum of the Price column over every purchase row
total_revenue = heroes_df.loc[:, 'Price'].sum()

In [None]:
# Purchasing Analysis summary: a single row holding the four totals computed above
summary_columns = ['Number of Unique Items', 'Average Price', 'Number of Purchases', 'Total Revenue']
summary_row = [unique_count, average_price, purchases, total_revenue]
purchasings_analysis_df = pd.DataFrame([summary_row], columns=summary_columns)

# Render the money columns as dollars with thousands separators and two decimals
purchasings_analysis_df.style.format({"Average Price":"${:,.2f}","Total Revenue":"${:,.2f}"})

In [None]:
#GENDER DEMOGRAPHICS
#Count and Percentage of players (not purchases) for each gender

# A player (SN) can appear on several purchase rows, so keep one row per screen
# name before counting; counting raw rows would tally repeat buyers more than once.
# drop_duplicates keeps the first row seen for each SN.
players_df = heroes_df.drop_duplicates(subset="SN")

count_gender_df = players_df["Gender"].value_counts()
# Share of the counted players, expressed as a percentage
percent_gender_df = (count_gender_df / count_gender_df.sum()) * 100

# Build the table in one step; both Series share the same Gender index
gender_analysis_df = pd.DataFrame({"Total Count": count_gender_df,
                                   "Percentage": percent_gender_df})

# Display with the percentage rounded to two decimals and a trailing % sign
gender_analysis_df.style.format({"Percentage": "{:.2f}%"})



In [None]:
#Purchasing Analysis by Gender
# Purchase rows whose Gender is "Male". (The earlier `= pd.DataFrame` line bound
# the class itself, not an instance, and was overwritten immediately, so it is removed.)
male_purchase_df = heroes_df.loc[heroes_df['Gender'] == "Male"]
# Number of purchases and mean price within that subset
male_purchase_count = male_purchase_df['Purchase ID'].count()
male_purchase_avg = male_purchase_df['Price'].mean()

In [None]:
# Purchase rows whose Gender is "Female" (dead `= pd.DataFrame` class assignment removed)
female_purchase_df = heroes_df.loc[heroes_df['Gender'] == "Female"]
# Number of purchases and mean price within that subset
female_purchase_count = female_purchase_df['Purchase ID'].count()
female_purchase_avg = female_purchase_df['Price'].mean()

In [None]:
# Purchase rows whose Gender is "Other / Non-Disclosed" (dead `= pd.DataFrame` class assignment removed)
other_purchase_df = heroes_df.loc[heroes_df['Gender'] == "Other / Non-Disclosed"]
# Number of purchases and mean price within that subset
other_purchase_count = other_purchase_df['Purchase ID'].count()
other_purchase_avg = other_purchase_df['Price'].mean()

In [None]:
# Total spend per (Gender, SN) pair; as_index=False keeps both keys as ordinary columns
grouped_df = heroes_df.groupby(by=["Gender","SN"], as_index=False)['Price'].sum()

# Mean of those per-player totals within each gender
male_avgtotal_per_person = grouped_df.loc[grouped_df['Gender'] == 'Male', 'Price'].mean()
female_avgtotal_per_person = grouped_df.loc[grouped_df['Gender'] == 'Female', 'Price'].mean()
other_avgtotal_per_person = grouped_df.loc[grouped_df['Gender'] == "Other / Non-Disclosed", 'Price'].mean()

In [None]:
# Total purchase value per gender: sum of the per-player totals held in grouped_df
male_total_value = grouped_df.loc[grouped_df["Gender"] == "Male", "Price"].sum()
female_total_value = grouped_df.loc[grouped_df["Gender"] == "Female", "Price"].sum()
other_total_value = grouped_df.loc[grouped_df["Gender"] == "Other / Non-Disclosed", "Price"].sum()

In [None]:
#Purchasing Analysis by Gender DataFrame
# One row per gender, assembled from the per-gender figures computed in the cells above
purchase_analysis_gender = pd.DataFrame({"Gender":["Male","Female","Other / Non-Disclosed"],
                   "Purchase Count":[male_purchase_count,female_purchase_count,other_purchase_count],
                   "Average Purchase Price":[male_purchase_avg,female_purchase_avg,other_purchase_avg],
                   "Total Purchase Value":[male_total_value,female_total_value,other_total_value],
                   "Avg Total Purchase per Person":[male_avgtotal_per_person,female_avgtotal_per_person,other_avgtotal_per_person]})

# set_index returns a new DataFrame rather than modifying in place, so the result
# must be assigned back for Gender to become the row index of the displayed table.
purchase_analysis_gender = purchase_analysis_gender.set_index('Gender')

# Render the money columns as dollars with two decimals
purchase_analysis_gender.style.format({"Average Purchase Price":"${:,.2f}","Total Purchase Value":"${:,.2f}","Avg Total Purchase per Person":"${:,.2f}"})


In [None]:
#Age Demographics
#Create bins
# Left-closed edges: each bin is [low, high), so "10-14" covers 10 <= age < 15.
# Whole-number edges replace the 9.90 / 14.90 / ... cut points, which would place a
# fractional age such as 9.95 in the wrong bracket; np.inf leaves the last bin open-ended.
# For whole-number ages the resulting groups are the same as before.
age_bins = [0, 10, 15, 20, 25, 30, 35, 40, np.inf]
labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

#Add bins to dataframe
# right=False makes the left edge inclusive, so an age of 0 still lands in "<10"
heroes_df["Age Group"] = pd.cut(heroes_df["Age"], age_bins, labels=labels, right=False)

In [None]:
#Group the purchase rows by age bracket (the result is a GroupBy object, not a Series)
# "Age Group" is a categorical column. observed=False is spelled out so that empty
# brackets are still reported and pandas does not warn about the changing default.
grouped_age = heroes_df.groupby("Age Group", observed=False)

In [None]:
#Number of distinct players (unique SN values) in each age group
# Note: despite the variable name, this counts players rather than purchase rows.
purchases_by_age = grouped_age["SN"].nunique(dropna=True)

In [None]:
#Each age group's share of all players, as a percentage
player_share_by_age = purchases_by_age / total_players
percentage_purchases_by_age = player_share_by_age * 100

In [None]:
#Age demographics table: player count and percentage for each age group
age_demographics_columns = {
    "Total Count": purchases_by_age,
    "Percentages": percentage_purchases_by_age,
}
age_demographics_df = pd.DataFrame(age_demographics_columns)
age_demographics_df

In [None]:
#Number of purchase rows (non-null SN entries) in each age group
sn_by_age = grouped_age["SN"]
age_purchase_count = sn_by_age.count()

In [None]:
#Mean purchase price within each age group
price_by_age = grouped_age["Price"]
avg_age_purchase = price_by_age.mean()

In [None]:
#Sum of purchase prices within each age group
price_by_age = grouped_age["Price"]
total_age_purchase = price_by_age.sum()

In [None]:
#Average total spend per player in each age group:
#the group's total purchase value divided by its number of distinct players
avg_total_per_person_age = (total_age_purchase / purchases_by_age)

In [None]:
#Purchasing analysis by age group, assembled from the per-group Series computed above
age_info_columns = {
    "Purchase Count": age_purchase_count,
    "Avg. Purchase Price": avg_age_purchase,
    "Total Purchase Value": total_age_purchase,
    "Avg. per Person": avg_total_per_person_age,
}
age_info = pd.DataFrame(age_info_columns)

#Render the money columns as dollars with two decimals
age_info.style.format({"Avg. Purchase Price":"${:,.2f}","Total Purchase Value":"${:,.2f}", "Avg. per Person":"${:,.2f}"})

In [None]:
#One group per player screen name (SN)
spenders = heroes_df.groupby(by="SN")

#Per-player metrics: number of purchases, mean price paid, and total spend
purchase_count_by_sn = spenders["Purchase ID"].count()
avg_purchase_price_sn = spenders["Price"].mean()
total_purchase_price_sn = spenders["Price"].sum()

#Combine the three per-player Series into one table indexed by SN
top_spenders = pd.DataFrame(
    {
        "Purchase Count": purchase_count_by_sn,
        "Avg. Purchase Price": avg_purchase_price_sn,
        "Total Purchase Value": total_purchase_price_sn,
    }
)

#Order by total spend, highest first, and keep the first five rows
top_spenders_sorted = top_spenders.sort_values(by=["Total Purchase Value"], ascending=False)
top_5 = top_spenders_sorted.head(5)

#Render the money columns as dollars with two decimals
top_5.style.format({"Avg. Purchase Price":"${:,.2f}", "Total Purchase Value":"${:,.2f}"})

In [None]:
#Item Analysis
# Keep only the columns the item-level analysis uses. The earlier empty
# pd.DataFrame() placeholder was overwritten immediately, so it is removed.
items_df = heroes_df[["Item ID", "Item Name", "Price", "Purchase ID"]]

In [None]:
#One group per (Item Name, Item ID) pair
item_group_keys = ["Item Name", "Item ID"]
item_info = items_df.groupby(item_group_keys)

In [None]:
#Number of purchases (non-null Purchase ID values) recorded for each item
purchase_ids_by_item = item_info["Purchase ID"]
purchase_count_by_item = purchase_ids_by_item.count()

In [None]:
#Sum of the prices paid for each item
prices_by_item = item_info["Price"]
item_total_purchase_value = prices_by_item.sum()

In [None]:
#Per-item price, derived as the item's total purchase value divided by
#the number of times it was purchased
indv_item_price = (item_total_purchase_value / purchase_count_by_item)

In [None]:
#Item summary table indexed by (Item Name, Item ID)
popular_items = pd.DataFrame(
    {
        "Purchase Count": purchase_count_by_item,
        "Item Price": indv_item_price,
        "Total Purchase Value": item_total_purchase_value,
    }
)

#Order by purchase count, highest first, and keep the first five rows
popular_items_sorted = popular_items.sort_values(by=["Purchase Count"], ascending=False)
popular_items_top5 = popular_items_sorted.head(5)

#Render the money columns as dollars with two decimals
popular_items_top5.style.format({"Item Price":"${:,.2f}","Total Purchase Value":"${:,.2f}"})


In [None]:
#Most Profitable Items
# Rank every item in popular_items by total revenue, not just the five
# most-purchased ones. sort_values returns a new DataFrame, so the sorted result
# is kept in its own variable and that is what gets displayed.
most_profitable_items = popular_items.sort_values(["Total Purchase Value"], ascending=False).head()

# Render the money columns as dollars with two decimals
most_profitable_items.style.format({"Item Price":"${:,.2f}","Total Purchase Value":"${:,.2f}"})