In [4]:
import pandas as pd

# Location of the purchase data, relative to the notebook
pymoli = "Resources/purchase_data.csv"

# Parse the CSV once: pymoli_xx keeps the raw rows, pymoli_df is the cleaned
# version with every row containing a missing value removed.
pymoli_xx = pd.read_csv(pymoli)
# .copy() makes pymoli_df an independent frame, so adding columns to it in
# later cells cannot affect (or warn about) the raw frame it was derived from.
pymoli_df = pymoli_xx.dropna().copy()

pymoli_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [5]:
# Number of distinct screen names in the cleaned data; reused by later cells
player_count = pymoli_df["SN"].nunique()

# One row per distinct (Gender, SN, Age) combination; reused by later cells
demographics = pymoli_df.loc[:, ["Gender", "SN", "Age"]].drop_duplicates()

# Single-row table showing the total number of players
total_players_df = pd.DataFrame([player_count], columns=["Total Players"])
total_players_df

Unnamed: 0,Total Players
0,576


In [6]:
# Number of unique values in each column.
# Note: there are 780 purchase IDs but only 576 unique screen names
# (i.e. unique people who bought items).
pymoli_df.nunique()


Purchase ID    780
SN             576
Age             39
Gender           3
Item ID        179
Item Name      179
Price          145
dtype: int64

In [25]:
# Purchasing Analysis (Total): headline figures for the cleaned purchase data
number_purchases = pymoli_df["Purchase ID"].nunique()
unique_items = pymoli_df["Item ID"].nunique()
total_revenue = pymoli_df["Price"].sum()
average_price = pymoli_df["Price"].mean()

# Single-row summary; the two money columns are stored as "$x,xxx.xx" strings
Values = pd.DataFrame(
    {
        "Number of Unique Items": [unique_items],
        "Average Price": [f"${average_price:,.2f}"],
        "Number of Purchases": [number_purchases],
        "Total Revenue": [f"${total_revenue:,.2f}"],
    }
)

Values

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [9]:
# Player count by gender (not purchase count).
# `demographics` has duplicate (Gender, SN, Age) rows removed, so repeat
# purchases by the same player are counted only once here.
gender = demographics["Gender"].value_counts()
gender

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [27]:
#Gender Demographics
# Player counts are read from `gender` (value counts of the de-duplicated
# demographics frame) rather than typed in by hand, so the table stays correct
# if the underlying data changes. The count_* names are reused by later cells.
count_male_players = int(gender["Male"])
count_female_players = int(gender["Female"])
count_other_players = int(gender["Other / Non-Disclosed"])

# Share of all players, as a percentage of the distinct screen-name count
percent_male_players = (count_male_players / player_count) * 100
percent_female_players = (count_female_players / player_count) * 100
percent_other_players = (count_other_players / player_count) * 100

#create gender values data frame
gender_values = pd.DataFrame(
    {
        "Total Players": [count_male_players, count_female_players, count_other_players],
        "Percentage of Players": [percent_male_players, percent_female_players, percent_other_players],
    },
    index=["Male", "Female", "Other / Non-Disclosed"],
)

# Render the percentages as display strings such as "84.03%"
gender_values["Percentage of Players"] = gender_values["Percentage of Players"].map('{:,.2f}%'.format)
gender_values

Unnamed: 0,Total Players,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [12]:
# Purchase count by gender: computed on the full purchase table, so a player
# who bought several items is counted once per purchase.
gender_count = pymoli_df["Gender"].value_counts()
gender_count

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [13]:
# Per-gender aggregates of Price.
# The Price column is selected *before* aggregating: aggregating the whole
# frame first would also try to sum/average the text columns (SN, Item Name),
# which raises a TypeError for mean() on pandas >= 2.0.
gender_purchase_total = pymoli_df.groupby(["Gender"])["Price"].sum().rename("Total Purchase Value")

gender_avg_price = pymoli_df.groupby(["Gender"])["Price"].mean().rename("Average Purchase Price")

In [14]:
# Calculations for the per-gender summary table.
# Purchase counts are read from `gender_count` (one entry per purchase row)
# rather than typed in by hand, so they follow the data.
purchase_count_male = int(gender_count["Male"])
purchase_count_female = int(gender_count["Female"])
purchase_count_other = int(gender_count["Other / Non-Disclosed"])

# Male: average price per purchase, total spent, and total spent per player
avg_price_male = gender_avg_price["Male"]
male_total_value = gender_purchase_total["Male"]
male_avg_total = male_total_value / count_male_players

# Female
avg_price_female = gender_avg_price["Female"]
female_total_value = gender_purchase_total["Female"]
female_avg_total = female_total_value / count_female_players

# Other / Non-Disclosed
avg_price_other = gender_avg_price["Other / Non-Disclosed"]
other_total_value = gender_purchase_total["Other / Non-Disclosed"]
other_avg_total = other_total_value / count_other_players


In [28]:
# Purchasing Analysis (Gender)

# One row per gender, built from the values computed in the previous cell
purchase_summary = pd.DataFrame(
    [
        [purchase_count_male, avg_price_male, male_total_value, male_avg_total],
        [purchase_count_female, avg_price_female, female_total_value, female_avg_total],
        [purchase_count_other, avg_price_other, other_total_value, other_avg_total],
    ],
    index=["Male", "Female", "Other / Non-Disclosed"],
    columns=["Purchase Count", "Avg Purchase Price",
             "Total Purchase Value", "Avg Total Purchase per Person"],
    dtype=object,
)

# Render the three money columns as "$x,xxx.xx" strings
purchase_summary = purchase_summary.assign(
    **{
        money_column: purchase_summary[money_column].map('${:,.2f}'.format)
        for money_column in ("Avg Purchase Price", "Total Purchase Value",
                             "Avg Total Purchase per Person")
    }
)

purchase_summary

Unnamed: 0,Purchase Count,Avg Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Male,652,$3.02,"$1,967.64",$4.07
Female,113,$3.20,$361.94,$4.47
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [29]:
#Age Demographics

# Bin edges for pd.cut; each bin includes its right edge: (0, 9], (9, 14], ...
# The last edge is open-ended so the "40+" bucket covers every age above 39.
# With a finite cap, older players would fall outside all bins, become NaN and
# vanish from the counts while still being part of the percentage denominator.
age_bins = [0, 9, 14, 19, 24, 29, 34, 39, float("inf")]

# One label per bin, in the same order as the edges above
group_labels_age = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34",
                    "35-39", "40+"]

# Tag each player with an age range, then count players per range
demographics['Age Ranges'] = pd.cut(demographics["Age"], age_bins, labels=group_labels_age)
age_demo_total = demographics["Age Ranges"].value_counts()
age_demo_percent = (age_demo_total / player_count) * 100

# value_counts orders by frequency; sort_index restores the age-range order
age_demo_df = pd.DataFrame({'Total Count': age_demo_total, "Percentage of Players": age_demo_percent})
age_demo_df = age_demo_df.sort_index()

# Render the percentages as display strings such as "44.79%"
age_demo_df["Percentage of Players"] = age_demo_df["Percentage of Players"].map('{:,.2f}%'.format)

age_demo_df

Unnamed: 0,Total Count,Percentage of Players
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


In [30]:
#Purchasing Analysis by Age:

# Tag every purchase with the buyer's age range (same bins as the demographics table)
pymoli_df["Age Ranges"] = pd.cut(pymoli_df["Age"], age_bins, labels=group_labels_age)

# Calculations
# Price is selected before aggregating so the text columns are never summed or
# averaged (mean() over them raises a TypeError on pandas >= 2.0).
# observed=False keeps a row for every age range, even one with no purchases.
age_price_groups = pymoli_df.groupby(["Age Ranges"], observed=False)["Price"]
age_purchase_total = age_price_groups.sum().rename("Total Purchase Value")
age_mean = age_price_groups.mean().rename("Average Purchase Price")
age_counts = age_price_groups.count().rename("Purchase Count")
# Total spent in each range divided by the number of distinct players in it
age_per_person = age_purchase_total / age_demo_df["Total Count"]

purchase_data_df = pd.DataFrame({"Purchase Count": age_counts, "Avg Purchase Price": age_mean,
                                 "Total Purchase Value": age_purchase_total, "Avg Total Purchase per Person": age_per_person})

# Render the three money columns as "$x,xxx.xx" strings
purchase_data_df["Avg Purchase Price"] = purchase_data_df["Avg Purchase Price"].map('${:,.2f}'.format)
purchase_data_df["Total Purchase Value"] = purchase_data_df["Total Purchase Value"].map('${:,.2f}'.format)
purchase_data_df["Avg Total Purchase per Person"] = purchase_data_df["Avg Total Purchase per Person"].map('${:,.2f}'.format)

purchase_data_df

Unnamed: 0_level_0,Purchase Count,Avg Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


In [18]:
# Per-player (screen name) aggregates of Price.
# Price is selected before aggregating so the text columns (Gender, Item Name)
# are never summed or averaged; mean() over them raises a TypeError on
# pandas >= 2.0.
SN_total_purchase = pymoli_xx.groupby(["SN"])["Price"].sum().rename("Total Purchase Value")
SN_avg_price = pymoli_xx.groupby(["SN"])["Price"].mean().rename("Average Purchase Price")
SN_purchase_count = pymoli_xx.groupby(["SN"])["Price"].count().rename("Purchase Count")


In [31]:
# Top spenders by screen name
SN_purchase_summary = pd.DataFrame(
    {
        "Total Purchase Value": SN_total_purchase,
        "Avg Purchase Price": SN_avg_price,
        "Purchase Count": SN_purchase_count,
    }
)

# Rank on the numeric totals first, then reorder the columns and render the
# money columns as "$x,xxx.xx" strings
top_spenders = (
    SN_purchase_summary
    .sort_values("Total Purchase Value", ascending=False)
    .loc[:, ["Purchase Count", "Avg Purchase Price", "Total Purchase Value"]]
    .assign(
        **{
            "Avg Purchase Price": lambda ranked: ranked["Avg Purchase Price"].map('${:,.2f}'.format),
            "Total Purchase Value": lambda ranked: ranked["Total Purchase Value"].map('${:,.2f}'.format),
        }
    )
)

top_spenders.head()

Unnamed: 0_level_0,Purchase Count,Avg Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


In [20]:
# Keep only the columns needed for the item-level analysis
Items_analysis = pymoli_xx.loc[:, ["Item ID", "Item Name", "Price"]]

In [21]:
# Per-item aggregates of Price, keyed by (Item ID, Item Name)
item_price_groups = Items_analysis.groupby(["Item ID", "Item Name"])["Price"]

item_purchase_count = item_price_groups.count().rename("Purchase Count")
item_price = item_price_groups.mean().rename("Item Price")
item_total_purchase = item_price_groups.sum().rename("Total Purchase Value")


In [32]:
# Most popular items: rank by number of purchases
items_summary = pd.DataFrame(
    {
        "Purchase Count": item_purchase_count,
        "Item Price": item_price,
        "Total Purchase Value": item_total_purchase,
    }
)

# Money columns become "$x,xxx.xx" display strings; the ranking below uses the
# numeric purchase count, so formatting first does not affect the order
for money_column in ("Item Price", "Total Purchase Value"):
    items_summary[money_column] = items_summary[money_column].map('${:,.2f}'.format)

items_summary = items_summary.sort_values(by='Purchase Count', ascending=False)

items_summary.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


In [33]:
# Most profitable items: rank by total purchase value

# The three Series share one (Item ID, Item Name) index and are already named
# "Purchase Count", "Item Price" and "Total Purchase Value", so placing them
# side by side yields the summary columns directly.
items_summary_profitable = pd.concat(
    [item_purchase_count, item_price, item_total_purchase], axis=1
)

# Sort while the totals are still numeric, then render money as "$x,xxx.xx"
items_summary_profitable = items_summary_profitable.sort_values(
    by='Total Purchase Value', ascending=False
)
items_summary_profitable = items_summary_profitable.assign(
    **{
        money_column: items_summary_profitable[money_column].map('${:,.2f}'.format)
        for money_column in ("Item Price", "Total Purchase Value")
    }
)

items_summary_profitable.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80


`Trend Analysis:`

**Trend 1:**
   Out of the 576 total players, the large majority are male: 484, or 84.03%. Furthermore, the purchase value of the male players far outweighed that of any other gender. Male players spent \$1,967.64 of the \$2,379.77 spent across all three groups, which means that 82.68% of all money spent on the game came from male players. It can thus be deduced that Heroes of Pymoli is favored by boys and men more so than by any other gender.

**Trend 2:**
   Furthermore, Heroes of Pymoli is most popular amongst young adults. Of the 576 total players, almost half were between the ages of 20 and 24. This age group consisted of 258 players, or 44.79% of the total. Expanding the age range by just 5 years on either end, to ages 15 through 29, raises the share of players to 76.74%. Players between the ages of 20 and 24 also made the most in-game purchases; their purchase count of 365 accounts for 46.79% of all purchases. Thus, Heroes of Pymoli is played far more by young adults than by older or younger age demographics.

**Trend 3:**

Finally, those in the 20 to 24 age range are more likely not only to play Heroes of Pymoli, but to spend money in the game as well. This age demographic has the highest total purchase value of all the age ranges. Their spending comprised 46.81% of the total purchase value across all ages: \$1,114.06 of the \$2,379.77 spent on Heroes of Pymoli. In addition, the average total purchase per person in this group was also high at \$4.32, ranking in the top three among the age ranges. Thus it can be concluded that, for Heroes of Pymoli, the 20 to 24 age range is a particularly important demographic that makes up a large part of its core player base.