Observations:

1. Male players make up the large majority of the player base (84.03% of unique players).
2.	Final Critic is the top-selling item by both number of purchases (13) and total sales value ($59.99).
3.	Players between 15 and 29 spend the most on in-game items in total, while the <10, 20-24, and 35-39 age groups have the highest average spend, both per purchase and per player.


In [1]:
#Load Dependencies
import os
from os import path
import pandas as pd
#pd.options.display.float_format = '{:,.2f}'.format
import csv

In [2]:
#Relative path of the purchase data CSV that is loaded into a dataframe below
purchase_data = "Resources/purchase_data.csv"

In [3]:
#Read the purchase CSV into a dataframe, decoding it as ISO-8859-1
purchase_data_df = pd.read_csv(
    filepath_or_buffer=purchase_data,
    encoding="ISO-8859-1",
)

In [4]:
#Give the "SN" column the more descriptive name "Screen Name"
column_renames = {"SN": "Screen Name"}
purchase_data_df = purchase_data_df.rename(columns=column_renames)

In [5]:
#Create Age Group Bins
#bins holds the bracket edges handed to pd.cut; there is one label per bracket.
#The final edge is open-ended so that any age above 39 (including ages over 100)
#is labelled "40+" instead of falling outside the bins and becoming NaN.
bins = [0, 9, 14, 19, 24, 29, 34, 39, float("inf")]
age_groups = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

In [6]:
#Label every purchase row with the age bracket its "Age" value falls into
purchase_data_df["Age Group"] = pd.cut(
    x=purchase_data_df["Age"],
    bins=bins,
    labels=age_groups,
    include_lowest=True,
)

In [7]:
#Count the distinct screen names, i.e. the number of unique players
unique_screen_names = purchase_data_df["Screen Name"].unique()
player_count = len(unique_screen_names)

In [8]:
#One-row summary table holding the unique player count
player_summary_df = pd.DataFrame(
    {"Total Unique Players": player_count},
    index=[0],
)

In [9]:
#Count the distinct item IDs that appear in the purchase data
unique_item_ids = purchase_data_df["Item ID"].unique()
item_count = len(unique_item_ids)

In [10]:
#calculate average item purchase price
#Use the vectorized pandas mean of the Price column rather than the Python
#builtin sum() divided by the length of a different column; missing prices,
#if any, are skipped instead of turning the whole result into NaN.
average_price = purchase_data_df["Price"].mean()

In [11]:
#Total number of purchases: one row of the dataframe per purchase
total_purchases = len(purchase_data_df.index)

In [12]:
#Calculate Total Revenue
#Series.sum() is the vectorized reduction; the Python builtin sum() iterates the
#Series element by element and returns NaN if any price is missing.
total_revenue = purchase_data_df["Price"].sum()

In [13]:
#Create purchase summary table (a single row of headline figures)
purchase_summary_row = {
    "# of Unique Items": [item_count],
    "Avg Price": [round(average_price, 2)],
    "# of Purchases": [total_purchases],
    "Total Revenue": [total_revenue],
}
purchase_summary_df = pd.DataFrame(purchase_summary_row)

In [14]:
#Start the gender summary from the distinct (Gender, Screen Name) pairs
Gender_df = purchase_data_df[["Gender", "Screen Name"]].drop_duplicates()

In [15]:
#Collapse to one row per gender, counting the screen names in each
Gender_df = Gender_df.groupby(by="Gender").count()

In [16]:
#Add each gender's share of the total player count as a "Percentage" column
players_per_gender = Gender_df["Screen Name"]
gender_share = players_per_gender / players_per_gender.sum()
Gender_df.insert(1, "Percentage", gender_share)

In [17]:
#The "Screen Name" column now holds counts, so name it accordingly
gender_column_names = {"Screen Name": "Total Player Count"}
Gender_df = Gender_df.rename(columns=gender_column_names)

In [18]:
#Order the gender summary from the largest share of players to the smallest
Gender_df = Gender_df.sort_values(by="Percentage", ascending=False)

In [19]:
#Create GenderSales summary dataframe: number of purchases made by each gender
purchases_by_gender = purchase_data_df.groupby("Gender")[["Screen Name"]].count()
GenderSales_df = purchases_by_gender.rename(columns={"Screen Name": "Purchase Count"})

In [20]:
#Calculate total purchase value by gender
#Assign a plain Series, aligned on the Gender index, instead of inserting a
#one-column MultiIndex DataFrame. Column assignment can also be re-run safely,
#whereas DataFrame.insert raises ValueError once the column already exists.
GenderSales_df["Total Purchase Value"] = purchase_data_df.groupby("Gender")["Price"].sum()

In [21]:
#Average price per purchase for each gender: total value / number of purchases
avg_price_by_gender = GenderSales_df["Total Purchase Value"] / GenderSales_df["Purchase Count"]
GenderSales_df.insert(1, "Average Purchase Price (Gender)", round(avg_price_by_gender, 2))

In [22]:
#Average spend per unique player for each gender: total value / unique player count
#(the division aligns GenderSales_df and Gender_df on their shared Gender index)
avg_spend_per_player = GenderSales_df["Total Purchase Value"] / Gender_df["Total Player Count"]
GenderSales_df.insert(3, "Average Purchase Price (Player/Gender)", round(avg_spend_per_player, 2))

In [23]:
#create agegroup summary df: number of unique players in each age group
#"Age Group" is categorical (produced by pd.cut), so observed=False is passed
#explicitly: every age bracket keeps a row even when it has no players, and the
#result does not depend on the pandas default for categorical groupers.
AgeGroups_df = (
    purchase_data_df[["Age Group", "Screen Name"]]
    .drop_duplicates()
    .groupby("Age Group", observed=False)
    .count()
    .rename(columns={"Screen Name": "Total Count"})
)

In [24]:
#Calculate percentage of players each age group makes up
#Plain column assignment puts the column in the same place as insert(1, ...) on
#this one-column frame, and it can be re-run: DataFrame.insert raises ValueError
#when the column already exists.
AgeGroups_df["Percentage of Players"] = AgeGroups_df["Total Count"] / player_count

In [25]:
#Create agesales summary dataframe: purchase count and total value per age group
#Grouping directly on the "Age Group" column replaces the earlier set_index step,
#and string aggregators give flat column names, so no MultiIndex has to be dropped.
#observed=False keeps every age bracket in the table, even one with no purchases.
AgeSales_df = (
    purchase_data_df
    .groupby("Age Group", observed=False)
    .agg({"Screen Name": "count", "Price": "sum"})
    .rename(columns={"Screen Name": "Purchase Count", "Price": "Total Purchase Value"})
    .rename_axis(None, axis=0)
)

In [26]:
#Average price per purchase for each age group: total value / number of purchases
avg_price_by_age = AgeSales_df["Total Purchase Value"] / AgeSales_df["Purchase Count"]
AgeSales_df.insert(1, "Average Purchase Price (Age Group)", round(avg_price_by_age, 2))

In [27]:
#Average spend per unique player for each age group: total value / unique player count
#(the division aligns AgeSales_df and AgeGroups_df on their age-group index labels)
avg_spend_per_player_by_age = AgeSales_df["Total Purchase Value"] / AgeGroups_df["Total Count"]
AgeSales_df.insert(3, "Average Purchase Price (Player/Age Group)", round(avg_spend_per_player_by_age, 2))

In [28]:
#create top purchases summary dataframe: one row per player, biggest spenders first
#Each column is computed straight from the grouped data. The earlier version
#selected "Price" twice, renamed columns by position and assigned through a
#MultiIndex column; the resulting table is the same.
purchases_by_player = purchase_data_df.groupby("Screen Name")
TopPurchase_df = pd.DataFrame({
    "Purchase Count": purchases_by_player["Purchase ID"].count(),
    "Average Purchase Price": purchases_by_player["Price"].mean().round(2),
    "Total Purchase Value": purchases_by_player["Price"].sum(),
})
TopPurchase_df = TopPurchase_df.sort_values(by="Total Purchase Value", ascending=False)

In [29]:
#create top item sales summary dfs by popularity and profit
#Each column is computed straight from the data grouped by (Item ID, Item Name).
#The earlier version selected "Price" twice, renamed columns by position and
#assigned through a MultiIndex column; the resulting tables are the same.
sales_by_item = purchase_data_df.groupby(["Item ID", "Item Name"])
TopSalesPop_df = pd.DataFrame({
    "Sales Count": sales_by_item["Purchase ID"].count(),
    "Average Sales Price": sales_by_item["Price"].mean().round(2),
    "Total Sales Value": sales_by_item["Price"].sum(),
})
#Popularity ranking: most frequently purchased items first
TopSalesPop_df = TopSalesPop_df.sort_values(by="Sales Count", ascending=False)
#Sales-value ranking: re-sort the popularity table into a new frame, as before
TopSalesProf_df = TopSalesPop_df.sort_values(by="Total Sales Value", ascending=False)

In [30]:
#Format dataframe columns for display
#Money columns all use one currency template (with a thousands separator) and
#share columns use one percent template, so the formatting cannot drift between
#tables.
CURRENCY_FORMAT = "${:,.2f}"
PERCENT_FORMAT = "{:.2%}"


def _format_columns(frame, columns, template):
    """Overwrite each listed column of ``frame`` with ``template``-formatted strings.

    Values that are already strings are passed through unchanged, so running
    this cell a second time does not raise on columns it has already formatted.
    """
    for column in columns:
        frame[column] = frame[column].map(
            lambda value: value if isinstance(value, str) else template.format(value)
        )


_format_columns(purchase_summary_df, ["Avg Price", "Total Revenue"], CURRENCY_FORMAT)
_format_columns(Gender_df, ["Percentage"], PERCENT_FORMAT)
_format_columns(
    GenderSales_df,
    [
        "Average Purchase Price (Gender)",
        "Total Purchase Value",
        "Average Purchase Price (Player/Gender)",
    ],
    CURRENCY_FORMAT,
)
_format_columns(AgeGroups_df, ["Percentage of Players"], PERCENT_FORMAT)
_format_columns(
    AgeSales_df,
    [
        "Average Purchase Price (Age Group)",
        "Total Purchase Value",
        "Average Purchase Price (Player/Age Group)",
    ],
    CURRENCY_FORMAT,
)
_format_columns(TopPurchase_df, ["Average Purchase Price", "Total Purchase Value"], CURRENCY_FORMAT)
_format_columns(TopSalesPop_df, ["Average Sales Price", "Total Sales Value"], CURRENCY_FORMAT)
_format_columns(TopSalesProf_df, ["Average Sales Price", "Total Sales Value"], CURRENCY_FORMAT)

In [31]:
player_summary_df

Unnamed: 0,Total Unique Players
0,576


In [32]:
purchase_summary_df

Unnamed: 0,# of Unique Items,Avg Price,# of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [33]:
Gender_df

Unnamed: 0_level_0,Total Player Count,Percentage
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [34]:
GenderSales_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price (Gender),Total Purchase Value,Average Purchase Price (Player/Gender)
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [35]:
AgeGroups_df

Unnamed: 0_level_0,Total Count,Percentage of Players
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


In [36]:
AgeSales_df

Unnamed: 0,Purchase Count,Average Purchase Price (Age Group),Total Purchase Value,Average Purchase Price (Player/Age Group)
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


In [37]:
TopPurchase_df.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Screen Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


In [38]:
TopSalesPop_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Sales Count,Average Sales Price,Total Sales Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


In [39]:
TopSalesProf_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Sales Count,Average Sales Price,Total Sales Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
