In [95]:
import numpy as np
import pandas as pd
import os

In [96]:
jsonpath = os.path.join('purchase_data2.json')
jsonfile = pd.read_json(jsonpath)
jsonfile.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,20,Male,93,Apocalyptic Battlescythe,4.49,Iloni35
1,21,Male,12,Dawne,3.36,Aidaira26
2,17,Male,5,Putrid Fan,2.63,Irim47
3,17,Male,123,Twilight's Carver,2.55,Irith83
4,22,Male,154,Feral Katana,4.11,Philodil43


In [97]:
# Player Count
## Total Number of Players
TotalPlayers = jsonfile['SN'].nunique()
TP = {'Total Players': [TotalPlayers]}
TotalPlayers_df = pd.DataFrame(TP)

TotalPlayers_df

Unnamed: 0,Total Players
0,74


In [98]:
# Purchasing Analysis (Total)
## Number of Unique Items
UniqueItems = jsonfile['Item Name'].nunique()

## Average Purchase Price
AvePurchase = "$" + str(round(jsonfile['Price'].mean(), 2))

## Total Number of Purchases
TotalPurchase = jsonfile['Price'].count()

## Total Revenue
TotalRevenue = "$" + str(round(jsonfile['Price'].sum(), 2))

PA = {'Number of Unique Items':[UniqueItems],
      'Average Price':AvePurchase,
      'Number of Purchases':[TotalPurchase],
      'Total Revenue':TotalRevenue
     }
PurchasingAnalysis_df = pd.DataFrame(PA)

PurchasingAnalysis_df

Unnamed: 0,Average Price,Number of Purchases,Number of Unique Items,Total Revenue
0,$2.92,78,63,$228.1


In [99]:
# Gender Demographics
UplayerList = jsonfile.drop_duplicates('SN', keep='first')
TotalCount = UplayerList["Gender"].count()
GenderCount = UplayerList["Gender"].value_counts()

## Percentage and Count of Male Players
MaleCount = GenderCount["Male"]
MalePercentage = round(MaleCount / TotalCount * 100, 2)

## Percentage and Count of Female Players
FemaleCount = GenderCount["Female"]
FemalePercentage = round(FemaleCount / TotalCount * 100, 2)

## Percentage and Count of Other / Non-Disclosed
OtherCount = GenderCount["Other / Non-Disclosed"]
OtherPercentage = round(OtherCount / TotalCount * 100, 2)

GenderDemo_df = pd.DataFrame([
    {'Percentage of Players': MalePercentage, 'Total Count': MaleCount},
    {'Percentage of Players': FemalePercentage, 'Total Count': FemaleCount},
    {'Percentage of Players': OtherPercentage, 'Total Count': OtherCount}
])
GenderDemo_df = GenderDemo_df.set_index([['Male', 'Female', 'Other / Non-Disclosed']])

GenderDemo_df

Unnamed: 0,Percentage of Players,Total Count
Male,81.08,60
Female,17.57,13
Other / Non-Disclosed,1.35,1


In [100]:
# Purchasing Analysis (Gender)
MalePurchase_df = jsonfile.loc[jsonfile['Gender'] == 'Male']
FemalePurchase_df = jsonfile.loc[jsonfile['Gender'] == 'Female']
OtherPurchase_df = jsonfile.loc[jsonfile['Gender'] == 'Other / Non-Disclosed']

## Purchase Count
MalePurchaseCount = MalePurchase_df['Price'].count()
FemalePurchaseCount = FemalePurchase_df['Price'].count()
OtherPurchaseCount = OtherPurchase_df['Price'].count()

## Average Purchase Price
MalePurchaseAve = '$'+str(round(MalePurchase_df['Price'].mean(), 2))
FemalePurchaseAve = '$'+str(round(FemalePurchase_df['Price'].mean(), 2))
OtherPurchaseAve = '$'+str(round(OtherPurchase_df['Price'].mean(), 2))

## Total Purchase Value
MalePurchaseTotal = round(MalePurchase_df['Price'].sum(), 2)
FemalePurchaseTotal = round(FemalePurchase_df['Price'].sum(), 2)
OtherPurchaseTotal = round(OtherPurchase_df['Price'].sum(), 2)

## Normalized Totals (normalizing for the # of people in each age group)
MaleNormalized = '$' + str(round((MalePurchaseTotal / MaleCount), 2))
FemaleNormalized = '$' + str(round(FemalePurchaseTotal / FemaleCount, 2))
OtherNormalized = '$' + str(round(OtherPurchaseTotal / OtherCount, 2))

PurchAnalysis_df = pd.DataFrame({'Gender':['Female', 'Male', 'Other / Non-Disclosed'],
                                 'Purchase Count':[FemalePurchaseCount, MalePurchaseCount, OtherPurchaseCount],
                                 'Average Purchase Price':[FemalePurchaseAve, MalePurchaseAve, OtherPurchaseAve],
                                 'Total Purchase Value':['$'+str(FemalePurchaseTotal), '$'+str(MalePurchaseTotal), '$'+str(OtherPurchaseTotal)],
                                 'Normalized Totals':[FemaleNormalized, MaleNormalized, OtherNormalized]},
                                 columns=['Gender', 'Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 'Normalized Totals'])

PurchAnalysis_df = PurchAnalysis_df.set_index('Gender')

PurchAnalysis_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,13,$3.18,$41.38,$3.18
Male,64,$2.88,$184.6,$3.08
Other / Non-Disclosed,1,$2.12,$2.12,$2.12


In [101]:
#Age Demographics
begin = [0]
end = [100]
middle = list(range(9, 41, 5))
bins = begin + middle + end
group_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']
jsonfile['Age Group'] = pd.cut(jsonfile['Age'], bins, labels=group_names)

## Head Count
AgeHeadcount = jsonfile.drop_duplicates('SN', keep='first')
AgeTotal = AgeHeadcount['Age Group'].count()
EveryTotal = AgeHeadcount['Age Group'].value_counts(sort=False)
EveryPercent = round((EveryTotal/AgeTotal)*100, 2)

HeadCount_df = pd.DataFrame({'Total Count':EveryTotal,
                             'Percentage of Players':EveryPercent
})

HeadCount_df

Unnamed: 0,Percentage of Players,Total Count
<10,6.76,5
10-14,4.05,3
15-19,14.86,11
20-24,45.95,34
25-29,10.81,8
30-34,8.11,6
35-39,8.11,6
40+,1.35,1


In [102]:
AgeDemographics = jsonfile.groupby('Age Group')

## Purchase Count
AgeCount = AgeDemographics['Item Name'].count()

## Average Purchase Price
AgeAve = round(AgeDemographics['Price'].mean(), 2)

## Total Purchase Value
AgeTotal = round(AgeDemographics['Price'].sum(), 2)

## Normalized Totals (normalizing for the # of people in each age group)
UAgeDemo = jsonfile.drop_duplicates('SN', keep='first')
UAgeDemoCount = UAgeDemo['Age Group'].value_counts()

AgeDemo_df = pd.DataFrame({'Purchase Count':AgeCount,
                           'Average Purchase Price':AgeAve.map('${:,.2f}'.format),
                           'Total Purchase Value':AgeTotal.map('${:,.2f}'.format)},
                           columns=['Purchase Count', 'Average Purchase Price', 'Total Purchase Value'])

AgeDemo_df['Normalized Totals'] = (round(AgeTotal / UAgeDemoCount, 2)).map('${:,.2f}'.format)

AgeDemo_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,5,$2.76,$13.82,$2.76
10-14,3,$2.99,$8.96,$2.99
15-19,11,$2.76,$30.41,$2.76
20-24,36,$3.02,$108.89,$3.20
25-29,9,$2.90,$26.11,$3.26
30-34,7,$1.98,$13.89,$2.32
35-39,6,$3.56,$21.37,$3.56
40+,1,$4.65,$4.65,$4.65


In [103]:
# Top Spenders
PlayerPurchase = jsonfile.groupby('SN')
PlayerPurchaseCount = PlayerPurchase['Item Name'].count()
PlayerPurchaseAve = round(PlayerPurchase['Price'].mean(), 2)
PlayerPurchaseTotal = round(PlayerPurchase['Price'].sum(), 2)

TopSpender_df = pd.DataFrame({'Purchase Count':PlayerPurchaseCount,
                              'Average Purchase Price':PlayerPurchaseAve.map('${:,.2f}'.format),
                              'Total Purchase Value':PlayerPurchaseTotal.map('${:,.2f}'.format)},
                              columns=['Purchase Count', 'Average Purchase Price', 'Total Purchase Value'])

TopSpender_df = TopSpender_df.sort_values(by='Total Purchase Value', ascending=False)

TopSpender_df.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sundaky74,2,$3.70,$7.41
Aidaira26,2,$2.56,$5.13
Eusty71,1,$4.81,$4.81
Chanirra64,1,$4.78,$4.78
Alarap40,1,$4.71,$4.71


In [104]:
# Most Popular Items
temp_df = jsonfile.groupby(['Item ID','Item Name'])

## Total Purchase Value & Purchase Count
ItemTotal = temp_df['Price'].sum()
ItemCount = temp_df['Item ID'].count()

PopularItem_df = pd.DataFrame({'Total Purchase Value':ItemTotal.map('${:,.2f}'.format),
                               'Purchase Count':ItemCount})

## Item Price
PopularItem_df['Item Price'] = (ItemTotal/ItemCount).map('${:,.2f}'.format)
PopularItem_df = PopularItem_df[['Purchase Count', 'Item Price', 'Total Purchase Value']]
PopularItem_df = PopularItem_df.sort_values(by='Purchase Count', ascending=False)

PopularItem_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
94,Mourning Blade,3,$3.64,$10.92
90,Betrayer,2,$4.12,$8.24
111,Misery's End,2,$1.79,$3.58
64,Fusion Pummel,2,$2.42,$4.84
154,Feral Katana,2,$4.11,$8.22


In [105]:
# Most Profitable Items
temp_df = jsonfile.groupby(['Item ID','Item Name'])

## Total Purchase Value & Purchase Count
ItemTotal = temp_df['Price'].sum()
ItemCount = temp_df['Item ID'].count()

PopularItem_df = pd.DataFrame({'Total Purchase Value':ItemTotal.map('${:,.2f}'.format),
                               'Purchase Count':ItemCount})
## Item Price
PopularItem_df['Item Price'] = (ItemTotal/ItemCount).map('${:,.2f}'.format)
PopularItem_df = PopularItem_df[['Purchase Count', 'Item Price', 'Total Purchase Value']]
PopularItem_df = PopularItem_df.sort_values(by='Total Purchase Value', ascending=False)

PopularItem_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
117,"Heartstriker, Legacy of the Light",2,$4.71,$9.42
93,Apocalyptic Battlescythe,2,$4.49,$8.98
90,Betrayer,2,$4.12,$8.24
154,Feral Katana,2,$4.11,$8.22
180,Stormcaller,2,$2.77,$5.54
