In [43]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "game_data.csv"

# Read Purchasing File and store into Pandas data frame
df = pd.read_csv(file_to_load)
df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [7]:
number_of_players = df["Purchase ID"].count()
print("Total Players: " + str(number_of_players))

Total Players: 780


In [20]:
number_of_items = df['Item ID'].nunique()
average_price = df["Price"].mean()
total_revenue = df["Price"].sum()
summary = pd.DataFrame({"Number of Unique Items" : [number_of_items],
                       "Average Price" : [average_price],
                       "Number of Sales" : [number_of_players],
                       "Total Revenue" : [total_revenue]})
summary.head()

Unnamed: 0,Number of Unique Items,Average Price,Number of Sales,Total Revenue
0,183,3.050987,780,2379.77


In [28]:
gender = df["Gender"].value_counts().reset_index()
gender.columns = ["Gender", "Count"]
gender["Percent"] = gender["Count"]/number_of_players*100
gender.head()

Unnamed: 0,Gender,Count,Percent
0,Male,652,83.589744
1,Female,113,14.487179
2,Other / Non-Disclosed,15,1.923077


In [42]:
m_only = df.loc[df["Gender"] == "Male", :]
m_count = m_only["Purchase ID"].count()
m_average = m_only["Price"].mean()
m_total = m_only["Price"].sum()
m_per_person = m_total/m_count

f_only = df.loc[df["Gender"] == "Female", :]
f_count = f_only["Purchase ID"].count()
f_average = f_only["Price"].mean()
f_total = f_only["Price"].sum()
f_per_person = f_total/f_count

o_only = df.loc[df["Gender"] == "Other / Non-Disclosed", :]
o_count = o_only["Purchase ID"].count()
o_average = o_only["Price"].mean()
o_total = o_only["Price"].sum()
o_per_person = o_total/o_count

gender_analysis = pd.DataFrame({"Gender":["Male", "Female", "Other/Non-Disclosed"],
                                "Purchase Count" : [m_count, f_count, o_count],
                                "Average Purchase Price" : [m_average, f_average, o_average],
                                "Total Purchase Value" : [m_total, f_total, o_total],
                                "Avg Total Purchase per Person" : [m_per_person, f_per_person, o_per_person]})
gender_analysis.head()

Unnamed: 0,Gender,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
0,Male,652,3.017853,1967.64,3.017853
1,Female,113,3.203009,361.94,3.203009
2,Other/Non-Disclosed,15,3.346,50.19,3.346


In [69]:
bins = [0,10,14,19,24,29,34,39,100]
labels = ["<10", "11-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
df["Age Bracket"] = pd.cut(df["Age"], bins, labels=labels)
grouped = df.groupby("Age Bracket")
age = grouped.count()
age["Percent"] = age["Age"]/number_of_players*100
age["Count"] = age["Age"]
age_counts = age[["Count", "Percent"]]
age_counts

Unnamed: 0_level_0,Count,Percent
Age Bracket,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,32,4.102564
11-14,19,2.435897
15-19,136,17.435897
20-24,365,46.794872
25-29,101,12.948718
30-34,73,9.358974
35-39,41,5.25641
40+,13,1.666667


In [81]:
age_summary = age_counts[[]]
age_count = grouped["Price"].count()
age_average = grouped["Price"].mean()
age_total = grouped["Price"].sum()
age_per_person = age_total/age_count

age_summary["Purchase Count"] = age_count
age_summary["Average Purchase Price"] = age_average
age_summary["Total Purchase Price"] = age_total
age_summary["Average Per Person"] = age_per_person

age_summary

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Price,Average Per Person
Age Bracket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,32,3.405,108.96,3.405
11-14,19,2.681579,50.95,2.681579
15-19,136,3.035956,412.89,3.035956
20-24,365,3.052219,1114.06,3.052219
25-29,101,2.90099,293.0,2.90099
30-34,73,2.931507,214.0,2.931507
35-39,41,3.601707,147.67,3.601707
40+,13,2.941538,38.24,2.941538


In [120]:
user_grouped = df.groupby("SN")
user = user_grouped.count()
user_summary = user[[]]
user_count = user_grouped["Price"].count()
user_average = user_grouped["Price"].mean()
user_total = user_grouped["Price"].sum()

user_summary["Purchase Count"] = user_count
user_summary["Average Purchase Price"] = user_average
user_summary["Total Purchase Value"] = user_total

user_summary.sort_values(by=["Purchase Count"], ascending=False, inplace=True)
user_summary.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Iral74,4,3.405,13.62
Idastidru52,4,3.8625,15.45
Asur53,3,2.48,7.44
Inguron55,3,3.703333,11.11


In [126]:
item = df[["Item ID", "Item Name", "Price"]]
item_grouped = item.groupby(["Item ID", "Item Name"])
item1 = item_grouped.count()
item_pop = item1[[]]
i_count = item_grouped.count()
i_price = item["Price"]
i_total = i_count*i_price

item_pop["Purchase Count"] = i_count
item_pop["Item Price"] = i_price
item_pop["Total Purchase Value"] = i_total

item_pop.head()

ValueError: Wrong number of items passed 781, placement implies 1