This will be my first foray of analysis in the business world.  I'm trying to analyze game user data via Pandas.

In [2]:
# Step 1: Import my dependencies
import pandas as pd
# Step 2: Set up the file path and download the information as a dataframe
file_to_load = "Resources/purchase_data.csv"
#read my data and store it as a dataframe
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
# Step 3: Get a count of the number of users in the game.
#The following code pulls only the unique SN values 
unique_users = purchase_data.drop_duplicates("SN")
users = len(unique_users["SN"])
summary_users = pd.DataFrame({"Total Users":[users]})
summary_users

Unnamed: 0,Total Users
0,576


In [4]:
#Step 4: Perform basic calculations to discover number of items, average price, number of purchases, and total revenue.
purchase_data.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price'], dtype='object')

In [5]:
#calculate number of items
# the following code pulls only the unique item ID's
unique_items = purchase_data.drop_duplicates("Item ID")
item_num = len(unique_items["Item ID"])
item_num

183

In [6]:
#calculate average price
price_avg = round(purchase_data["Price"].mean(),2)
print(price_avg)

3.05


In [7]:
#calculate number of purchases made
unique_purchases = purchase_data.drop_duplicates("Purchase ID")
purchases = len(unique_purchases["Purchase ID"])
purchases

780

In [8]:
#calculate total revenue
revenue = purchase_data["Price"].sum()
revenue

2379.77

In [9]:
# Now this needs to be formated into a table that I can show to everyone
summary_table_purchases = pd.DataFrame({"Number of Items": [item_num],
                                        "Total Average": [price_avg],
                                        "Number of Purchases": [purchases],
                                        "Total Revenue": [revenue]})
#applying special formatting so that everything is more readable
summary_table_purchases["Total Average"] = summary_table_purchases["Total Average"].map("${:.2f}".format)
summary_table_purchases["Total Revenue"] = summary_table_purchases["Total Revenue"].map("${:.2f}".format)

#display the table
summary_table_purchases

Unnamed: 0,Number of Items,Total Average,Number of Purchases,Total Revenue
0,183,$3.05,780,$2379.77


In [10]:
#Next we need to do gender demographics.  I'll pull a list from the unique players guide for that.
#first males
males = len(unique_users.loc[unique_users["Gender"] == "Male",:]) 
male_percent = round(males/users,2)*100
print(males)
print(male_percent)

484
84.0


In [11]:
#Next Females
females = len(unique_users.loc[unique_users["Gender"]== "Female",:])
female_percent = round(females/users,2)*100
print(females)
print(female_percent)

81
14.000000000000002


In [12]:
#Finally other
others = len(unique_users.loc[unique_users["Gender"] == "Other / Non-Disclosed",:])
others_percent = round(others/users,2)*100
print(others)
print(others_percent)

11
2.0


In [13]:
#Now to create the table
summary_table_gender = pd.DataFrame({"Users": [males, females, others],
                                        "Percentages": [male_percent,female_percent,others_percent]},
                                   index=["Male","Female","Other / Non-Disclosed"])
#formatting
summary_table_gender["Percentages"] = summary_table_gender["Percentages"].map("{:.0f}%".format)

summary_table_gender

Unnamed: 0,Users,Percentages
Male,484,84%
Female,81,14%
Other / Non-Disclosed,11,2%


In [14]:
purchase_data.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price'], dtype='object')

In [15]:
#Next we need a purchasing analysis by gender.  This will go over purchase count, average purchase price, total purchase value,
#average purchase total per person by gender

gender_purchases = purchase_data[["Purchase ID","SN","Gender","Price"]]


In [16]:
#Purchases per gender
gender_gp = gender_purchases.groupby("Gender").count()
purchase_gen = gender_gp["Purchase ID"]
#Average purchase price per gender
average_gp = gender_purchases.groupby("Gender").mean()
average_gen = average_gp["Price"]
#Total Purchase values
sum_gp = gender_purchases.groupby("Gender").sum()
sum_gp["Price"]
#When it's asking for average purchse total per person by gender, is this what they mean?
gender_individual = gender_purchases.groupby(["Gender","SN"]).mean()
gender_individual["Price"]

Gender                 SN           
Female                 Adastirin33      4.480
                       Aerithllora36    4.320
                       Aethedru70       3.540
                       Aidain51         3.450
                       Aiduesu86        4.480
                                        ...  
Other / Non-Disclosed  Lirtim36         1.330
                       Maluncil97       2.640
                       Rairith81        2.220
                       Siarithria38     3.455
                       Sundim98         4.750
Name: Price, Length: 576, dtype: float64

###Age Demographics

In [28]:
unique_users["Age"].max()
bins = [6,10,14,18,22,26,30,34,38,42,46,50]
group_names = ["7-10","11-14","15-18","19-22","23-26","27-30","31-34","35-38","39-42","43-46","47-50"]
unique_users["Age Demographics"]= pd.cut(unique_users["Age"],bins,labels = group_names)
unique_users
age_demo = unique_users.groupby("Age Demographics")
#purchase count
age_demo["Purchase ID"].count()
#average purchase price
age_demo["Price"].mean()
#total purchase value
age_demo["Price"].sum()
#Average p total per Person by Age Group
age_individual = unique_users.groupby(["Age Demographics","SN"])
age_individual[""]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
