### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [4]:
#Resources used: Stackoverflow

# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
purchasing_file = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
game_df = pd.read_csv(purchasing_file)
game_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [5]:
renamed_df=game_df.rename(columns={"Purchase ID": "Total Players"})
game_table=renamed_df[["Total Players"]]
game_table.count()

Total Players    780
dtype: int64

In [6]:
game_demo = game_df.loc[:,["SN", "Age", "Gender"]]
game_demo

Unnamed: 0,SN,Age,Gender
0,Lisim78,20,Male
1,Lisovynya38,40,Male
2,Ithergue48,24,Male
3,Chamassasya86,24,Male
4,Iskosia90,23,Male
...,...,...,...
775,Aethedru70,21,Female
776,Iral74,21,Male
777,Yathecal72,20,Male
778,Sisur91,7,Male


In [7]:
game_demo = game_demo.drop_duplicates()
game_demo

Unnamed: 0,SN,Age,Gender
0,Lisim78,20,Male
1,Lisovynya38,40,Male
2,Ithergue48,24,Male
3,Chamassasya86,24,Male
4,Iskosia90,23,Male
...,...,...,...
773,Hala31,21,Male
774,Jiskjask80,11,Male
775,Aethedru70,21,Female
777,Yathecal72,20,Male


In [8]:
total_players = game_demo.count()[0]
total_players

576

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [9]:
items=len(game_df["Item Name"].unique())
average_price=game_df["Price"].mean()
purchases=game_df["Item Name"].count()
revenue=game_df["Price"].sum()

In [25]:
summary_table = pd.DataFrame({"Number of Unique Items": [items],
                         "Average Price": [average_price],
                         "Purchases": [purchases],
                         "Revenue": [revenue]})
summary_table

Unnamed: 0,Number of Unique Items,Average Price,Purchases,Revenue
0,179,3.050987,Purchase Count Average ...,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [11]:
gender=game_demo["Gender"].value_counts()
gender_df=pd.DataFrame(gender)
percent_players=(game_demo["Gender"].value_counts()/(game_demo["Gender"].count()))*100
gender_df["Percentage of Players"]=percent_players
gender_df=gender_df.rename(columns={"Gender": "Total Players"})
gender_df["Percentage of Players"]=gender_df["Percentage of Players"].map("{:.2f}%".format)
gender_demo_df=pd.DataFrame(gender_df)
gender_demo_df

Unnamed: 0,Total Players,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [32]:

female_counts = len(game_df.loc[game_df["Gender"] == "Female", :])
male_counts =  len(game_df.loc[game_df["Gender"] == "Male", :])
other_counts = len(game_df.loc[game_df["Gender"] == "Other / Non-Disclosed", :])

female_avg_price_pd = game_df.loc[game_df["Gender"] == "Female", :]
female_avg_price = female_avg_price_pd["Price"].mean()
female_total = female_avg_price_pd["Price"].sum()
female_total_avg = female_total / gender[1]

male_avg_price_pd = game_df.loc[game_df["Gender"] == "Male", :]
male_avg_price = male_avg_price_pd["Price"].mean()
male_total = male_avg_price_pd["Price"].sum()
male_total_avg = male_total / gender[0]

other_avg_price_pd = game_df.loc[game_df["Gender"] == "Other / Non-Disclosed", :]
other_avg_price = other_avg_price_pd["Price"].mean()
other_total = other_avg_price_pd["Price"].sum()
other_total_avg = other_total / gender[2]

genderPurch_table = pd.DataFrame({"Gender" : ["Female", "Male", "Other / Non-Disclosed"],
                         "Purchase Count": [female_counts, male_counts, other_counts],
                         "Average Price": [female_avg_price, male_avg_price, other_avg_price],
                         "Total Purchase Value": [female_total, male_total, other_total],
                         "Avg Total Purchase per Person": [female_total_avg, male_total_avg, other_total_avg]})

genderPurch_table["Average Purchase Price"] = genderPurch_table["Average Price"].map("${:.2f}".format)
genderPurch_table["Total Purchase Value"] = genderPurch_table["Total Purchase Value"].map("${:.2f}".format)
genderPurch_table["Avg Total Purchase per Person"] = genderPurch_table["Avg Total Purchase per Person"].map("${:.2f}".format)
genderPurch_table = genderPurch_table.set_index("Gender")
genderPurchase = pd.DataFrame(genderPurch_table)
print(genderPurchase)

                       Purchase Count  Average Price Total Purchase Value  \
Gender                                                                      
Female                            113       3.203009              $361.94   
Male                              652       3.017853             $1967.64   
Other / Non-Disclosed              15       3.346000               $50.19   

                      Avg Total Purchase per Person Average Purchase Price  
Gender                                                                      
Female                                        $3.20                  $3.20  
Male                                          $3.02                  $3.02  
Other / Non-Disclosed                         $3.35                  $3.35  


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [50]:
bins=[0,9,14,19,24,29,35,39,500]
group_labels=["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]

In [51]:
age_demo = game_demo
age_demo["Age Group"] = pd.cut(age_demo["Age"], bins, labels = group_labels)
group_counts = age_demo["Age Group"].value_counts()
counts = []
counts_percent = []
for i in range(len(group_labels)):
    counts.append(group_counts[i])
    counts_percent.append((counts[i]/ total_players) * 100)
age_sum = pd.DataFrame({"Total Count" : counts, "Percentage of Players" : counts_percent, "Age Group" : group_labels})
age_sum["Percentage of Players"] = age_sum["Percentage of Players"].map("{:.2f}".format)
age_sum = age_sum.set_index("Age Group")
print(age_sum)

           Total Count Percentage of Players
Age Group                                   
<10                 17                  2.95
10-14               22                  3.82
15-19              107                 18.58
20-24              258                 44.79
25-29               77                 13.37
30-34               62                 10.76
35-39               21                  3.65
40+                 12                  2.08


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [7]:
age_purchase = game_df
age_purchase["Age Group"] = pd.cut(age_purchase["Age"], bins, labels = group_labels)
groups = []
purchase_count = []
avg_purchcase = []
total_purchase = []
avg_per_person = []
for i in range(len(group_labels)):
    groups.append(age_purchase.loc[age_purchase["Age Group"] == group_labels[i], :])
    purchase_count.append(len(groups[i]))
    total_purchase.append(groups[i]["Price"].sum())
    avg_purchcase.append(total_purchase[i] / purchase_count[i])
    avg_per_person.append(total_purchase[i] / counts[i])

age_purchase_sum = pd.DataFrame({"Purchase Count" : purchase_count, "Average Purchase Price" : avg_purchcase,
                                "Total Purchase Value" : total_purchase, 
                                "Avg Total Purchase per Person" : avg_per_person,
                                "Age Group" : group_labels})
age_purchase_sum = age_purchase_sum.set_index("Age Group")
age_purchase_sum["Average Purchase Price"] = age_purchase_sum["Average Purchase Price"].map("${:.2f}".format)
age_purchase_sum["Total Purchase Value"] = age_purchase_sum["Total Purchase Value"].map("${:.2f}".format)
age_purchase_sum["Avg Total Purchase per Person"] = age_purchase_sum["Avg Total Purchase per Person"].map("${:.2f}".format)
print(age_purchase_sum)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [8]:
top_spenders = game_df
names = top_spenders["SN"].value_counts().index.tolist()
names_count = top_spenders["SN"].value_counts()
top_groups = []
total = []
top_avg_price = []
for i in range(len(names)):
        top_groups.append(top_spenders.loc[top_spenders["SN"] == names[i], :])
        total.append(top_groups[i]["Price"].sum())
        top_avg_price.append(total[i] / names_count[i])
top_spenders_sum = pd.DataFrame({"Purchase Count" : names_count, "Average Purchase Price" : top_avg_price,
                                "Total Purchase Value" : total, "SN" : names})
top_spenders_sum = top_spenders_sum.sort_values("Total Purchase Value", ascending = False)                               
top_spenders_sum = top_spenders_sum.set_index("SN")
top_spenders_sum["Average Purchase Price"] = top_spenders_sum["Average Purchase Price"].map("${:.2f}".format)
top_spenders_sum["Total Purchase Value"] = top_spenders_sum["Total Purchase Value"].map("${:.2f}".format)
print(top_spenders_sum.head())

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [9]:
popular = game_df[["Item ID", "Item Name", "Price"]]
most_popular = popular.groupby(["Item ID", "Item Name"])
items_names = []
items_price = []
total_purchase_value = []
items_count = most_popular.agg(np.size)["Price"].tolist()
items_count.sort(reverse = True)
items_id = popular["Item ID"].value_counts().index.tolist()
items_count = popular["Item ID"].value_counts().tolist()
counter = 0
for i in range(len(items_id)):
        for j in range(len(popular)):
                if(popular["Item ID"][j] == items_id[i] and counter != 1):
                        items_names.append(popular["Item Name"][j])
                        items_price.append(popular["Price"][j])
                        counter = 1
        counter = 0
        total_purchase_value.append(items_count[i] * items_price[i])
sum_popular = pd.DataFrame({"Purchase Count" : items_count, "Item Price" : items_price,
                                "Total Purchase Value" : total_purchase_value,
                                "Item Name" : items_names, "Item ID" : items_id})
sum_popular["Purchase Count"] = sum_popular["Purchase Count"].astype(int)
sum_popular["Item Price"] = sum_popular["Item Price"].map("${:.2f}".format)

sum_profit = sum_popular.sort_values("Total Purchase Value", ascending = False)

sum_popular["Total Purchase Value"] = sum_popular["Total Purchase Value"].map("${:.2f}".format)
sum_popular = sum_popular.sort_values("Purchase Count", ascending = False)
sum_popular = sum_popular.set_index(["Item ID", "Item Name"])
print(sum_popular.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [10]:
sum_profit["Total Purchase Value"] = sum_profit["Total Purchase Value"].map("${:.2f}".format)
sum_profit = sum_profit.set_index(["Item ID", "Item Name"])
print(sum_profit.head())

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
92,Final Critic,8,$4.88,$39.04
103,Singed Scalpel,8,$4.35,$34.80
