### Heroes Of Pymoli Data Analysis
* Of the 1163 active players, the vast majority are male (84%). There also exists, a smaller, but notable proportion of female players (14%).

* Our peak age demographic falls between 20-24 (44.8%) with secondary groups falling between 15-19 (18.60%) and 25-29 (13.4%).  
-----

### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

## Player Count

In [3]:
no_of_players = purchase_data['SN'].nunique()
print("Total Number of Players: " + str(no_of_players))

Total Number of Players: 576


## Purchasing Analysis (Total)

In [4]:
no_of_items = purchase_data['Item ID'].nunique()
avg_price = round(purchase_data['Price'].mean(), 2)
purch_ID = purchase_data['Purchase ID'].max()
revenue = purchase_data['Price'].sum()

In [5]:
summary_df = pd.DataFrame(
    {"# of Items": [str(no_of_items)],
     "Average Price": ["$" + str(avg_price)],
     "# of Items": [str(purch_ID)],
     "Total Revenue": ["$" + str(revenue)]})
summary_df

Unnamed: 0,# of Items,Average Price,Total Revenue
0,779,$3.05,$2379.77


## Gender Demographics

In [30]:
total = purchase_data['Gender'].count()
male = purchase_data['Gender'].value_counts(normalize=False).loc['Male']
female = purchase_data['Gender'].value_counts(normalize=False).loc['Female']
others = purchase_data['Gender'].value_counts(normalize=False).loc['Other / Non-Disclosed']

male_p = round((100*male)/total, 2)
female_p = round((100*female)/total, 2)
others_p = round((100*others)/total, 2)

In [31]:
gender_df = pd.DataFrame(
    {"Gender": ["Male", "Female", "Other / Non-Disclosed"],
     "Participants": [str(male), str(female), str(others)],
     "Percentage": [str(male_p) + "%", str(female_p) + "%", str(others_p) + "%"]
    })
gender_df

Unnamed: 0,Gender,Participants,Percentage
0,Male,652,83.59%
1,Female,113,14.49%
2,Other / Non-Disclosed,15,1.92%



## Purchasing Analysis (Gender)

In [42]:
gender_group = purchase_data[["SN","Gender","Price"]]
gender_count = gender_group["Gender"].value_counts()

gender_purchase = [gender_count[0],gender_count[1],gender_count[2]]


gender_total_purchase = [total_spent.iloc[1,0], total_spent.iloc[0,0], total_spent.iloc[2,0]]

gender_avg_purchase = [total_spent.iloc[1,0]/gender_count[0], total_spent.iloc[0,0]/gender_count[1], total_spent.iloc[2,0]/gender_count[2]]


In [43]:
gender_df = pd.DataFrame({
    "Gender": ["Male", "Female", "Other / Non-Disclosed"],
    "Purchase Count": gender_purchase,
    "Average Purchase Price" : gender_avg_purchase,
    "Total Purchase Value": gender_total_purchase,
})

gender_df.head()


Unnamed: 0,Gender,Purchase Count,Average Purchase Price,Total Purchase Value
0,Male,652,3.017853,1967.64
1,Female,113,3.203009,361.94
2,Other / Non-Disclosed,15,3.346,50.19


## Age Demographics

In [15]:

fullcount = purchase_data["SN"].nunique()
years_10 = purchase_data[purchase_data["Age"] < 10]
years_19 = purchase_data[(purchase_data["Age"] >= 10) & (purchase_data["Age"] <= 19)]
years_29 = purchase_data[(purchase_data["Age"] >= 20) & (purchase_data["Age"] <= 29)]
years_39 = purchase_data[(purchase_data["Age"] >= 30) & (purchase_data["Age"] <= 39)]
years_49 = purchase_data[(purchase_data["Age"] >= 40) & (purchase_data["Age"] <= 49)]

age_df = pd.DataFrame({"Age": ["<10", "10-19", "20-29", "30-39", "40-49"],
                        "Percentage of Players": [(years_10["SN"].nunique()/fullcount)*100, (years_19["SN"].nunique()/fullcount)*100,
                                                  (years_29["SN"].nunique()/fullcount)*100, (years_39["SN"].nunique()/fullcount)*100,
                                                 (years_49["SN"].nunique()/fullcount)*100]
                      })

age_final = age_df.set_index("Age")
age_final

Unnamed: 0_level_0,Percentage of Players
Age,Unnamed: 1_level_1
<10,2.951389
10-19,22.395833
20-29,58.159722
30-39,14.409722
40-49,2.083333


## Top Spenders

In [20]:
df = purchase_data
sn_total_purchase = df.groupby('SN')['Price'].sum().to_frame()
sn_purchase_count = df.groupby('SN')['Price'].count().to_frame()
sn_purchase_avg = df.groupby('SN')['Price'].mean().to_frame()

sn_total_purchase.columns=["Total Purchase Value"]
join_one = sn_total_purchase.join(sn_purchase_count, how="left")
join_one.columns=["Total Purchase Value", "Purchase Count"]

join_two = join_one.join(sn_purchase_avg, how="inner")
join_two.columns=["Total Purchase Value", "Purchase Count", "Average Purchase Price"]

top_spenders_df = join_two[["Purchase Count", "Average Purchase Price", "Total Purchase Value"]]
top_spenders_final = top_spenders_df.sort_values('Total Purchase Value', ascending=False).head()
top_spenders_final

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Idastidru52,4,3.8625,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.405,13.62
Iskadarya95,3,4.366667,13.1
