In [1]:
import pandas as pd
import altair as alt
from vega_datasets import data

# Dataframe

Connect to the Makeup API, load the foundation products into a DataFrame, and clean the data so that only the fields relevant to this exploration remain.

In [2]:
# Fetch every foundation product from the public Makeup API.
# pd.read_json already returns a DataFrame, so no pd.DataFrame(...) wrapper is needed.
file = pd.read_json('https://makeup-api.herokuapp.com/api/v1/products.json?product_type=foundation')

# Build the cleaned frame in a single chain, without in-place mutation, so
# `file` keeps the raw download and `df` is always derived from it.
df = (
    file
    # Capitalise the columns that appear on chart axes and in tooltips.
    .rename(columns={'id': 'ID', 'brand': 'Brand', 'name': 'Name', 'price': 'Price', 'rating': 'Rating', 'category': 'Category'})
    # Drop every product whose rating or brand is missing.
    .dropna(subset=['Rating', 'Brand'])
    # Renumber the rows; drop=True discards the old index instead of
    # materialising it as an extra 'index' column that must be removed later.
    .reset_index(drop=True)
    # Remove the fields this exploration does not use.
    .drop(columns=['price_sign', 'currency', 'product_type', 'created_at', 'updated_at', 'product_api_url'])
)

In [3]:
# Add a "Color Count" column holding the number of objects in each product's
# product_colors array. Series.map(len) applies len() to every value directly,
# giving the same numbers as looping over df.iterrows() and appending to a list.
df['Color Count'] = df['product_colors'].map(len)

# Preview the first rows only rather than rendering the whole frame.
df.head(10)

Unnamed: 0,ID,Brand,Name,Price,image_link,product_link,website_link,description,Rating,Category,tag_list,api_featured_image,product_colors,Color Count
0,389,marcelle,Marcelle BB Cream-to-Powder Makeup,22.99,https://d3t32hsnjxo7q6.cloudfront.net/i/4ca356...,https://well.ca/products/marcelle-bb-cream-to-...,https://well.ca,A perfect addition to Marcelle's best skin enh...,5.0,cream,[Canadian],//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#eabe9b', 'colour_name': 'Ligh...",2
1,388,physicians formula,Physicians Formula Super BB All-in-1 Beauty Ba...,12.99,https://d3t32hsnjxo7q6.cloudfront.net/i/4d7d79...,https://well.ca/products/physicians-formula-su...,https://well.ca,Physicians Formula Super BB All-in-1 Beauty Ba...,2.5,cream,[Gluten Free],//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#fbd0bd', 'colour_name': 'Ligh...",2
2,387,physicians formula,Physicians Formula Nude Wear Touch of Glow Sti...,12.99,https://d3t32hsnjxo7q6.cloudfront.net/i/2f5878...,https://well.ca/products/physicians-formula-nu...,https://well.ca,\nPhysicians Formula Nude Wear Touch of Glow S...,4.0,cream,[Gluten Free],//s3.amazonaws.com/donovanbailey/products/api_...,[],0
3,386,covergirl,CoverGirl & Olay Simply Ageless Foundation,15.99,https://d3t32hsnjxo7q6.cloudfront.net/i/fd47ea...,https://well.ca/products/covergirl-olay-simply...,https://well.ca,This breakthrough formula with Olay Regenerist...,5.0,cream,[],//s3.amazonaws.com/donovanbailey/products/api_...,[],0
4,385,sante,Sante Soft Cream Foundation,27.49,https://d3t32hsnjxo7q6.cloudfront.net/i/e468ef...,https://well.ca/products/sante-soft-cream-foun...,https://well.ca,Sante soft cream foundation has precious miner...,3.8,cream,[Natural],//s3.amazonaws.com/donovanbailey/products/api_...,[],0
5,384,covergirl,CoverGirl Ultimate Finish Liquid Powder Makeup,9.99,https://d3t32hsnjxo7q6.cloudfront.net/i/1aa7e9...,https://well.ca/products/covergirl-ultimate-fi...,https://well.ca,"Get the beauty of a liquid, a powder and conce...",5.0,cream,[],//s3.amazonaws.com/donovanbailey/products/api_...,[],0
6,383,revlon,Revlon Colorstay Whipped Creme Makeup,19.99,https://d3t32hsnjxo7q6.cloudfront.net/i/037b44...,https://well.ca/products/revlon-colorstay-whip...,https://well.ca,"Finally, truly decadent makeup offers \nultima...",4.4,cream,[],//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#f5dbc8', 'colour_name': 'Nude...",5
7,382,maybelline,Maybelline Dream Smooth Mousse Foundation,14.79,https://d3t32hsnjxo7q6.cloudfront.net/i/fb79e7...,https://well.ca/products/maybelline-dream-smoo...,https://well.ca,Why You'll Love ItUnique cream-whipped foundat...,3.8,cream,[],//s3.amazonaws.com/donovanbailey/products/api_...,[],0
8,381,revlon,Revlon New Complexion One-Step Makeup,18.29,https://d3t32hsnjxo7q6.cloudfront.net/i/3b9b3c...,https://well.ca/products/revlon-new-complexion...,https://well.ca,"Give your skin a bright, natural looking finis...",4.2,cream,[],//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#FBCCA0', 'colour_name': 'Ivor...",6
9,380,maybelline,Maybelline Fit Me Shine-Free Foundation Stick,10.99,https://d3t32hsnjxo7q6.cloudfront.net/i/d04e7c...,https://well.ca/products/maybelline-fit-me-shi...,https://well.ca,"Get flawless, shine-free skin instantly and on...",4.7,cream,[],//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#FFE4C6', 'colour_name': 'Porc...",6


# Price vs. Rating
___
### Iteration I:
Explore Price vs. Rating of individual products through a scatterplot. After analysis, I found that representing the data this way was inconclusive and confusing.

In [4]:
# Scatterplot with one circle per product: price on x, rating on y,
# coloured by brand, with pan/zoom enabled and product details on hover.
(
    alt.Chart(df)
    .mark_circle(size=200)
    .encode(
        alt.X('Price'),
        alt.Y('Rating'),
        alt.Color('Brand'),
        tooltip=['Brand', 'Name', 'Price', 'Rating', 'product_link'],
    )
    .configure_axis(grid=True)
    .interactive()
)

#Conclusion: Inconclusive and hard to read

### Iteration II:
In an effort to make the graph more readable, I explored the use of <b>binned scatterplots</b>, finding that price did not necessarily equate to quality of product.

In [5]:
# Same scatter as before, but with Price binned on the x-axis and the
# grid lines switched off.
(
    alt.Chart(df)
    .mark_circle(size=200)
    .encode(
        x=alt.X('Price', bin=True),
        y='Rating',
        color='Brand',
        tooltip=['Brand', 'Name', 'Price', 'Rating', 'product_link'],
    )
    .configure_axis(grid=False)
    .interactive()
)

## Conclusion:

Price does not equate to quality of product

# Brand Prices and Quality:
___
After coming to the above conclusion, I decided to explore Brand Prices and Quality of those brands. 

### Iteration I:

In Iteration I, I explored the average price of each brand. I found that, in terms of price, the most expensive brands were as follows:

<ol>
    <li>Dr. Hauschka</li>
    <li>Mineral Fusion</li>
    <li>Cargo Cosmetics</li>
</ol>

In [6]:
# Bar chart of the average product price for each brand.
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X("Brand"),
        alt.Y("mean(Price):Q"),
    )
)

### Iteration II:

Iteration II explores brands and their overall ratings in terms of Foundation. Here I found that no brand's average rating was below 3.5. My conclusion from this iteration is that the best average ratings belonged to the brands `Annabelle`, `Cargo Cosmetics`, and `Marcelle`.

In [7]:
# Bar chart of the average product rating for each brand.
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X("Brand"),
        alt.Y("mean(Rating):Q"),
    )
)

### Iteration III:

Iteration III explored the combination of average price, average rating, and brand. The bar chart displays the average price of the different brands, with the darkest bars in the color gradient being the best rated.

In [8]:
# Average price per brand (bar height), shaded by the brand's average rating.
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("Brand", axis=alt.Axis(title='Brand')),
        y=alt.Y("mean(Price):Q", axis=alt.Axis(title='Average Price')),
        color=alt.Color("mean(Rating)", scale=alt.Scale(scheme="goldorange")),
    )
)

## Conclusion:

From the final iteration it can be concluded that while products like <b>Mineral Fusion</b> are not worth the expensive price, products from <b>Dr. Hauschka</b> might be worth the extra money spent. However, you can get better products for a lower price from <b>Cargo Cosmetics</b>, <b>Marcelle</b>, and <b>Annabelle</b>.

The best brand for the lowest price was overwhelmingly `Annabelle`

# Color Diversity:
___
While it appears clear which brands would be considered the "best bang for your buck", the analysis so far fails to consider the constant discrimination that occurs within the makeup industry. For this reason, it is important to look at the color diversity of brands and products.

### Iteration I:

Iteration I explores the cost of products in relation to the number of colors provided for each individual product. As shown below, price and color diversity have little to no relation for most products.

In [9]:
# One circle per product: binned price on x, number of colours on y,
# coloured by brand, with grid lines switched off.
(
    alt.Chart(df)
    .mark_circle(size=200)
    .encode(
        x=alt.X("Price", bin=True),
        y=alt.Y("Color Count", axis=alt.Axis(title='Color Diversity')),
        color=alt.Color("Brand"),
    )
    .configure_axis(grid=False)
)

In [10]:
# Average number of colours per product within each price bin.
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("Price", bin=True),
        y=alt.Y("mean(Color Count)", axis=alt.Axis(title='Color Diversity')),
    )
)

### Iteration II: Brands and Color Diversity

In Iteration II, I wanted to explore how brands fare with color diversity, finding that brands such as `Pure Anada`, `Maybelline`, and `L'Oreal` have the best color diversity.

In [11]:
# Colour count per brand.
# NOTE(review): "Color Count" has no aggregate here, so every product is
# drawn as its own bar mark at its brand's x position. The later brand
# charts use mean(Color Count); consider an explicit sum(...) or mean(...)
# here so the bar height has one unambiguous meaning.
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("Brand"),
        y=alt.Y("Color Count", axis=alt.Axis(title='Color Diversity')),
    )
)

### Iteration III: Cost and Diversity

From this, I wanted to add the extra layer of looking at the price, to see whether cheaper or more expensive brands provided better color diversity. The graph below displays each brand's color diversity, with darker bars showing higher prices and lighter bars showing lower prices.

From this we once again see that price doesn't necessarily equate to color diversity. It does, however, give us insight into the ethics and the market opportunity that quite a lot of brands are missing out on.

In [12]:
# Average colour count per brand (bar height), shaded by the brand's average price.
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("Brand"),
        y=alt.Y("mean(Color Count)", axis=alt.Axis(title='Color Diversity')),
        color=alt.Color("mean(Price)", scale=alt.Scale(scheme="yellowgreenblue")),
    )
)

### Iteration IV: Rating and Diversity

I was then curious to see how brand ratings compared to their color diversity. <b>Are these more diverse products providing quality?</b> The graph below once again displays Brand vs. Color Diversity, with higher-rated brands shown in dark orange and lower-rated brands in light gold.

I found it once again interesting that brand average rating and color diversity didn't have too great of a relation.

In [13]:
# Average colour count per brand (bar height), shaded by the brand's average rating.
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X("Brand"),
        y=alt.Y("mean(Color Count)", axis=alt.Axis(title='Color Diversity')),
        color=alt.Color("mean(Rating)", scale=alt.Scale(scheme="goldorange")),
    )
)

In [14]:
# Brand overview: average colour count on y, circle colour = average price,
# circle size = average rating.
alt.Chart(df).mark_circle().encode(
    alt.X("Brand", axis=alt.Axis(title='Brand')),
    # The y-axis is titled 'Color Diversity', but count(product_colors) only
    # counts the records (products) in each group, not the colours they offer.
    # Use the per-product "Color Count" column, averaged per brand like the
    # other brand-level diversity charts.
    alt.Y("mean(Color Count)", axis=alt.Axis(title='Color Diversity')),
    color = alt.Color("mean(Price)", scale=alt.Scale(scheme="goldorange")),
    size = "mean(Rating)",
    tooltip=['Brand', 'mean(Price)', 'mean(Rating)', 'website_link']
).configure_axis(
    grid=False
)

In [15]:
# Brand overview: average price on y, circle colour = average colour count,
# circle size = average rating.
alt.Chart(df).mark_circle().encode(
    alt.X('Brand', axis=alt.Axis(title='Brand')),
    alt.Y('mean(Price)', axis=alt.Axis(title='Average Price')),
    # count(product_colors) counts the records (products) in each group, not
    # the colours they offer. Shade by the per-product "Color Count" column,
    # averaged per brand, so the colour channel reflects colour diversity.
    color = alt.Color(
        'mean(Color Count)',
        scale=alt.Scale(scheme="goldorange"),
        legend=alt.Legend(title='Color Diversity')
    ),
    size = "mean(Rating)",
    tooltip = ['Brand', 'mean(Price)', 'mean(Rating)', 'website_link']
).configure_axis(
    grid=False
).interactive()

# Final Thoughts:
___
### Price vs. Rating

Price does not equate to quality of product

### Brand Prices and Quality

Most expensive brands:
<ol>
    <li>Dr. Hauschka</li>
    <li>Mineral Fusion</li>
    <li>Cargo Cosmetics</li>
</ol>

Best rated brands:
<ol>
    <li>Annabelle</li>
    <li>Cargo Cosmetics</li>
    <li>Marcelle</li>
</ol>

While products like <b>Mineral Fusion</b> are not worth the expensive price, products from <b>Dr. Hauschka</b> might be worth the extra money spent. However, you can get better products for a lower price from <b>Cargo Cosmetics</b>, <b>Marcelle</b>, and <b>Annabelle</b>. The best brand for the lowest price was overwhelmingly `Annabelle`.

### Color Diversity

Overwhelmingly, color diversity is not reflected in the cost or ratings of products.
Brands such as `Pure Anada`, `Maybelline`, and `L'Oreal` have the greatest color diversity.