In [1]:
# 0) Setup: import packages
import requests
import pandas as pd
import numpy as np

In [2]:
# Rendering config: show up to 200 columns and allow 1200-character-wide
# output so the wide product table is not elided when displayed.
pd.options.display.max_columns = 200
pd.options.display.width = 1200

In [3]:
# 1) Pull data from the Makeup API
#    BASE_URL is the main endpoint (all products). It is requested as-is for
#    the full catalogue, or with query parameters to pull a filtered subset.
BASE_URL = (
    "http://makeup-api.herokuapp.com"
    "/api/v1/products.json"
)

In [4]:
# --- Option A: pull ALL products in one call  ---
# The 30-second timeout keeps the cell from hanging on an unresponsive server;
# raise_for_status() turns any 4xx/5xx reply into an exception right here
# instead of letting a bad payload flow into the parsing cells.
resp = requests.get(url=BASE_URL, timeout=30)
resp.raise_for_status()

In [5]:
# Decode the response body into Python objects (a list of dicts, one per product).
data_json = resp.json()
print("Number of records pulled:", len(data_json))
# Peek at the first record's keys. The guard keeps an empty pull from raising
# IndexError on data_json[0]; in that case an empty key list is printed.
print("Sample keys:", list(data_json[0].keys()) if data_json else [])

Number of records pulled: 931
Sample keys: ['id', 'brand', 'name', 'price', 'price_sign', 'currency', 'image_link', 'product_link', 'website_link', 'description', 'rating', 'category', 'product_type', 'tag_list', 'created_at', 'updated_at', 'product_api_url', 'api_featured_image', 'product_colors']


In [6]:
# Build the raw DataFrame from the decoded records (one row per product dict,
# one column per key), then preview the first five rows.
df_raw = pd.DataFrame(data=data_json)
df_raw.head(n=5)

Unnamed: 0,id,brand,name,price,price_sign,currency,image_link,product_link,website_link,description,rating,category,product_type,tag_list,created_at,updated_at,product_api_url,api_featured_image,product_colors
0,1048,colourpop,Lippie Pencil,5.0,$,CAD,https://cdn.shopify.com/s/files/1/1338/0845/co...,https://colourpop.com/collections/lippie-pencil,https://colourpop.com,Lippie Pencil A long-wearing and high-intensit...,,pencil,lip_liner,"[cruelty free, Vegan]",2018-07-08T23:45:08.056Z,2018-07-09T00:53:23.301Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#B28378', 'colour_name': 'BFF ..."
1,1047,colourpop,Blotted Lip,5.5,$,CAD,https://cdn.shopify.com/s/files/1/1338/0845/pr...,https://colourpop.com/collections/lippie-stix?...,https://colourpop.com,Blotted Lip Sheer matte lipstick that creates ...,,lipstick,lipstick,"[cruelty free, Vegan]",2018-07-08T22:01:20.178Z,2018-07-09T00:53:23.287Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#b72227', 'colour_name': 'Bee'..."
2,1046,colourpop,Lippie Stix,5.5,$,CAD,https://cdn.shopify.com/s/files/1/1338/0845/co...,https://colourpop.com/collections/lippie-stix,https://colourpop.com,"Lippie Stix Formula contains Vitamin E, Mango,...",,lipstick,lipstick,"[cruelty free, Vegan]",2018-07-08T21:47:49.858Z,2018-07-09T00:53:23.274Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#F2DEC3', 'colour_name': 'Fair..."
3,1045,colourpop,No Filter Foundation,12.0,$,CAD,https://cdn.shopify.com/s/files/1/1338/0845/pr...,https://colourpop.com/products/no-filter-matte...,https://colourpop.com/products/no-filter-matte...,"Developed for the Selfie Age, our buildable fu...",,liquid,foundation,"[cruelty free, Vegan]",2018-07-08T18:22:25.273Z,2018-07-09T00:53:23.313Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#F2DEC3', 'colour_name': 'Fair..."
4,1044,boosh,Lipstick,26.0,$,CAD,https://cdn.shopify.com/s/files/1/1016/3243/pr...,https://www.boosh.ca/collections/all,https://www.boosh.ca/,All of our products are free from lead and hea...,,lipstick,lipstick,"[Chemical Free, Organic]",2018-07-08T17:32:28.088Z,2018-09-02T22:52:06.669Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#CB4975', 'colour_name': 'Babs..."


In [7]:
# 2) Quick sanity checks (API + structure)
#    Show (n_rows, n_cols) together with the column index in a single output.
(df_raw.shape, df_raw.columns)

((931, 19),
 Index(['id', 'brand', 'name', 'price', 'price_sign', 'currency', 'image_link', 'product_link', 'website_link', 'description', 'rating', 'category', 'product_type', 'tag_list', 'created_at', 'updated_at', 'product_api_url', 'api_featured_image', 'product_colors'], dtype='object'))

In [8]:
# 3) pull only a subset
#    Filters are sent as query-string parameters on the same endpoint.
#    Uncomment product_type to narrow the pull further.
params = dict(
    brand="maybelline",
    # product_type="lipstick",
)

resp2 = requests.get(url=BASE_URL, params=params, timeout=30)
resp2.raise_for_status()  # stop here on any HTTP error status
data_json2 = resp2.json()

df_maybelline = pd.DataFrame(data=data_json2)
print("Maybelline rows:", len(df_maybelline))
df_maybelline.head(n=5)

Maybelline rows: 54


Unnamed: 0,id,brand,name,price,price_sign,currency,image_link,product_link,website_link,description,rating,category,product_type,tag_list,created_at,updated_at,product_api_url,api_featured_image,product_colors
0,495,maybelline,Maybelline Face Studio Master Hi-Light Light B...,14.99,,,https://d3t32hsnjxo7q6.cloudfront.net/i/991799...,https://well.ca/products/maybelline-face-studi...,https://well.ca,Maybelline Face Studio Master Hi-Light Light B...,5.0,,bronzer,[],2016-10-01T18:36:15.012Z,2017-12-23T21:08:50.624Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,[]
1,488,maybelline,Maybelline Fit Me Bronzer,10.29,,,https://d3t32hsnjxo7q6.cloudfront.net/i/d4f7d8...,https://well.ca/products/maybelline-fit-me-bro...,https://well.ca,Why You'll Love It\n\nLightweight pigments ble...,4.5,,bronzer,[],2016-10-01T18:36:05.584Z,2017-12-23T21:08:49.985Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#CF9978', 'colour_name': 'Medi..."
2,477,maybelline,Maybelline Facestudio Master Contour Kit,15.99,,,https://d3t32hsnjxo7q6.cloudfront.net/i/4f731d...,https://well.ca/products/maybelline-facestudio...,https://well.ca,Maybelline Facestudio Master Contour Kit is th...,,,bronzer,[],2016-10-01T18:35:40.504Z,2017-12-23T21:08:48.157Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,"[{'hex_value': '#9B7163', 'colour_name': None}..."
3,468,maybelline,Maybelline Face Studio Master Hi-Light Light B...,14.99,,,https://d3t32hsnjxo7q6.cloudfront.net/i/462103...,https://well.ca/products/maybelline-face-studi...,https://well.ca,Maybelline Face Studio Master Hi-Light Light B...,,powder,blush,[],2016-10-01T18:35:27.706Z,2017-12-23T21:08:47.102Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,[]
4,452,maybelline,Maybelline Face Studio Master Hi-Light Light B...,14.99,,,https://d3t32hsnjxo7q6.cloudfront.net/i/e8c59b...,https://well.ca/products/maybelline-face-studi...,https://well.ca,Maybelline Face Studio Master Hi-Light Light B...,5.0,powder,blush,[],2016-10-01T18:35:07.476Z,2017-12-23T20:51:17.460Z,http://makeup-api.herokuapp.com/api/v1/product...,//s3.amazonaws.com/donovanbailey/products/api_...,[]


In [10]:
# 4) Save a local copy
#    index=False leaves the row index out of the file.
#    NOTE(review): tag_list and product_colors hold Python lists, which CSV
#    stores as plain text — confirm whether anything reading this file back
#    needs to parse them into lists again.
df_raw.to_csv(path_or_buf="makeup_api_products_raw.csv", index=False)
print("Saved: makeup_api_products_raw.csv")

Saved: makeup_api_products_raw.csv


In [11]:
# 5) Create a working copy for cleaning
#    df_raw stays untouched so the original pull is always recoverable.
df = df_raw.copy()

# quick glance at missingness: share of NaN/None per column, as a percentage
# rounded to one decimal, largest first.
# NOTE(review): isna() only flags NaN/None, so empty lists (e.g. tag_list == [])
# count as present here — confirm whether they should be treated as missing.
missing_pct = (
    df.isna()
    .mean()
    .sort_values(ascending=False)
    .mul(100)
    .round(1)
)
missing_pct.head(n=15)

rating                63.5
price_sign            60.5
currency              60.5
category              44.5
price                  1.5
brand                  1.3
description            0.1
tag_list               0.0
api_featured_image     0.0
product_api_url        0.0
updated_at             0.0
created_at             0.0
id                     0.0
product_type           0.0
website_link           0.0
dtype: float64