# 01 · Data Pull (Fake Store API)
Fetch products, categories, users, and carts from the Fake Store API and save each as a CSV file.

In [19]:
import requests, pandas as pd, pathlib
# Root URL of the Fake Store API; request paths are appended to it.
BASE = "https://fakestoreapi.com"

# Folder that receives the CSV exports; created (with parents) when missing.
OUT = pathlib.Path("../data")
OUT.mkdir(parents=True, exist_ok=True)

def fetch(path: str):
    """Return the decoded JSON body of a GET request to ``BASE`` + ``path``.

    The request is given a 30-second timeout. HTTP error responses are
    surfaced through ``raise_for_status`` rather than returned to the caller.
    """
    url = f"{BASE}{path}"
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return response.json()

# Download every endpoint before writing anything to disk.
products = pd.DataFrame(fetch("/products"))
categories = pd.Series(fetch("/products/categories"), name="category")
users = pd.DataFrame(fetch("/users"))
carts = pd.DataFrame(fetch("/carts"))

# File name -> table, listed in the order the files are written.
raw_tables = {
    "products.csv": products,
    "users_raw.csv": users,
    "carts_raw.csv": carts,
    # The categories arrive as a flat list, so store them as a one-column table.
    "categories.csv": categories.to_frame(),
}
for file_name, table in raw_tables.items():
    table.to_csv(OUT / file_name, index=False)
print("Saved products, users_raw, carts_raw, categories to", OUT)

Saved products, users_raw, carts_raw, categories to ../data


In [None]:
##### Cleaning carts table #####

# Column '__v' was found to have zero variation, so it is dropped.
# `carts` is modified in place, which means the column is already gone when
# this cell is run a second time; the membership guard keeps the cell
# re-runnable instead of failing with a KeyError.
if "__v" in carts.columns:
    carts.drop(columns="__v", inplace=True)

# The 'products' column holds a list of dictionaries per cart. Exploding it
# yields one row per product in the cart; rows that came from the same cart
# keep that cart's index label.
carts_exploded = carts.explode("products")

# Each dictionary has 'productId' and 'quantity' keys; expand them into
# separate columns. Both pieces carry the same index, so the column-wise
# concat lines up row for row.
product_fields = carts_exploded["products"].apply(pd.Series)
carts_exploded = pd.concat(
    [carts_exploded.drop(columns="products"), product_fields],
    axis=1,
)

# Rename the expanded columns to be more descriptive.
carts_exploded = carts_exploded.rename(
    columns={"productId": "product_id", "quantity": "product_quantity"}
)
carts_exploded.to_csv(OUT / "carts.csv", index=False)  # Save the cleaned carts table


In [None]:
##### Cleaning users table #####

# Column '__v' was found to have zero variation, so it is dropped.
# `users` is modified in place; the membership guard lets this cell be re-run
# without a KeyError once the column has already been removed.
if "__v" in users.columns:
    users.drop(columns="__v", inplace=True)

# Expand the 'name' column into 'firstname' and 'lastname' columns, then
# rename them to snake_case.
name_fields = users["name"].apply(pd.Series)
users_exploded = pd.concat([users.drop(columns="name"), name_fields], axis=1)
users_exploded = users_exploded.rename(
    columns={"firstname": "first_name", "lastname": "last_name"}
)

# Expand the 'address' column into separate columns, one per key.
# NOTE(review): only the first level is expanded. If an address value is
# itself a dictionary it stays nested and would be written to the CSV as its
# string representation -- confirm against the API payload.
address_fields = users_exploded["address"].apply(pd.Series)
users_exploded = pd.concat(
    [users_exploded.drop(columns="address"), address_fields],
    axis=1,
)
users_exploded.to_csv(OUT / "users.csv", index=False)  # Save the cleaned users table
