I will build an Instagram bot that automatically posts new artists' books and zines collected from a list of independent bookstores in New York.
Steps:
1. Scrape sites such as Printed Matter, Center for Book Arts, and Bungee Space, and compile the results into a unified .csv file hosted on GitHub.
2. Detect newly added entries/titles on these sites and append them to the existing dataset to keep it up to date (requires a manual trigger).
3. Convert the data into post-ready text and images.
4. Connect to Instagram via the Instagram Graph API.
5. Schedule and automate posts using Python (with the Instagram Graph API and the instabot library).

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [24]:
# Scrape CBA's artists' books from their bookshop listing pages (pages 1-7).
#
# Each listing contributes one record with its category, title, image link and
# URL. Pages that cannot be fetched, or that come back with an HTTP error
# status, are reported and skipped instead of being parsed as if they were
# listings: an anti-bot interstitial page contains no 'post-content' blocks,
# so without the check the run would quietly produce an empty dataset.
headers = {
    "User-Agent": "Mozilla/5.0"
}

all_books = []

for page in range(1, 8):
    url = f"https://centerforbookarts.org/book-shop?cats=artists-books&pg={page}"
    try:
        # The timeout keeps one stalled connection from hanging the whole cell.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as e:
        print(f"Request failed for {url}: {e}")
        time.sleep(1)
        continue

    # Parsed before the status check so `soup` stays available for inspection
    # even when the server answered with an error page.
    soup = BeautifulSoup(response.content, 'html.parser')
    if response.ok:
        posts = soup.find_all('div', class_='post-content')
    else:
        print(f"Skipping {url}: HTTP {response.status_code}")
        posts = []

    if response.ok and not posts:
        print(f"No listings found on {url} (blocked, or past the last page?)")

    for post in posts:
        category_div = post.find('div', class_='post-header')
        category = category_div.get_text(strip=True) if category_div else None

        title_tag = post.find('h2', class_='post-title')
        title = title_tag.get_text(strip=True) if title_tag else None

        image_div = post.find('div', class_='post-image')
        img_tag = image_div.find('img', decoding='async') if image_div else None
        # .get() avoids a KeyError on an <img> that carries no src attribute.
        image_link = img_tag.get('src') if img_tag else None

        a_tag = post.find('a')
        book_url = a_tag.get('href') if a_tag else None

        all_books.append({
            'category': category,
            'title': title,
            'image link': image_link,
            'url': book_url
        })

    # Be polite to the server between page requests.
    time.sleep(1)


df = pd.DataFrame(all_books)
if all_books:
    df.to_csv("cba_artists_books.csv", index=False)
else:
    # Do not replace a previously saved dataset with an empty file.
    print("No books scraped; leaving cba_artists_books.csv untouched.")

In [5]:
# Debug check: display whatever `soup` is currently held by the kernel.
# NOTE(review): this cell's execution count (In [5]) predates the scraping
# cell's (In [24]), so the recorded output may come from an earlier run; the
# recorded page is titled "Just a moment..." rather than a book listing.
soup

<!DOCTYPE html>
<html lang="en-US"><head><title>Just a moment...</title><meta content="text/html; charset=utf-8" http-equiv="Content-Type"/><meta content="IE=Edge" http-equiv="X-UA-Compatible"/><meta content="noindex,nofollow" name="robots"/><meta content="width=device-width,initial-scale=1" name="viewport"/><style>*{box-sizing:border-box;margin:0;padding:0}html{line-height:1.15;-webkit-text-size-adjust:100%;color:#313131;font-family:system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji}body{display:flex;flex-direction:column;height:100vh;min-height:100vh}.main-content{margin:8rem auto;max-width:60rem;padding-left:1.5rem}@media (width <= 720px){.main-content{margin-top:4rem}}.h2{font-size:1.5rem;font-weight:500;line-height:2.25rem}@media (width <= 720px){.h2{font-size:1.25rem;line-height:1.5rem}}#challenge-error-text{background-image:url(data:image/svg+xml;base64,PHN2ZyB4bWx

In [None]:
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Site root; hrefs found while scraping are attached to this prefix.
BASE_URL = "https://3ssstudios.com"
# Collection listing page where the crawl starts.
START_URL = BASE_URL + "/collections/publications"
# Headers sent with every request made to the site.
HEADERS = {"User-Agent": "Mozilla/5.0"}

# Collects one record (dict) per scraped item across all listing pages.
all_items = list()

def scrape_detail_page(url):
    """Fetch one product page and extract its brand and description text.

    Args:
        url: URL of the product detail page to request.

    Returns:
        A ``(brand, description)`` tuple of whitespace-stripped text. An
        element is ``None`` when its tag is not present on the page, and
        ``(None, None)`` is returned when the request fails, times out, or
        the server answers with an HTTP error status.
    """
    try:
        # The timeout stops a stalled connection from blocking the crawl, and
        # the status check keeps error pages from being parsed as products.
        res = requests.get(url, headers=HEADERS, timeout=30)
        res.raise_for_status()
        soup = BeautifulSoup(res.content, "html.parser")

        # Extract brand
        brand_tag = soup.find("p", itemprop="brand")
        brand = brand_tag.get_text(strip=True) if brand_tag else None

        # Extract description
        desc_tag = soup.find("div", itemprop="description")
        description = desc_tag.get_text(strip=True) if desc_tag else None

        return brand, description
    except Exception as e:
        # Best effort: a single bad detail page must not abort the whole crawl.
        print(f"Error scraping detail page {url}: {e}")
        return None, None

def scrape_page(url):
    """Scrape one collection listing page, including each item's detail page.

    Args:
        url: URL of the listing page to request.

    Returns:
        A ``(items, next_page)`` tuple. ``items`` is a list of dicts with the
        keys ``title``, ``image link``, ``url``, ``brand`` and
        ``description``; ``next_page`` is the URL of the following listing
        page, or ``None`` when there is none. ``([], None)`` is returned when
        the page cannot be fetched or has no catalog grid, so the caller can
        stop cleanly and keep what it has already collected.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error scraping listing page {url}: {e}")
        return [], None

    soup = BeautifulSoup(response.content, "html.parser")

    catalog_container = soup.find("div", class_="grid grid--uniform grid--view-items")
    if catalog_container is None:
        return [], None

    items = []

    for item in catalog_container.find_all("div", class_="grid-view-item"):
        img_tag = item.find("img", class_="grid-view-item__image")
        image_src = img_tag.get("src") if img_tag else None
        # urljoin leaves absolute URLs alone and completes protocol-relative
        # ("//host/path") or site-relative ones against BASE_URL.
        image_link = urljoin(BASE_URL, image_src) if image_src else None

        title_tag = item.find("div", class_="h4 grid-view-item__title")
        title = title_tag.get_text(strip=True) if title_tag else None

        a_tag = item.find("a", href=True)
        # Plain concatenation would mangle an href that is already absolute
        # or that lacks a leading slash.
        item_url = urljoin(BASE_URL, a_tag["href"]) if a_tag else None

        brand, description = scrape_detail_page(item_url) if item_url else (None, None)

        items.append({
            "title": title,
            "image link": image_link,
            "url": item_url,
            "brand": brand,
            "description": description
        })

        # Pause between detail-page requests to avoid hammering the site.
        time.sleep(0.5)

    next_page = find_next_url(soup)
    return items, next_page

def find_next_url(soup):
    """Return the URL of the next listing page, or ``None`` if there is none.

    Args:
        soup: Parsed listing page; it is queried with
            ``select("a.btn.btn--secondary.btn--narrow")``.

    Returns:
        The URL, resolved against ``BASE_URL``, of the first matching link
        whose text contains ``">"`` or ``"Next"`` and that has an ``href``;
        ``None`` when no such link exists.
    """
    for link in soup.select("a.btn.btn--secondary.btn--narrow"):
        label = link.get_text()
        # .get() skips anchors without an href instead of raising KeyError.
        href = link.get("href")
        if href and (">" in label or "Next" in label):
            # urljoin copes with hrefs that are already absolute.
            return urljoin(BASE_URL, href)
    return None

# Walk the collection page by page, following "next" links until none is left.
next_page = START_URL
# Pages already requested; stops the loop if pagination ever links back to a
# page that was already crawled.
visited_pages = set()
while next_page and next_page not in visited_pages:
    visited_pages.add(next_page)
    print(f"Scraping: {next_page}")
    items, next_page = scrape_page(next_page)
    all_items.extend(items)
    # Pause between listing pages.
    time.sleep(1)

df = pd.DataFrame(all_items)
if all_items:
    df.to_csv("bungee_space_books.csv", index=False)
else:
    # Do not replace a previously saved dataset with an empty file.
    print("No items scraped; leaving bungee_space_books.csv untouched.")



Scraping: https://3ssstudios.com/collections/publications
Scraping: https://3ssstudios.com/collections/publications?page=2&phcursor=eyJhbGciOiJIUzI1NiJ9.eyJzayI6InByb2R1Y3RfY3JlYXRlZF9hdCIsInN2IjoiMjAyNS0wNS0yOFQyMjozNjoxNC4wMDArMDA6MDAiLCJkIjoiZiIsInVpZCI6NDI2NjM0NzE0MTU1OTgsImwiOjI0LCJvIjowLCJyIjoiQ0RQIiwidiI6MSwicCI6Mn0.yTF9C2va2ZIY2_oQ2BcpZdkPtoOTb7o6LkccQW4Xgu0
Scraping: https://3ssstudios.com/collections/publications?page=3&phcursor=eyJhbGciOiJIUzI1NiJ9.eyJzayI6InByb2R1Y3RfY3JlYXRlZF9hdCIsInN2IjoiMjAyNS0wNS0wOVQxNjowNTo1My4wMDBaIiwiZCI6ImYiLCJ1aWQiOjQyNTY5NjAzMzE4MDYyLCJsIjoyNCwibyI6MCwiciI6IkNEUCIsInYiOjEsInAiOjN9.nnXTzHanEUyfEm1TetVnDyhO-ly0LoMek1mptUXBZh8
Scraping: https://3ssstudios.com/collections/publications?page=4&phcursor=eyJhbGciOiJIUzI1NiJ9.eyJzayI6InByb2R1Y3RfY3JlYXRlZF9hdCIsInN2IjoiMjAyNS0wMy0yN1QyMDoyNjo0My4wMDBaIiwiZCI6ImYiLCJ1aWQiOjQyMzYxNjQwODc4MzgyLCJsIjoyNCwibyI6MCwiciI6IkNEUCIsInYiOjEsInAiOjR9.38-Pgl5Em8ve2KPJvw4NHMnUxibcGeGjE8QxLkoR-AQ
Scraping: https://3ssst

KeyboardInterrupt: 