# Obtaining Data from Next Day Koi

In [15]:
# Imports

from bs4 import BeautifulSoup
import requests
import pandas as pd
from urllib.parse import urljoin
import os
import uuid
import time
import re

In [16]:
# Define the website

BASE_URL = 'https://nextdaykoi.com/shop/'
# Paginated listing URL; `{}` is filled in with the page number.
START_URL = 'https://nextdaykoi.com/shop/page/{}/'
# Fetch the first shop page. The timeout stops a stalled connection from
# hanging the kernel forever (requests waits indefinitely by default).
website = requests.get(BASE_URL, timeout=30)

# Define the output directory

OUTPUT_DIR = 'nextdaykoi_data'
# exist_ok=True makes this cell safe to re-run.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [17]:
# Check status: display the HTTP status code of the shop-page response

website.status_code

200

In [18]:
# Get the object: parse the fetched shop-page HTML into a BeautifulSoup tree

soup = BeautifulSoup(website.text, "html.parser")

In [19]:
# Display the parsed document (this renders the page's full HTML in the cell output).
soup

<!DOCTYPE html>

<html lang="en-US">
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" name="viewport">
<meta content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1" name="robots"/>
<style>img:is([sizes="auto" i], [sizes^="auto," i]) { contain-intrinsic-size: 3000px 1500px }</style>
<!-- This site is optimized with the Yoast SEO Premium plugin v21.5 (Yoast SEO v21.5) - https://yoast.com/wordpress/plugins/seo/ -->
<title>Koi &amp; Goldfish For Sale | Next Day Koi</title>
<meta content="Everything you need to start, grow, and maintain your koi experience: Koi Fish For Sale, Butterfly Koi For Sale, Goldfish For Sale, and Fish Food For Sale." name="description"/>
<link href="https://nextdaykoi.com/shop/" rel="canonical"/>
<link href="https://nextdaykoi.com/shop/page/2/" rel="next"/>
<meta content="en_US" property="og:locale"/>
<meta content="article" property="og:type"/>
<meta content="Sh

<div class="wc-loop-product-add-to-cart-wrapper"><a class="product_type_simple add_to_cart_button ajax_add_to_cart btn btn-inline" data-product_id="2830704" data-product_sku="H0502K51B04415C01" data-quantity="1" href="?add-to-cart=2830704" rel="nofollow"><span class="add_to_cart_button_text">Add to cart</span></a><span class="pys_list_name_productdata" data-pys_list_name_productlist_id="" data-pys_list_name_productlist_name="" style="display:none; visibility:hidden;"></span><span class="wowmall-wc-quick-view-button-wrapper"><a class="wowmall-wc-quick-view-button btn btn-icon" href="#"><span class="btn-text">Quick view</span></a></span></div></div></li>
<li class="col-xxl col-xl-4 col-lg-6 col-sm-6 product type-product post-2830703 status-publish instock product_cat-efp-goldfish-lots product_cat-goldfish product_cat-imported-goldfish product_cat-oranda has-post-thumbnail sold-individually taxable shipping-taxable purchasable product-type-simple">
<div class="wc-loop-product-wrapper"><a class="woocommerce-LoopProduct-link woocommerce-loop-product__link" href="https://nextdaykoi.com/product/lot-of-3-2-5-3-imported-assorted-oranda-6/"></a><div class="wc-loop-product-cats-rating-wrapper"> <div class="loop-product-categories">
<a href="https://nextdaykoi.com/koi-fish/goldfish/efp-goldfish-lots/" rel="tag">EFP Goldfish Lots</a>, <a href="https://nextdaykoi.com/koi-fish/goldfish/" rel="tag">Goldfish For Sale</a>, <a href="https://nextdaykoi.com/koi-fish/goldfish/imported-goldfish/" rel="tag">Imported Goldfish</a>, <a href="https://nextdaykoi.com/koi-fish/goldfish/oranda/" rel="tag">Oranda</a> </div>
</div><h2 class="wc-loop-product-title"><a href="https://nextdaykoi.com/product/lot-of-3-2-5-3-imported-assorted-oranda-6/">Lot of (3) 2.5-3” Imported Assorted Oranda</a></h2>
<span class="price">$54.00</span>

In [20]:
# Select every product tile: <li class="product"> elements that are direct
# children of <ul class="products">.
product = soup.select("ul.products > li.product")

In [21]:
# Display the list of matched product <li> elements.
product

[<li class="col-xxl col-xl-4 col-lg-6 col-sm-6 product type-product post-2832366 status-publish first instock product_cat-asagi-shusui product_cat-imported-koi product_cat-koi product_cat-shusui product_cat-single-fish product_cat-single-koi has-post-thumbnail sold-individually taxable shipping-taxable purchasable product-type-simple">
 <div class="wc-loop-product-wrapper"><a class="woocommerce-LoopProduct-link woocommerce-loop-product__link" href="https://nextdaykoi.com/product/3-5-imported-shusui-12/"><img alt="" class="attachment-woo_img_size_small size-woo_img_size_small swiper-lazy" data-src="https://d2e07cbkdk0gwy.cloudfront.net/wp-content/uploads/2025/05/2025.05.05_G_015_15C-69.jpg" decoding="async" height="200" src="

In [22]:
# Accumulates one record per scraped product, keyed by a short random ID.
data_collected = {}


def _image_extension(raw_src, default=".jpg"):
    """Return the file extension of an image URL, ignoring any query string or fragment."""
    # Strip "?..." / "#..." first so a dot inside the query is not mistaken for the extension.
    path = re.split(r"[?#]", raw_src, maxsplit=1)[0]
    return os.path.splitext(path)[1] or default


def _safe_filename_stem(title):
    """Turn a product title into a string usable as part of a filename."""
    # Remove characters that are not allowed in filenames.
    stem = re.sub(r'[\\/*?:"<>|]', "", title)
    # Spaces become underscores; en dashes become plain hyphens.
    return stem.replace(" ", "_").replace("–", "-")


def _download_image(img_url, filepath, timeout):
    """Stream the image at ``img_url`` into ``filepath``; raises on an HTTP error status."""
    # The context manager releases the streamed connection even if writing fails.
    with requests.get(img_url, stream=True, timeout=timeout) as img_resp:
        # Checked before the file is opened, so a failed request leaves no empty file behind.
        img_resp.raise_for_status()
        with open(filepath, "wb") as f:
            for chunk in img_resp.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)


def scrape_page(page_num, timeout=30):
    """Scrape one shop listing page into ``data_collected`` and download its product images.

    Parameters
    ----------
    page_num : int
        Page number substituted into ``START_URL``.
    timeout : float, optional
        Seconds to wait on each HTTP request before giving up.

    Returns
    -------
    bool
        ``True`` if the page returned HTTP 200 and contained at least one
        product; ``False`` otherwise (used by the caller to stop paginating).

    Raises
    ------
    requests.RequestException
        On connection failures or timeouts, and (as ``HTTPError``) when an
        image download responds with an error status.
    """
    url = START_URL.format(page_num)
    page_resp = requests.get(url, timeout=timeout)
    if page_resp.status_code != 200:
        return False    # no such page

    page_soup = BeautifulSoup(page_resp.text, "html.parser")

    products = page_soup.select("ul.products > li.product")
    if not products:
        return False    # no products → end of pagination

    for prod in products:
        wrapper = prod.select_one("div.wc-loop-product-wrapper")
        if wrapper is None:
            continue    # not a regular product tile

        # 1) Product page & title
        a_title = wrapper.select_one("h2.wc-loop-product-title a")
        if a_title is None:
            continue    # nothing to identify the product by
        link = a_title.get("href")
        title = a_title.get_text(strip=True)

        # 2) Price
        price_tag = wrapper.select_one("span.price")
        price = price_tag.get_text(strip=True) if price_tag else None

        # 3) Image URL (lazy-loaded: the real URL is in data-src, src is the fallback)
        img_tag = wrapper.select_one("img.swiper-lazy")
        raw_src = (img_tag.get("data-src") or img_tag.get("src")) if img_tag else None
        # Resolve against the page that was actually fetched, not the URL template.
        img_url = urljoin(url, raw_src) if raw_src else None

        # 4) Categories
        category = [a.get_text(strip=True)
                    for a in wrapper.select("div.loop-product-categories a")]

        # 5) SKU from the add-to-cart button
        cart_btn = wrapper.select_one("a.add_to_cart_button")
        sku = cart_btn.get("data-product_sku") if cart_btn else None

        # 6) Download the image, named "<short id>_<sanitised title><ext>"
        unit_id = uuid.uuid4().hex[:8]    # short 8-hex-digit ID, also the record key
        filepath = None
        if img_url:
            filename = f"{unit_id}_{_safe_filename_stem(title)}{_image_extension(raw_src)}"
            filepath = os.path.join(OUTPUT_DIR, filename)
            _download_image(img_url, filepath, timeout)

        # 7) Store the record
        data_collected[unit_id] = {
            "link":     link,
            "title":    title,
            "category":  category,
            "price":    price,
            "sku":      sku,
            "image":    filepath,      # path where the image was saved (None if no image)
            "image_url": img_url,      # original URL
        }
    return True

In [23]:
# Walk the paginated listing from page 1 until scrape_page reports nothing left.
page = 1
success = True
while success:
    print(f"Scraping page {page}…")
    success = scrape_page(page)
    if success:
        page += 1
        time.sleep(1)  # be polite
print("No more pages. Done.")

Scraping page 1…
Scraping page 2…
Scraping page 3…
Scraping page 4…
Scraping page 5…
Scraping page 6…
Scraping page 7…
Scraping page 8…
Scraping page 9…
Scraping page 10…
Scraping page 11…
Scraping page 12…
Scraping page 13…
Scraping page 14…
Scraping page 15…
Scraping page 16…
Scraping page 17…
Scraping page 18…
Scraping page 19…
Scraping page 20…
Scraping page 21…
Scraping page 22…
Scraping page 23…
Scraping page 24…
Scraping page 25…
Scraping page 26…
Scraping page 27…
Scraping page 28…
Scraping page 29…
Scraping page 30…
Scraping page 31…
Scraping page 32…
Scraping page 33…
Scraping page 34…
Scraping page 35…
Scraping page 36…
Scraping page 37…
Scraping page 38…
Scraping page 39…
Scraping page 40…
Scraping page 41…
Scraping page 42…
Scraping page 43…
Scraping page 44…
Scraping page 45…
Scraping page 46…
Scraping page 47…
Scraping page 48…
Scraping page 49…
Scraping page 50…
Scraping page 51…
Scraping page 52…
Scraping page 53…
Scraping page 54…
Scraping page 55…
Scraping page 56…
S

In [24]:
# Build a frame from the scraped records; at this point each unit ID is a column
# and each record field (link, title, ...) is a row.
df = pd.DataFrame(data_collected)

In [25]:
# Rebuild from data_collected instead of transposing df onto itself, so that
# re-running this cell always yields one row per product rather than flipping
# the frame back to its previous orientation.
df = pd.DataFrame(data_collected).T

In [26]:
# Preview the scraped products (pandas truncates long frames in the display).
df

Unnamed: 0,link,title,category,price,sku,image,image_url
02f31ccb,https://nextdaykoi.com/product/3-5-imported-sh...,3.5” Imported Shusui,"[Asagi and Shusui, Imported Koi, Koi, Shusui, ...",$30.00,HS0505G01515C14,nextdaykoi_data/02f31ccb_3.5”_Imported_Shusui.jpg,https://d2e07cbkdk0gwy.cloudfront.net/wp-conte...
bec4495e,https://nextdaykoi.com/product/3-5-imported-ko...,3.5” Imported Kohaku,"[Gosanke, Imported Koi, Kohaku, Koi, Single Fi...",$30.00,HS0505G01515C13,nextdaykoi_data/bec4495e_3.5”_Imported_Kohaku.jpg,https://d2e07cbkdk0gwy.cloudfront.net/wp-conte...
62543bb1,https://nextdaykoi.com/product/3-5-imported-ar...,3.5” Imported Armor Scaled Hariwake,"[All Hikari Koi, Hariwake, Hikarimoyo, Importe...",$30.00,HS0505G01515C11,nextdaykoi_data/62543bb1_3.5”_Imported_Armor_S...,https://d2e07cbkdk0gwy.cloudfront.net/wp-conte...
ea7ff9e9,https://nextdaykoi.com/product/3-5-imported-be...,3.5” Imported Doitsu Aka Bekko,"[Aka Bekko, Bekko, Imported Koi, Koi, Single F...",$30.00,HS0505G01515C10,nextdaykoi_data/ea7ff9e9_3.5”_Imported_Doitsu_...,https://d2e07cbkdk0gwy.cloudfront.net/wp-conte...
f6ec77bc,https://nextdaykoi.com/product/3-5-imported-gi...,3.5” Imported Gin Rin Hariwake,"[All Hikari Koi, Hariwake, Hikarimoyo, Importe...",$30.00,HS0505G01515C08,nextdaykoi_data/f6ec77bc_3.5”_Imported_Gin_Rin...,https://d2e07cbkdk0gwy.cloudfront.net/wp-conte...
...,...,...,...,...,...,...,...
31649d26,https://nextdaykoi.com/product/5-6-imported-go...,5-6” Imported Goldfish Mix,"[Goldfish Pond Packs, Pond Pack Koi, Butterfly...",$65.60 – $364.45,,nextdaykoi_data/31649d26_5-6”_Imported_Goldfis...,https://d2e07cbkdk0gwy.cloudfront.net/wp-conte...
776eec39,https://nextdaykoi.com/product/aa-grade-premiu...,AA Grade (Premium Grade) Imported Koi,"[Koi Pond Packs, Pond Pack Koi, Butterfly Koi ...",$125.10 – $695.15,,nextdaykoi_data/776eec39_AA_Grade_(Premium_Gra...,https://d2e07cbkdk0gwy.cloudfront.net/wp-conte...
044d1924,https://nextdaykoi.com/product/4-5-imported-ap...,5-6” Imported Apricot Comet Goldfish,"[Goldfish Pond Packs, Pond Pack Koi, Butterfly...",$62.40 – $346.70,,nextdaykoi_data/044d1924_5-6”_Imported_Apricot...,https://d2e07cbkdk0gwy.cloudfront.net/wp-conte...
135fea06,https://nextdaykoi.com/product/2-2-5-imported-...,2-3″ Imported Mixed Oranda Goldfish,"[Goldfish Pond Packs, Pond Pack Koi, Butterfly...",$59.65 – $447.65,,nextdaykoi_data/135fea06_2-3″_Imported_Mixed_O...,https://d2e07cbkdk0gwy.cloudfront.net/wp-conte...


In [27]:
# Count rows whose image URL already appeared in an earlier row.
df["image_url"].duplicated().sum()

np.int64(1)

In [28]:
# Summarise the columns: names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1948 entries, 02f31ccb to 4796267b
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   link       1948 non-null   object
 1   title      1948 non-null   object
 2   category   1948 non-null   object
 3   price      1948 non-null   object
 4   sku        1948 non-null   object
 5   image      1948 non-null   object
 6   image_url  1948 non-null   object
dtypes: object(7)
memory usage: 121.8+ KB


In [29]:
# Optional export of the scraped table to CSV (left disabled; uncomment to write the file).
# df.to_csv('nextdaykoi_data.csv', index=True)