In [1]:
import numpy as np
import pandas as pd
import bs4
import requests

In [5]:
main_url = 'https://www.orzgk.com/shop/'

In [6]:
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'}

In [7]:
def get_soup(url):
    res = requests.get(url, headers)
    return bs4.BeautifulSoup(res.text, 'html.parser')

In [8]:
def get_columns_name():
    cols = []
    figures_page = get_soup(main_url)
    description_link = figures_page.find('a', class_='woocommerce-LoopProduct-link woocommerce-loop-product__link')
    description_page = get_soup(main_url+description_link['href'])
    description_area = description_page.find('div', class_='product-info summary col-fit col entry-summary product-summary text-left')
    description = description_area.find_all('div', class_='product_meta_r')
    cols = [dscr.find(string=True)[:-2] for dscr in description] + ['Type', 'Price']
    return cols

In [9]:
col_names = get_columns_name()
col_names

['Product Phase',
 'Product State',
 'Brand',
 'From',
 'Character',
 'Type',
 'Height Range',
 'Scale',
 'Feature',
 'Est Released Time',
 'Type',
 'Price']

In [10]:
df = pd.DataFrame(columns = col_names)
df

Unnamed: 0,Product Phase,Product State,Brand,From,Character,Type,Height Range,Scale,Feature,Est Released Time,Type.1,Price


In [11]:
def get_row(description, price):
    new_row = {}
    for dscr in description:
        dscr_list = dscr.find_all(string=True)
        col_name = dscr_list[0][:-2]
        if len(dscr_list) > 1:
            new_row[col_name] = dscr_list[1]
        else:     
            new_row[col_name] = None
        
        if col_name == 'From':
            new_row['Type'] = None
            new_row['From'] = None
            if len(dscr_list)>1:
                new_row['Type'] = dscr_list[1]
            if len(dscr_list)>2:
                new_row['From'] = dscr_list[3]
    new_row['Price'] = price
    return new_row

In [12]:
price_clsasses = ['price product-page-price', 'price product-page-price price-not-in-stock', 'price product-page-price price-on-sale']
for page_ind in range(1, 50):
    url = main_url+f'/page/{page_ind}/'
    figures_page = get_soup(url)
    description_links = figures_page.find_all('a', class_='woocommerce-LoopProduct-link woocommerce-loop-product__link')
    df_new_page = pd.DataFrame(columns = col_names)
    for fig_ind, description_link in enumerate(description_links):
        print(f"{page_ind}.{fig_ind}) {description_link['href']}")
        description_page = get_soup(description_link['href'])
        description_area = description_page.find('div', class_='product-info summary col-fit col entry-summary product-summary text-left')
        description = description_area.find_all('div', class_='product_meta_r')
        price_area = None
        for price_cl in price_clsasses:
            price_area = description_area.find('p', class_=price_cl)
            if price_area is not None:
                break
        price = price_area.find_all(string=True)[-1]
        new_row = get_row(description, price)
        df_new_page.loc[len(df_new_page)] = new_row
    df = pd.concat([df, df_new_page], ignore_index=True)
    if page_ind % 10 == 0:
        df.to_csv(f'csv_data/page_{page_ind}.csv', index=False)
    print('\n')

1.0) https://www.orzgk.com/product/yohol-studio-azur-lane-janus/
1.1) https://www.orzgk.com/product/power-studio-dragon-ball-saiyan-chapter-training-son-gohan/
1.2) https://www.orzgk.com/product/gentleman-18-studio-original-demon-courier/
1.3) https://www.orzgk.com/product/prime-1-studio-special-package-prime1studio-hunters-victim-psppr-01/
1.4) https://www.orzgk.com/product/gj-studio-ygnn-studio-original-taki/
1.5) https://www.orzgk.com/product/cm-studio-spy-x-family-anya-forger-series-2-anya-forger-cos-doma-umaru/
1.6) https://www.orzgk.com/product/fxw-studio-digimon-magnamon/
1.7) https://www.orzgk.com/product/iconic-figure-studio-kamen-rider-kamen-rider-black-sun-vs-kamen-rider-shadow-moon/
1.8) https://www.orzgk.com/product/lingyue-studio-honkai-star-rail-masked-fools-sparkle/
1.9) https://www.orzgk.com/product/qing-yuan-studio-neon-genesis-evangelion-artist-collaboration-model-ayanami-rei/
1.10) https://www.orzgk.com/product/dx-studio-one-piece-monkey-d-luffy-on-a-bull/
1.11) htt

In [14]:
df

Unnamed: 0,Product Phase,Product State,Brand,From,Character,Type,Height Range,Scale,Feature,Est Released Time,Type.1,Price
0,early bird,orderable,YOHOL Studio,Azur Lane,Janus,GK Statue,16-25cm,1/6,18+ Female,12/2024,GK Statue,25560.75
1,pre-order,orderable,power studio,Dragon Ball,Son Gohan,GK Statue,under 10cm,wcf-like & sd-like,Other,06/2025,GK Statue,5371.46
2,pre-order,orderable,Gentleman 18 Studio,Original,Demon courier,GK Statue,16-25cm,1/6,18+ Male,,GK Statue,12872.99
3,released,contact for current prices,Prime 1 Studio,PRIME 1 STUDIO,Hunter's Victim,Licensed Statue,over 90cm,1/4,Other,,Licensed Statue,73996.52
4,early bird,orderable,GJ Studio,Original,Taki,GK Statue,unknown,1/6,18+ Male,,GK Statue,27690.81
...,...,...,...,...,...,...,...,...,...,...,...,...
1171,released,orderable,jiayou Studio,Genshin Impact,Furina,PVC Figure,10-15cm,other scale,Other,08/2024,PVC Figure,740.89
1172,pre-order,orderable,KOD Studio,Blue Archive,Hayase Yuuka,GK Statue,26-35cm,1/6,18+ Female,03/2025,GK Statue,15743.94
1173,released,contact for current prices,Prime 1 Studio,DC Comics,Superboy,Licensed Statue,51-69cm,1/3,Other,,Licensed Statue,92518.80
1174,pre-order,orderable,A.M Studio,Crayon Shin-chan,Shinnosuke Nohara,GK Statue,10-15cm,other scale,Other,,GK Statue,6760.63
