In [8]:
from bs4 import BeautifulSoup
import requests

In [9]:
def get_soup(url, timeout=30):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        BeautifulSoup: The response body parsed with ``html.parser``.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status code.
        requests.exceptions.Timeout: The server did not respond within
            ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error responses instead of parsing an error page as if
    # it were real content.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')

In [10]:
def get_product_urls(soup):
    """Return the ``href`` of every product link found in ``soup``.

    A product link is an ``<a>`` element carrying the CSS class
    ``product-item-link`` and an ``href`` attribute. The hrefs are returned
    as a list in the order ``find_all`` yields the elements.
    """
    product_links = soup.find_all(
        'a', class_='product-item-link', href=True)
    hrefs = []
    for anchor in product_links:
        hrefs.append(anchor.get('href'))
    return hrefs

In [11]:
def get_product_info(soup):
    """Extract the product name and specification table from a product page.

    Args:
        soup: Parsed product page (anything exposing BeautifulSoup's
            ``find`` / ``find_all`` API).

    Returns:
        dict: ``'name'`` maps to the text of the first
        ``<span itemprop="name">`` (``None`` when the page has none). Every
        row of the first ``<tbody>`` that has both a ``<th>`` and a ``<td>``
        contributes one ``{th text: td text}`` entry. Pages without a
        ``<tbody>`` yield only the ``'name'`` entry.
    """
    name_tag = soup.find('span', itemprop="name")
    # A page without the expected markup should not abort the whole scrape,
    # so missing elements are tolerated instead of raising AttributeError.
    product_info = {'name': name_tag.text if name_tag is not None else None}

    spec_table = soup.find('tbody')
    if spec_table is None:
        return product_info

    for row in spec_table.find_all('tr'):
        header = row.find('th')
        cell = row.find('td')
        if header is None or cell is None:
            # Not a key/value row; nothing to record.
            continue
        product_info[header.text] = cell.text
    return product_info

In [12]:
base_url = "https://www.yonex.com/badminton/racquets"

# The first listing page is the bare URL; pages 2 through 7 are selected with
# the ``?p=`` query parameter.
urls = [base_url] + [f"{base_url}?p={page}" for page in range(2, 8)]

urls

['https://www.yonex.com/badminton/racquets',
 'https://www.yonex.com/badminton/racquets?p=2',
 'https://www.yonex.com/badminton/racquets?p=3',
 'https://www.yonex.com/badminton/racquets?p=4',
 'https://www.yonex.com/badminton/racquets?p=5',
 'https://www.yonex.com/badminton/racquets?p=6',
 'https://www.yonex.com/badminton/racquets?p=7']

In [13]:
# Fetch every listing page in order and flatten the product links found on
# each one into a single list.
product_urls = [
    product_link
    for listing_url in urls
    for product_link in get_product_urls(get_soup(listing_url))
]

len(product_urls), product_urls

(78,
 ['https://www.yonex.com/badminton/racquets/ax02a',
  'https://www.yonex.com/badminton/racquets/ax02c',
  'https://www.yonex.com/badminton/racquets/ax02f',
  'https://www.yonex.com/badminton/racquets/arcsaber-2-ability',
  'https://www.yonex.com/badminton/racquets/arcsaber-2-feel',
  'https://www.yonex.com/badminton/racquets/arcsaber-2-clear',
  'https://www.yonex.com/badminton/racquets/2nf700pl',
  'https://www.yonex.com/badminton/racquets/2nf-700g',
  'https://www.yonex.com/badminton/racquets/nanoflare-700-pro',
  'https://www.yonex.com/badminton/racquets/2nf-700t',
  'https://www.yonex.com/badminton/racquets/3ax88s-p',
  'https://www.yonex.com/badminton/racquets/3ax88s-t',
  'https://www.yonex.com/badminton/racquets/3ax88s-g',
  'https://www.yonex.com/badminton/racquets/3ax88d-p',
  'https://www.yonex.com/badminton/racquets/3ax88d-t',
  'https://www.yonex.com/badminton/racquets/3ax88d-g',
  'https://www.yonex.com/badminton/racquets/3ax88-pl',
  'https://www.yonex.com/badminton/

In [14]:
product_infos = []

# Download each product page, extract its details, and echo the record as it
# arrives so progress is visible while the loop runs.
for product_url in product_urls:
    scraped = get_product_info(get_soup(product_url))
    print(scraped)
    product_infos.append(scraped)

print(f"Successfully scraped {len(product_infos)} products")

{'name': 'ASTROX 02 ABILITY', 'Flex': 'Hi-Flex', 'Frame': 'HT Graphite', 'Shaft': 'Graphite', 'Length': '10 mm longer', 'Weight / Grip': '4U (Avg. 83g) G4,5', 'Stringing Advice': '4U: 20 - 30 lbs', 'Recommended String': 'Control Players:NANOGY 98Hard Hitters:NANOGY 95', 'Color(s)': 'Red', 'Made In': 'China', 'Item Code': 'AX02A'}
{'name': 'ASTROX 02 CLEAR', 'Flex': 'Hi-Flex', 'Frame': 'HT Graphite', 'Shaft': 'Graphite', 'Length': '10 mm longer', 'Weight / Grip': '4U (Avg. 83g) G4,5', 'Stringing Advice': '4U: 20 - 30 lbs', 'Recommended String': 'Control Players:NANOGY 98Hard Hitters:NANOGY 95', 'Color(s)': 'Royal Blue', 'Made In': 'China', 'Item Code': 'AX02C'}
{'name': 'ASTROX 02 FEEL', 'Flex': 'Hi-Flex', 'Frame': 'HT Graphite', 'Shaft': 'Graphite', 'Length': '10 mm longer', 'Weight / Grip': '4U (Avg. 83g) G4,5', 'Stringing Advice': '4U: 20 - 30 lbs', 'Recommended String': 'Control Players:NANOGY 98Hard Hitters:NANOGY 95', 'Color(s)': 'Turquoise', 'Made In': 'China', 'Item Code': 'AX02

In [15]:
# Display the scraped records (one dict per product) for inspection.
product_infos

[{'name': 'ASTROX 02 ABILITY',
  'Flex': 'Hi-Flex',
  'Frame': 'HT Graphite',
  'Shaft': 'Graphite',
  'Length': '10 mm longer',
  'Weight / Grip': '4U (Avg. 83g) G4,5',
  'Stringing Advice': '4U: 20 - 30 lbs',
  'Recommended String': 'Control Players:NANOGY 98Hard Hitters:NANOGY 95',
  'Color(s)': 'Red',
  'Made In': 'China',
  'Item Code': 'AX02A'},
 {'name': 'ASTROX 02 CLEAR',
  'Flex': 'Hi-Flex',
  'Frame': 'HT Graphite',
  'Shaft': 'Graphite',
  'Length': '10 mm longer',
  'Weight / Grip': '4U (Avg. 83g) G4,5',
  'Stringing Advice': '4U: 20 - 30 lbs',
  'Recommended String': 'Control Players:NANOGY 98Hard Hitters:NANOGY 95',
  'Color(s)': 'Royal Blue',
  'Made In': 'China',
  'Item Code': 'AX02C'},
 {'name': 'ASTROX 02 FEEL',
  'Flex': 'Hi-Flex',
  'Frame': 'HT Graphite',
  'Shaft': 'Graphite',
  'Length': '10 mm longer',
  'Weight / Grip': '4U (Avg. 83g) G4,5',
  'Stringing Advice': '4U: 20 - 30 lbs',
  'Recommended String': 'Control Players:NANOGY 98Hard Hitters:NANOGY 95',
  'C

In [16]:
import pandas as pd

# One row per product, indexed by product name. Columns are the union of the
# keys seen across all products, so a spec a product lacks is left as NaN.
# Chaining set_index (rather than inplace=True) keeps the cell idempotent:
# re-running it always rebuilds the frame from product_infos.
df = pd.DataFrame(product_infos).set_index('name')
df.to_csv('yonex_racquets.csv')
df

Unnamed: 0_level_0,Flex,Frame,Shaft,Length,Weight / Grip,Stringing Advice,Recommended String,Color(s),Made In,Item Code,Joint
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ASTROX 02 ABILITY,Hi-Flex,HT Graphite,Graphite,10 mm longer,"4U (Avg. 83g) G4,5",4U: 20 - 30 lbs,Control Players:NANOGY 98Hard Hitters:NANOGY 95,Red,China,AX02A,
ASTROX 02 CLEAR,Hi-Flex,HT Graphite,Graphite,10 mm longer,"4U (Avg. 83g) G4,5",4U: 20 - 30 lbs,Control Players:NANOGY 98Hard Hitters:NANOGY 95,Royal Blue,China,AX02C,
ASTROX 02 FEEL,Hi-Flex,HT Graphite,Graphite,10 mm longer,"4U (Avg. 83g) G4,5",4U: 20 - 30 lbs,Control Players:NANOGY 98Hard Hitters:NANOGY 95,Turquoise,China,AX02F,
ARCSABER 2 ABILITY,Hi-Flex,HT Graphite,Graphite,10 mm longer,"4U5,6",4U: 20 - 30 lbs,Control Players: NANOGY 98Hard Hitters: NANOGY 95,Black / Pink,China,ARC2A,
ARCSABER 2 FEEL,Hi-Flex,HT Graphite,Graphite,10 mm longer,"4U5,6",4U: 20 - 30 lbs,Control Players: NANOGY 98Hard Hitters: NANOGY 95,Black / Green,China,ARC2F,
...,...,...,...,...,...,...,...,...,...,...,...
MUSCLE POWER 1,,Aluminum,Steel,,U (105g and below) G4,U: 16 - 20 lbs,,"White / Black , White / Red",,MP1,
B 7000 MDM,,Aluminum,Steel,,U (105g and below) G4,U: 16 - 20 lbs,,"Red, Cyan",,B7000MDM,
B 4000,,Aluminum,Steel,,U (105g and below) G4,U: 16 - 20 lbs,,"Mint, Corn Yellow",,B4000,
NANOFLARE JUNIOR,Hi-Flex,"Graphite, Nanocell NEO, HM Graphite",Graphite,,4U (Avg. 83g) G7,4U: 17 - 22 lbs,,Cyan,,NF-JR,Built-in T-Joint
