In [14]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os

def setup_driver(driver_path):
    """
    Set up the Chrome WebDriver.
    """
    service = Service(executable_path=driver_path)
    return webdriver.Chrome(service=service)

def wait_and_click(driver, by, value, timeout=10):
    """
    Wait for an element to be clickable and click it.
    """
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((by, value))
        )
        element.click()
    except Exception as e:
        print(f"Element not found or could not be clicked: {str(e)}")

def get_select_elements(driver, by, value, timeout=10):
    """
    Wait for and return all elements matching the selector.
    """
    try:
        elements = WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located((by, value))
        )
        return elements
    except Exception as e:
        print(f"Elements not found: {str(e)}")
        return []

def get_designs(designs_list, available_designs):
    """
    Filter and return the selected designs from the available designs list.
    """
    selected_designs = []
    for design in available_designs:
        if design.text in designs_list:
            selected_designs.append(design)
    return selected_designs

def collect_table_info(driver, div_class):
    """
    Collect both table headers and data based on the specified div class.
    """
    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_info = {}
        for i, table in enumerate(tables):
            header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
            headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
            headers_text = {
                index: {i: th.text for i, th in enumerate(th_list)}
                for index, th_list in headers_dict.items()
            }
            
            table_data_rows = table.find_elements(By.TAG_NAME, 'tbody')[0].find_elements(By.TAG_NAME, 'tr')
            data_rows = [{i: td.text for i, td in enumerate(row.find_elements(By.TAG_NAME, 'td'))} for row in table_data_rows]
            
            table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": data_rows
                }
        
        return table_info
    
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")
        return []

# Configuration
website = "https://yokohama-atg.com/usa/tire-selector-yokohama-off-highway-tires/"
driver_path = "C:\\Users\\cheta\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe"  # Change according to your PC's file path
my_designs = ['350']
folder = 'Alliance'

if not os.path.exists(folder):
    os.mkdir(folder)

try:
    # Initialize WebDriver
    driver = setup_driver(driver_path)
    driver.get(website)

    # Accept cookies
    wait_and_click(driver, By.XPATH, '//span[@data-cookie-set="accept"]')

    # Select brand
    select_buttons = get_select_elements(driver, By.XPATH, '//span[@class="select2-arrow"]')
    brands_button = select_buttons[2]
    brands_button.click()
    brands = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    brands[0].click()

    # Wait for the loader to disappear
    WebDriverWait(driver, 10).until(EC.invisibility_of_element((By.XPATH, '//div[@class="loader-wrapper"]')))

    # Select design
    design_button = select_buttons[3]
    design_button.click()
    available_designs = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    selected_designs = get_designs(my_designs, available_designs)
    if selected_designs:
        selected_designs[0].click()

    # Click search button
    wait_and_click(driver, By.XPATH, '//a[@title="Search"]')

    # Collect data for both mm and inch tables
    international_table_info = collect_table_info(driver, "data_in_mm")
    usa_table_info = collect_table_info(driver, "data_in_inch")

finally:
    time.sleep(5)
    driver.quit()
    print("All data has been collected and the browser is closed.")


All data has been collected and the browser is closed.


In [11]:
international_table_info

{'Table_1': {'headers': {0: {0: 'Size',
    1: 'Rim',
    2: 'Unloaded Dimension',
    3: 'Loaded Static radius\nmm',
    4: 'Rolling Circumference\nmm',
    5: 'Load Index\nPR\nSymbols',
    6: 'Inflation Pressure\nbar',
    7: 'Recommended Load'},
   1: {0: 'Speed'},
   2: {0: 'Not high and sustained torque;Road transport'},
   4: {0: 'SW\nmm',
    1: 'OD\nmm',
    2: '10\nkmph',
    3: '20\nkmph',
    4: '25\nkmph',
    5: '30\nkmph',
    6: '40\nkmph',
    7: '50\nkmph',
    8: '65\nkmph'}},
  'data': [{0: '300/95R52 (12.4R52)',
    1: 'W10X52 W9X52',
    2: '310',
    3: '1890',
    4: '887',
    5: '5781',
    6: '156D\n159A8',
    7: '0.8',
    8: '2100',
    9: '1720',
    10: '1660',
    11: '1610',
    12: '1530',
    13: '1470',
    14: '1400'},
   {0: '1.2',
    1: '2700',
    2: '2210',
    3: '2130',
    4: '2070',
    5: '1970',
    6: '1890',
    7: '1800'},
   {0: '1.6',
    1: '3270',
    2: '2680',
    3: '2580',
    4: '2510',
    5: '2390',
    6: '2290',
    7: '2

In [67]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os

def setup_driver(driver_path):
    """
    Set up the Chrome WebDriver.
    """
    service = Service(executable_path=driver_path)
    return webdriver.Chrome(service=service)

def wait_and_click(driver, by, value, timeout=10):
    """
    Wait for an element to be clickable and click it.
    """
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((by, value))
        )
        element.click()
    except Exception as e:
        print(f"Element not found or could not be clicked: {str(e)}")

def get_select_elements(driver, by, value, timeout=10):
    """
    Wait for and return all elements matching the selector.
    """
    try:
        elements = WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located((by, value))
        )
        return elements
    except Exception as e:
        print(f"Elements not found: {str(e)}")
        return []

def get_designs(designs_list, available_designs):
    """
    Filter and return the selected designs from the available designs list.
    """
    selected_designs = []
    for design in available_designs:
        if design.text in designs_list:
            selected_designs.append(design)
    return selected_designs


def collect_table_info(driver, div_class):
    """
    Collect both table headers and data based on the specified div class.
    """
    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_info = {}
        for i, table in enumerate(tables):
            header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
            headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
            headers_text = {
                index: {i: th.text for i, th in enumerate(th_list)}
                for index, th_list in headers_dict.items()
            }
            
            table_data_rows = table.find_elements(By.TAG_NAME, 'tbody')[0].find_elements(By.TAG_NAME, 'tr')
            data_rows = [{i: td.text for i, td in enumerate(row.find_elements(By.TAG_NAME, 'td'))} for row in table_data_rows]
            
            table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": data_rows
                }
        
        return table_info
    
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")
        return []

def collect_filtered_data(driver, div_class):
    """
    Collect filtered data from tables based on the specified div class.
    """
    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_data = []
        
        xpath_expression = f'//tbody/tr[./td[@style="color:#a30e13 !important"]]'
        table_unit = {'data_in_mm': 'tables_mm', 'data_in_inch': 'tables_inch'}

        if table_unit[div_class] == 'tables_mm':

            table_info = {}

            print('\nfitered rows in mm:\n')
            for i, table in enumerate(tables):

                header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
                headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
                headers_text = {
                    index: {i: th.text for i, th in enumerate(th_list)}
                    for index, th_list in headers_dict.items()
                }

                table_data_in_mm = table.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')

                data_rows = [{i: td.text for i, td in enumerate(table_data_in_mm[0].find_elements(By.TAG_NAME, 'td'))}]
                data_row = {k: data_rows[0][k] for k in range(7)}

                filtered_data = {}
                for index, tr in enumerate(table_data_in_mm):
                    filter_list = tr.find_elements(By.XPATH, xpath_expression)
                    for idx, ftr in enumerate(filter_list):
                        filtered_data[idx] = ftr.text

                data = filtered_data[i]
                table_data.append({0: data})
                
                print(f'Table_{i+1} Data Collected')

                processed_data = [
                    {
                        i: {j: txt for j, txt in enumerate(td.split(' '))}
                        for i, td in table.items()
                    }
                    for table in table_data
                ]

                table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": [data_row, processed_data[i]]
                }

            return table_info

        else:
            table_info = {}

            print('\nfitered rows in inch:\n')

            for i, table in enumerate(tables):

                header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
                headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
                headers_text = {
                    index: {i: th.text for i, th in enumerate(th_list)}
                    for index, th_list in headers_dict.items()
                }

                table_data_in_inch = table.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')
                data_rows = [{i: td.text for i, td in enumerate(table_data_in_inch[0].find_elements(By.TAG_NAME, 'td'))}]
                data_row = {k: data_rows[0][k] for k in range(7)}

                filtered_data = {}
                for index, tr in enumerate(table_data_in_inch, start=2):
                    filter_list = tr.find_elements(By.XPATH, xpath_expression)
                    for idx, ftr in enumerate(filter_list):
                        filtered_data[idx] = ftr.text

                data = filtered_data[i+2]
                table_data.append({0: data})

                print(f'Table_{i+1} Data Collected')

                processed_data = [
                    {
                        i: {j: txt for j, txt in enumerate(td.split(' '))}
                        for i, td in table.items()
                    }
                    for table in table_data
                ]

                table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": [data_row, processed_data[i]]
                }
        
        return table_info
    
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")
        return []

website = "https://yokohama-atg.com/usa/tire-selector-yokohama-off-highway-tires/"
driver_path = "C:\\Users\\cheta\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe"  # Change according to your PC's file path
my_designs = ['350']
folder = 'Alliance'

if not os.path.exists(folder):
    os.mkdir(folder)

try:
    driver = setup_driver(driver_path)
    driver.get(website)

    wait_and_click(driver, By.XPATH, '//span[@data-cookie-set="accept"]')

    select_buttons = get_select_elements(driver, By.XPATH, '//span[@class="select2-arrow"]')
    brands_button = select_buttons[2]
    brands_button.click()
    brands = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    brands[0].click()

    WebDriverWait(driver, 10).until(EC.invisibility_of_element((By.XPATH, '//div[@class="loader-wrapper"]')))

    design_button = select_buttons[3]
    design_button.click()
    available_designs = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    selected_designs = get_designs(my_designs, available_designs)
    if selected_designs:
        selected_designs[0].click()

    wait_and_click(driver, By.XPATH, '//a[@title="Search"]')

    international_data = collect_table_info(driver, "data_in_mm")
    usa_data = collect_table_info(driver, "data_in_inch")

    international_filtered_data = collect_filtered_data(driver, "data_in_mm")
    usa_filtered_data = collect_filtered_data(driver, "data_in_inch")

finally:
    time.sleep(5)
    driver.quit()
    print("All data has been collected and the browser is closed.")



fitered rows in mm:

Table_1 Data Collected
Table_2 Data Collected

fitered rows in inch:

Table_1 Data Collected
Table_2 Data Collected
All data has been collected and the browser is closed.


In [68]:
print(international_data)
print(international_filtered_data)

{'Table_1': {'headers': {0: {0: 'Size', 1: 'Rim', 2: 'Unloaded Dimension', 3: 'Loaded Static radius\nmm', 4: 'Rolling Circumference\nmm', 5: 'Load Index\nPR\nSymbols', 6: 'Inflation Pressure\nbar', 7: 'Recommended Load'}, 1: {0: 'Speed'}, 2: {0: 'Not high and sustained torque;Road transport'}, 4: {0: 'SW\nmm', 1: 'OD\nmm', 2: '10\nkmph', 3: '20\nkmph', 4: '25\nkmph', 5: '30\nkmph', 6: '40\nkmph', 7: '50\nkmph', 8: '65\nkmph'}}, 'data': [{0: '300/95R52 (12.4R52)', 1: 'W10X52 W9X52', 2: '310', 3: '1890', 4: '887', 5: '5781', 6: '156D\n159A8', 7: '0.8', 8: '2100', 9: '1720', 10: '1660', 11: '1610', 12: '1530', 13: '1470', 14: '1400'}, {0: '1.2', 1: '2700', 2: '2210', 3: '2130', 4: '2070', 5: '1970', 6: '1890', 7: '1800'}, {0: '1.6', 1: '3270', 2: '2680', 3: '2580', 4: '2510', 5: '2390', 6: '2290', 7: '2180'}, {0: '2', 1: '3650', 2: '2990', 3: '2880', 4: '2790', 5: '2660', 6: '2550', 7: '2430'}, {0: '2.4', 1: '3860', 2: '3170', 3: '3050', 4: '2960', 5: '2820', 6: '2700', 7: '2575'}, {0: '2

In [69]:
print(usa_data)
print(usa_filtered_data)

{'Table_1': {'headers': {0: {0: 'Size', 1: 'Rim', 2: 'Unloaded Dimension', 3: 'Loaded Static radius\ninch', 4: 'Rolling Circumference\ninch', 5: 'Load Index\nPR\nSymbols', 6: 'Inflation Pressure\npsi', 7: 'Recommended Load'}, 1: {0: 'Speed'}, 2: {0: 'Not high and sustained torque;Road transport'}, 4: {0: 'SW\ninch', 1: 'OD\ninch', 2: '6\nmph', 3: '12\nmph', 4: '16\nmph', 5: '19\nmph', 6: '25\nmph', 7: '31\nmph', 8: '40\nmph'}}, 'data': [{0: '300/95R52 (12.4R52)', 1: 'W10X52 W9X52', 2: '12.2', 3: '74.4', 4: '34.9', 5: '227.6', 6: '156D\n159A8', 7: '12', 8: '4630', 9: '3790', 10: '3660', 11: '3550', 12: '3370', 13: '3240', 14: '3090'}, {0: '17', 1: '5950', 2: '4870', 3: '4700', 4: '4560', 5: '4340', 6: '4170', 7: '3970'}, {0: '23', 1: '7210', 2: '5910', 3: '5690', 4: '5530', 5: '5270', 6: '5050', 7: '4810'}, {0: '29', 1: '8050', 2: '6590', 3: '6350', 4: '6150', 5: '5860', 6: '5620', 7: '5360'}, {0: '35', 1: '8510', 2: '6990', 3: '6720', 4: '6530', 5: '6220', 6: '5950', 7: '5680'}, {0: '4

In [167]:
import pandas as pd

def extract_headers(table):
    """
    Extracts and processes the headers from the table.
    """
    row1 = [text.replace('\n', ' ') for text in list(table[0].values())]
    row2 = list(table[1].values())
    row3 = list(table[2].values())
    row4 = [text.replace('\n', ' ') for text in list(table[4].values())]
    
    return row1, row2, row3, row4

def create_multi_index(row1, row2, row3, row4):
    """
    Creates a multi-level index for DataFrame columns based on the headers.
    """
    tuples = []
    for col in row1:
        if col == 'Unloaded Dimension':
            tuples.extend([(col, '', '', row4[0]), (col, '', '', row4[1])])
        elif col == 'Recommended Load':
            tuples.extend([(col, row2[0], row3[0], subcol) for subcol in row4[2:]])
        else:
            tuples.append((col, '', '', ''))
    
    return pd.MultiIndex.from_tuples(tuples)

def create_dataframe(headers, sample_data):
    """
    Creates a DataFrame from the processed headers and sample data.
    """
    row1, row2, row3, row4 = extract_headers(headers)
    columns = create_multi_index(row1, row2, row3, row4)
    return pd.DataFrame(sample_data, columns=columns)

def process_table(table_info, table_key):
    columns = table_info[table_key]['headers']
    table_data = table_info[table_key]['data']

    dr1 = [text.replace('\n', ' ') for text in list(table_data[0].values())]

    if len(dr1) <= 7:
        max_key = max(table_data[0])
        combined_dict = table_data[0].copy()

        for k,v in table_data[1][0].items():
            combined_dict[max_key + 1 + k] = v

        combined_data = list(combined_dict.values())

        data_rows = [combined_data]

        return create_dataframe(columns, data_rows)
    else:
        data_rows = [dr1]

    for i, dr in enumerate(table_data[1:], start=2):
        drs = globals()[f'dr{i}'] = [''] * 7 + list(dr.values())
        data_rows.append(drs)

    return create_dataframe(columns, data_rows)

def save_to_csv(df, filename):
    """
    Saves the DataFrame to a CSV file.
    """
    df.to_csv(filename, index=False)
    print(f"CSV file '{filename}' saved successfully.")

inter_table1 = process_table(international_data, 'Table_1')
save_to_csv(inter_table1, 'inter_table1.csv')

inter_table2 = process_table(international_data, 'Table_2')
save_to_csv(inter_table2, 'inter_table2.csv')

usa_table1 = process_table(usa_data, 'Table_1')
save_to_csv(usa_table1, 'usa_table1.csv')

usa_table2 = process_table(usa_data, 'Table_2')
save_to_csv(usa_table2, 'usa_table2.csv')


CSV file 'inter_table1.csv' saved successfully.
CSV file 'inter_table2.csv' saved successfully.
CSV file 'usa_table1.csv' saved successfully.
CSV file 'usa_table2.csv' saved successfully.


In [168]:
# Filtered

inter_table1_filtered = process_table(international_filtered_data, 'Table_1')
save_to_csv(inter_table1, 'inter_table1_filtered.csv')

inter_table2_filtered  = process_table(international_filtered_data, 'Table_2')
save_to_csv(inter_table2, 'inter_table2_filtered.csv')

usa_table1_filtered  = process_table(usa_filtered_data, 'Table_1')
save_to_csv(usa_table1_filtered, 'usa_table1_filtered.csv')

usa_table2_filtered  = process_table(usa_filtered_data, 'Table_2')
save_to_csv(usa_table2_filtered, 'usa_table2_filtered.csv')

CSV file 'inter_table1_filtered.csv' saved successfully.
CSV file 'inter_table2_filtered.csv' saved successfully.
CSV file 'usa_table1_filtered.csv' saved successfully.
CSV file 'usa_table2_filtered.csv' saved successfully.


In [169]:
inter_table2_filtered

Unnamed: 0_level_0,Size,Rim,Unloaded Dimension,Unloaded Dimension,Loaded Static radius mm,Rolling Circumference mm,Load Index PR Symbols,Inflation Pressure bar,Recommended Load,Recommended Load
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Speed,Speed
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,CYCLIC OPERATION,CYCLIC OPERATION
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,SW mm,OD mm,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,10 kmph,15 kmph
0,300/95R52 (12.4R52),W10X52 W9X52,310,1890,887,5781,156D\n159A8,6,6800,6200


In [112]:
data1 = international_filtered_data['Table_1']['data']
data1

[{0: '300/95R52 (12.4R52)',
  1: 'W10X52 W9X52',
  2: '310',
  3: '1890',
  4: '887',
  5: '5781',
  6: '156D\n159A8'},
 {0: {0: '4.8',
   1: '6000',
   2: '4920',
   3: '4740',
   4: '4600',
   5: '4380',
   6: '4200',
   7: '4000'}}]

In [114]:
data1[0]

{0: '300/95R52 (12.4R52)',
 1: 'W10X52 W9X52',
 2: '310',
 3: '1890',
 4: '887',
 5: '5781',
 6: '156D\n159A8'}

In [116]:
data1[1][0]

{0: '4.8',
 1: '6000',
 2: '4920',
 3: '4740',
 4: '4600',
 5: '4380',
 6: '4200',
 7: '4000'}

In [117]:
max_key = max(data1[0])
max_key

6

In [120]:
combined_dict = data1[0].copy()

In [121]:
combined_dict

{0: '300/95R52 (12.4R52)',
 1: 'W10X52 W9X52',
 2: '310',
 3: '1890',
 4: '887',
 5: '5781',
 6: '156D\n159A8'}

In [124]:
for k,v in data1[1][0].items():
    combined_dict[max_key + 1 + k] = v

In [131]:
max_key = max(data1[0])
combined_dict = data1[0].copy()

for k,v in data1[1][0].items():
    combined_dict[max_key + 1 + k] = v

combined_data = list(combined_dict.values())

In [133]:
pd.DataFrame([combined_data], columns=mulcol)

Unnamed: 0_level_0,Size,Rim,Unloaded Dimension,Unloaded Dimension,Loaded Static radius mm,Rolling Circumference mm,Load Index PR Symbols,Inflation Pressure bar,Recommended Load,Recommended Load,Recommended Load,Recommended Load,Recommended Load,Recommended Load,Recommended Load
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Speed,Speed,Speed,Speed,Speed,Speed,Speed
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Not high and sustained torque;Road transport,Not high and sustained torque;Road transport,Not high and sustained torque;Road transport,Not high and sustained torque;Road transport,Not high and sustained torque;Road transport,Not high and sustained torque;Road transport,Not high and sustained torque;Road transport
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,SW mm,OD mm,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,10 kmph,20 kmph,25 kmph,30 kmph,40 kmph,50 kmph,65 kmph
0,300/95R52 (12.4R52),W10X52 W9X52,310,1890,887,5781,156D\n159A8,4.8,6000,4920,4740,4600,4380,4200,4000


In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import os

def setup_driver(driver_path):
    """
    Set up the Chrome WebDriver.
    """
    service = Service(executable_path=driver_path)
    return webdriver.Chrome(service=service)

def wait_and_click(driver, by, value, timeout=10):
    """
    Wait for an element to be clickable and click it.
    """
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((by, value))
        )
        element.click()
    except Exception as e:
        print(f"Element not found or could not be clicked: {str(e)}")

def get_select_elements(driver, by, value, timeout=10):
    """
    Wait for and return all elements matching the selector.
    """
    try:
        elements = WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located((by, value))
        )
        return elements
    except Exception as e:
        print(f"Elements not found: {str(e)}")
        return []

def get_designs(designs_list, available_designs):
    """
    Filter and return the selected designs from the available designs list.
    """
    selected_designs = []
    for design in available_designs:
        if design.text in designs_list:
            selected_designs.append(design)
    return selected_designs

def collect_table_info(driver, div_class):
    """
    Collect both table headers and data based on the specified div class.
    """
    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_info = {}
        for i, table in enumerate(tables):
            header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
            headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
            headers_text = {
                index: {i: th.text for i, th in enumerate(th_list)}
                for index, th_list in headers_dict.items()
            }
            
            table_data_rows = table.find_elements(By.TAG_NAME, 'tbody')[0].find_elements(By.TAG_NAME, 'tr')
            data_rows = [{i: td.text for i, td in enumerate(row.find_elements(By.TAG_NAME, 'td'))} for row in table_data_rows]
            
            table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": data_rows
                }
            print('collect table info!!!')
        
        return table_info
    
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")
        return {}
    

def collect_filtered_data(driver, div_class):
    """
    Collect filtered data from tables based on the specified div class.
    """
    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_data = []
        
        xpath_expression = f'//tbody/tr[./td[@style="color:#a30e13 !important"]]'
        table_unit = {'data_in_mm': 'tables_mm', 'data_in_inch': 'tables_inch'}

        if table_unit[div_class] == 'tables_mm':

            table_info = {}

            for i, table in enumerate(tables):

                header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
                headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
                headers_text = {
                    index: {i: th.text for i, th in enumerate(th_list)}
                    for index, th_list in headers_dict.items()
                }

                table_data_in_mm = table.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')

                data_rows = [{i: td.text for i, td in enumerate(table_data_in_mm[0].find_elements(By.TAG_NAME, 'td'))}]
                data_row = {k: data_rows[0][k] for k in range(7)}

                filtered_data = {}
                for index, tr in enumerate(table_data_in_mm):
                    filter_list = tr.find_elements(By.XPATH, xpath_expression)
                    for idx, ftr in enumerate(filter_list):
                        filtered_data[idx] = ftr.text

                data = filtered_data[i]
                table_data.append({0: data})
                
                print(f'Table_{i+1} Data Collected')

                processed_data = [
                    {
                        i: {j: txt for j, txt in enumerate(td.split(' '))}
                        for i, td in table.items()
                    }
                    for table in table_data
                ]

                table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": [data_row, processed_data[i]]
                }

            return table_info

        else:
            table_info = {}

            for i, table in enumerate(tables):

                header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
                headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
                headers_text = {
                    index: {i: th.text for i, th in enumerate(th_list)}
                    for index, th_list in headers_dict.items()
                }

                table_data_in_inch = table.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')
                data_rows = [{i: td.text for i, td in enumerate(table_data_in_inch[0].find_elements(By.TAG_NAME, 'td'))}]
                data_row = {k: data_rows[0][k] for k in range(7)}

                filtered_data = {}
                for index, tr in enumerate(table_data_in_inch, start=2):
                    filter_list = tr.find_elements(By.XPATH, xpath_expression)
                    for idx, ftr in enumerate(filter_list):
                        filtered_data[idx] = ftr.text

                data = filtered_data[i+2]
                table_data.append({0: data})

                print(f'Table_{i+1} Data Collected')

                processed_data = [
                    {
                        i: {j: txt for j, txt in enumerate(td.split(' '))}
                        for i, td in table.items()
                    }
                    for table in table_data
                ]

                table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": [data_row, processed_data[i]]
                }
        
        return table_info
    
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")
        return []

def extract_headers(table):
    """
    Extracts and processes the headers from the table.
    """
    row1 = [text.replace('\n', ' ') for text in list(table[0].values())]
    row2 = list(table[1].values())
    row3 = list(table[2].values())
    row4 = [text.replace('\n', ' ') for text in list(table[4].values())]
    
    return row1, row2, row3, row4

def create_multi_index(row1, row2, row3, row4):
    """
    Creates a multi-level index for DataFrame columns based on the headers.
    """
    tuples = []
    for col in row1:
        if col == 'Unloaded Dimension':
            tuples.extend([(col, '', '', row4[0]), (col, '', '', row4[1])])
        elif col == 'Recommended Load':
            tuples.extend([(col, row2[0], row3[0], subcol) for subcol in row4[2:]])
        else:
            tuples.append((col, '', '', ''))
    
    return pd.MultiIndex.from_tuples(tuples)

def create_dataframe(headers, sample_data):
    """
    Creates a DataFrame from the processed headers and sample data.
    """
    row1, row2, row3, row4 = extract_headers(headers)
    columns = create_multi_index(row1, row2, row3, row4)
    return pd.DataFrame(sample_data, columns=columns)

def process_table(table_info, table_key):
    try:
        columns = table_info[table_key]['headers']
        table_data = table_info[table_key]['data']

        dr1 = [text.replace('\n', ' ') for text in list(table_data[0].values())]

        if len(dr1) <= 7:
            max_key = max(table_data[0])
            combined_dict = table_data[0].copy()

            for k,v in table_data[1][0].items():
                combined_dict[max_key + 1 + k] = v

            combined_data = list(combined_dict.values())

            data_rows = [combined_data]
            print('columns',len(columns.values()))
            print('columns', columns.values())
            print('rows', len(data_rows[0]))
            print('rows', data_rows[0])

            return create_dataframe(columns, data_rows)
        
        else:
            data_rows = [dr1]

        for i, dr in enumerate(table_data[1:], start=2):
            drs = globals()[f'dr{i}'] = [''] * 7 + list(dr.values())
            data_rows.append(drs)

        return create_dataframe(columns, data_rows)
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")
        return []

def save_to_csv(df, folder, filename):
    """
    Saves the DataFrame to a CSV file.
    """
    try:
        df.to_csv(os.path.join(folder,filename), index=False)
        print(f"CSV file '{filename}' saved successfully.")
    except Exception as e:
        print(f"No Important rows found!!!")

def main():
    # Configuration
    website = "https://yokohama-atg.com/usa/tire-selector-yokohama-off-highway-tires/"
    driver_path = "C:\\Users\\cheta\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe"  # Change according to your PC's file path
    my_designs = '310'
    # my_designs = input("Enter the design: ")
    folder = f'Alliance/{my_designs}'

    if not os.path.exists(folder):
        os.mkdir(folder)

    try:
        # Initialize WebDriver
        driver = setup_driver(driver_path)
        driver.get(website)

        # Accept cookies
        wait_and_click(driver, By.XPATH, '//span[@data-cookie-set="accept"]')

        # Select brand
        select_buttons = get_select_elements(driver, By.XPATH, '//span[@class="select2-arrow"]')
        brands_button = select_buttons[2]
        brands_button.click()
        brands = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
        brands[0].click()

        # Wait for the loader to disappear
        WebDriverWait(driver, 10).until(EC.invisibility_of_element((By.XPATH, '//div[@class="loader-wrapper"]')))

        # Select design
        design_button = select_buttons[3]
        design_button.click()
        available_designs = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
        selected_designs = get_designs([my_designs], available_designs)
        if selected_designs:
            selected_designs[0].click()

        wait_and_click(driver, By.XPATH, '//a[@title="Search"]')

        international_data = collect_table_info(driver, "data_in_mm")
        usa_data = collect_table_info(driver, "data_in_inch")

        print(international_data)
        print(usa_data)

        # international_filtered_data = collect_filtered_data(driver, "data_in_mm")
        # usa_filtered_data = collect_filtered_data(driver, "data_in_inch")

        # inter_table1 = process_table(international_data, 'Table_1')
        # save_to_csv(inter_table1, folder, 'inter_table1.csv')

        # inter_table2 = process_table(international_data, 'Table_2')
        # save_to_csv(inter_table2, folder, 'inter_table2.csv')

        # usa_table1 = process_table(usa_data, 'Table_1')
        # save_to_csv(usa_table1, folder, 'usa_table1.csv')

        # usa_table2 = process_table(usa_data, 'Table_2')
        # save_to_csv(usa_table2, folder, 'usa_table2.csv')

        # inter_table1_filtered = process_table(international_filtered_data, 'Table_1')
        # save_to_csv(inter_table1_filtered, folder, 'inter_table1_filtered.csv')

        # inter_table2_filtered  = process_table(international_filtered_data, 'Table_2')
        # save_to_csv(inter_table2_filtered, folder, 'inter_table2_filtered.csv')

        # usa_table1_filtered  = process_table(usa_filtered_data, 'Table_1')
        # save_to_csv(usa_table1_filtered, folder, 'usa_table1_filtered.csv')

        # usa_table2_filtered  = process_table(usa_filtered_data, 'Table_2')
        # save_to_csv(usa_table2_filtered, folder, 'usa_table2_filtered.csv')

    finally:
        time.sleep(10)
        driver.quit()
        print("Data extraction and processing complete.")

if __name__ == "__main__":
    main()

collect table info!!!
collect table info!!!
collect table info!!!
collect table info!!!
{'Table_1': {'headers': {0: {0: 'Size', 1: 'Rim', 2: 'Unloaded Dimension', 3: 'Loaded Static radius\nmm', 4: 'Rolling Circumference\nmm', 5: 'Load Index\nPR\nSymbols', 6: 'Inflation Pressure\nbar', 7: 'Recommended Load'}, 1: {0: 'Speed'}, 2: {0: 'Off-the road'}, 4: {0: 'SW\nmm', 1: 'OD\nmm', 2: 'Static\nkmph', 3: '5\nkmph', 4: '10\nkmph', 5: '25\nkmph'}}, 'data': [{0: '16.00-24 L-3', 1: '11.25', 2: '432', 3: '1493', 4: '685', 5: '4450', 6: '16PR\n181A2', 7: '3.5', 8: '11780', 9: '8320', 10: '7360', 11: '6260'}, {0: '3.75', 1: '12270', 2: '8670', 3: '7670', 4: '6520'}, {0: '4', 1: '12740', 2: '8990', 3: '7960', 4: '6770'}, {0: '4.25', 1: '13200', 2: '9320', 3: '8250', 4: '7010'}, {0: '20PR\n187A2', 1: '4.5', 2: '13870', 3: '9800', 4: '8670', 5: '7370'}, {0: '4.75', 1: '14320', 2: '10110', 3: '8950', 4: '7610'}, {0: '5', 1: '14750', 2: '10420', 3: '9220', 4: '7840'}, {0: '5.5', 1: '15600', 2: '11020',

In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
import os

def setup_driver(driver_path):
    """
    Set up the Chrome WebDriver.
    """
    service = Service(executable_path=driver_path)
    return webdriver.Chrome(service=service)

def wait_and_click(driver, by, value, timeout=10):
    """
    Wait for an element to be clickable and click it.
    """
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((by, value))
        )
        element.click()
    except Exception as e:
        print(f"Element not found or could not be clicked: {str(e)}")

def get_select_elements(driver, by, value, timeout=10):
    """
    Wait for and return all elements matching the selector.
    """
    try:
        elements = WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located((by, value))
        )
        return elements
    except Exception as e:
        print(f"Elements not found: {str(e)}")
        return []

def get_designs(designs_list, available_designs):
    """
    Filter and return the selected designs from the available designs list.
    """
    selected_designs = []
    for design in available_designs:
        if design.text in designs_list:
            selected_designs.append(design)
    return selected_designs

In [25]:
website = "https://yokohama-atg.com/usa/tire-selector-yokohama-off-highway-tires/"
driver_path = "C:\\Users\\cheta\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe"  # Change according to your PC's file path
my_designs = '310'
# my_designs = input("Enter the design: ")
folder = f'Alliance/{my_designs}'

if not os.path.exists(folder):
    os.mkdir(folder)

try:
    # Initialize WebDriver
    driver = setup_driver(driver_path)
    driver.get(website)

    # Accept cookies
    wait_and_click(driver, By.XPATH, '//span[@data-cookie-set="accept"]')

    # Select brand
    select_buttons = get_select_elements(driver, By.XPATH, '//span[@class="select2-arrow"]')
    brands_button = select_buttons[2]
    brands_button.click()
    brands = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    brands[0].click()

    # Wait for the loader to disappear
    WebDriverWait(driver, 10).until(EC.invisibility_of_element((By.XPATH, '//div[@class="loader-wrapper"]')))

    # Select design
    design_button = select_buttons[3]
    design_button.click()
    available_designs = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    selected_designs = get_designs([my_designs], available_designs)
    if selected_designs:
        selected_designs[0].click()

    wait_and_click(driver, By.XPATH, '//a[@title="Search"]')

    div_class = 'data_in_mm'

    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_info = {}
        for i, table in enumerate(tables):
            header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
            headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
            headers_text = {
                index: {i: th.text for i, th in enumerate(th_list)}
                for index, th_list in headers_dict.items()
            }
            
            table_data_rows = table.find_elements(By.TAG_NAME, 'tbody')[0].find_elements(By.TAG_NAME, 'tr')
            data_rows = [{i: td.text for i, td in enumerate(row.find_elements(By.TAG_NAME, 'td'))} for row in table_data_rows]
            
            table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": data_rows
                }
        
        try:
            headers = table_info['Table_1']['headers']
            table_data = table_info['Table_1']['data']

            dr1 = [text.replace('\n', ' ') for text in list(table_data[0].values())]

            if len(dr1) <= 7:
                print('filtered')
                max_key = max(table_data[0])
                combined_dict = table_data[0].copy()

                for k,v in table_data[1][0].items():
                    combined_dict[max_key + 1 + k] = v

                combined_data = list(combined_dict.values())

                data_rows = [combined_data]
            
            else:
                print('all data')
                data_rows = [dr1]

            for i, dr in enumerate(table_data[1:], start=2):
                drs = globals()[f'dr{i}'] = [''] * 7 + list(dr.values())
                data_rows.append(drs)

            print(data_rows)

            row1 = [text.replace('\n', ' ') for text in list(headers[0].values())]
            row2 = list(headers[1].values())
            row3 = list(headers[2].values())
            row4 = [text.replace('\n', ' ') for text in list(headers[4].values())]

            print('row1', row1)
            print('row2', row2)
            print('row3', row3)
            print('row4', row4)

            tuples = []

            for col in row1:
                if col == 'Unloaded Dimension':
                    tuples.extend([(col, '', '', row4[0]), (col, '', '', row4[1])])
                elif col == 'Recommended Load':
                    tuples.extend([(col, row2[0], row3[0], subcol) for subcol in row4[2:]])
                else:
                    tuples.append((col, '', '', ''))

            columns = pd.MultiIndex.from_tuples(tuples)

            df = pd.DataFrame(data_rows, columns=columns)

        except Exception as e:
            print(f"An error occurred while fetching tables: {str(e)}")
                
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")

finally:
    time.sleep(10)
    driver.quit()
    print("Data extraction and processing complete.")

all data
[['16.00-24 L-3', '11.25', '432', '1493', '685', '4450', '16PR 181A2', '3.5', '11780', '8320', '7360', '6260'], ['', '', '', '', '', '', '', '3.75', '12270', '8670', '7670', '6520'], ['', '', '', '', '', '', '', '4', '12740', '8990', '7960', '6770'], ['', '', '', '', '', '', '', '4.25', '13200', '9320', '8250', '7010'], ['', '', '', '', '', '', '', '20PR\n187A2', '4.5', '13870', '9800', '8670', '7370'], ['', '', '', '', '', '', '', '4.75', '14320', '10110', '8950', '7610'], ['', '', '', '', '', '', '', '5', '14750', '10420', '9220', '7840'], ['', '', '', '', '', '', '', '5.5', '15600', '11020', '9750', '8290'], ['', '', '', '', '', '', '', '24PR\n190A2', '5.75', '15790', '11150', '9870', '8390'], ['', '', '', '', '', '', '', '6', '16190', '11440', '10120', '8600'], ['', '', '', '', '', '', '', '6.25', '16580', '11710', '10360', '8810'], ['', '', '', '', '', '', '', '6.5', '16960', '11980', '10600', '9010']]
row1 ['Size', 'Rim', 'Unloaded Dimension', 'Loaded Static radius mm', 

In [40]:
pd.DataFrame(data_rows,columns=columns)

ValueError: 12 columns passed, passed data had 13 columns

In [35]:
data_rows

[['16.00-24 L-3',
  '11.25',
  '432',
  '1493',
  '685',
  '4450',
  '16PR 181A2',
  '3.5',
  '11780',
  '8320',
  '7360',
  '6260'],
 ['', '', '', '', '', '', '', '3.75', '12270', '8670', '7670', '6520'],
 ['', '', '', '', '', '', '', '4', '12740', '8990', '7960', '6770'],
 ['', '', '', '', '', '', '', '4.25', '13200', '9320', '8250', '7010'],
 ['',
  '',
  '',
  '',
  '',
  '',
  '',
  '20PR\n187A2',
  '4.5',
  '13870',
  '9800',
  '8670',
  '7370'],
 ['', '', '', '', '', '', '', '4.75', '14320', '10110', '8950', '7610'],
 ['', '', '', '', '', '', '', '5', '14750', '10420', '9220', '7840'],
 ['', '', '', '', '', '', '', '5.5', '15600', '11020', '9750', '8290'],
 ['',
  '',
  '',
  '',
  '',
  '',
  '',
  '24PR\n190A2',
  '5.75',
  '15790',
  '11150',
  '9870',
  '8390'],
 ['', '', '', '', '', '', '', '6', '16190', '11440', '10120', '8600'],
 ['', '', '', '', '', '', '', '6.25', '16580', '11710', '10360', '8810'],
 ['', '', '', '', '', '', '', '6.5', '16960', '11980', '10600', '9010']

In [36]:
columns

MultiIndex([(                    'Size',      '',             '', ...),
            (                     'Rim',      '',             '', ...),
            (      'Unloaded Dimension',      '',             '', ...),
            (      'Unloaded Dimension',      '',             '', ...),
            ( 'Loaded Static radius mm',      '',             '', ...),
            ('Rolling Circumference mm',      '',             '', ...),
            (   'Load Index PR Symbols',      '',             '', ...),
            (  'Inflation Pressure bar',      '',             '', ...),
            (        'Recommended Load', 'Speed', 'Off-the road', ...),
            (        'Recommended Load', 'Speed', 'Off-the road', ...),
            (        'Recommended Load', 'Speed', 'Off-the road', ...),
            (        'Recommended Load', 'Speed', 'Off-the road', ...)],
           )

In [37]:
tuples

[('Size', '', '', ''),
 ('Rim', '', '', ''),
 ('Unloaded Dimension', '', '', 'SW mm'),
 ('Unloaded Dimension', '', '', 'OD mm'),
 ('Loaded Static radius mm', '', '', ''),
 ('Rolling Circumference mm', '', '', ''),
 ('Load Index PR Symbols', '', '', ''),
 ('Inflation Pressure bar', '', '', ''),
 ('Recommended Load', 'Speed', 'Off-the road', 'Static kmph'),
 ('Recommended Load', 'Speed', 'Off-the road', '5 kmph'),
 ('Recommended Load', 'Speed', 'Off-the road', '10 kmph'),
 ('Recommended Load', 'Speed', 'Off-the road', '25 kmph')]

In [41]:
import pandas as pd
from pandas import MultiIndex

data_rows = [['16.00-24 L-3', '11.25', '432', '1493', '685', '4450', '16PR 181A2', '3.5', '11780', '8320', '7360', '6260'],
             ['', '', '', '', '', '', '', '3.75', '12270', '8670', '7670', '6520'],
             ['', '', '', '', '', '', '', '4', '12740', '8990', '7960', '6770'],
             ['', '', '', '', '', '', '', '4.25', '13200', '9320', '8250', '7010'],
             ['', '', '', '', '', '', '20PR\n187A2', '4.5', '13870', '9800', '8670', '7370'],
             ['', '', '', '', '', '', '', '4.75', '14320', '10110', '8950', '7610'],
             ['', '', '', '', '', '', '', '5', '14750', '10420', '9220', '7840'],
             ['', '', '', '', '', '', '', '5.5', '15600', '11020', '9750', '8290'],
             ['', '', '', '', '', '', '24PR\n190A2', '5.75', '15790', '11150', '9870', '8390'],
             ['', '', '', '', '', '', '', '6', '16190', '11440', '10120', '8600'],
             ['', '', '', '', '', '', '', '6.25', '16580', '11710', '10360', '8810'],
             ['', '', '', '', '', '', '', '6.5', '16960', '11980', '10600', '9010']]

columns = MultiIndex.from_tuples([
    ('Size', '', '', ''),
    ('Rim', '', '', ''),
    ('Unloaded Dimension', '', '', 'SW mm'),
    ('Unloaded Dimension', '', '', 'OD mm'),
    ('Loaded Static radius mm', '', '', ''),
    ('Rolling Circumference mm', '', '', ''),
    ('Load Index PR Symbols', '', '', ''),
    ('Inflation Pressure bar', '', '', ''),
    ('Recommended Load', 'Speed', 'Off-the road', 'Static kmph'),
    ('Recommended Load', 'Speed', 'Off-the road', '5 kmph'),
    ('Recommended Load', 'Speed', 'Off-the road', '10 kmph'),
    ('Recommended Load', 'Speed', 'Off-the road', '25 kmph')
])

df = pd.DataFrame(data_rows, columns=columns)
df

Unnamed: 0_level_0,Size,Rim,Unloaded Dimension,Unloaded Dimension,Loaded Static radius mm,Rolling Circumference mm,Load Index PR Symbols,Inflation Pressure bar,Recommended Load,Recommended Load,Recommended Load,Recommended Load
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Speed,Speed,Speed,Speed
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Off-the road,Off-the road,Off-the road,Off-the road
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,SW mm,OD mm,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Static kmph,5 kmph,10 kmph,25 kmph
0,16.00-24 L-3,11.25,432.0,1493.0,685.0,4450.0,16PR 181A2,3.5,11780,8320,7360,6260
1,,,,,,,,3.75,12270,8670,7670,6520
2,,,,,,,,4.0,12740,8990,7960,6770
3,,,,,,,,4.25,13200,9320,8250,7010
4,,,,,,,20PR\n187A2,4.5,13870,9800,8670,7370
5,,,,,,,,4.75,14320,10110,8950,7610
6,,,,,,,,5.0,14750,10420,9220,7840
7,,,,,,,,5.5,15600,11020,9750,8290
8,,,,,,,24PR\n190A2,5.75,15790,11150,9870,8390
9,,,,,,,,6.0,16190,11440,10120,8600


In [52]:
import os
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def setup_driver(driver_path):
    """
    Set up the Chrome WebDriver.
    """
    service = Service(executable_path=driver_path)
    return webdriver.Chrome(service=service)

def wait_and_click(driver, by, value):
    element = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((by, value)))
    element.click()

def get_select_elements(driver, by, value):
    return WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((by, value)))

def get_designs(my_designs, available_designs):
    return [design for design in available_designs if design.text == my_designs]

# Your script starts here
website = "https://yokohama-atg.com/usa/tire-selector-yokohama-off-highway-tires/"
driver_path = "C:\\Users\\cheta\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe"  # Change according to your PC's file path
my_designs = '310'
folder = f'Alliance/{my_designs}'

if not os.path.exists(folder):
    os.mkdir(folder)

try:
    driver = setup_driver(driver_path)
    driver.get(website)

    wait_and_click(driver, By.XPATH, '//span[@data-cookie-set="accept"]')

    select_buttons = get_select_elements(driver, By.XPATH, '//span[@class="select2-arrow"]')
    brands_button = select_buttons[2]
    brands_button.click()
    brands = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    brands[0].click()

    WebDriverWait(driver, 10).until(EC.invisibility_of_element((By.XPATH, '//div[@class="loader-wrapper"]')))

    design_button = select_buttons[3]
    design_button.click()
    available_designs = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    selected_designs = get_designs(my_designs, available_designs)
    if selected_designs:
        selected_designs[0].click()

    wait_and_click(driver, By.XPATH, '//a[@title="Search"]')

    div_class = 'data_in_mm'

    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_info = {}
        for i, table in enumerate(tables):
            header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
            headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
            headers_text = {
                index: {i: th.text for i, th in enumerate(th_list)}
                for index, th_list in headers_dict.items()
            }
            
            table_data_rows = table.find_elements(By.TAG_NAME, 'tbody')[0].find_elements(By.TAG_NAME, 'tr')
            data_rows = [{i: td.text for i, td in enumerate(row.find_elements(By.TAG_NAME, 'td'))} for row in table_data_rows]
            
            table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": data_rows
                }
        
        try:
            headers = table_info['Table_1']['headers']
            table_data = table_info['Table_1']['data']

            dr1 = [text.replace('\n', ' ') for text in list(table_data[0].values())]

            if len(dr1) <= 7:
                print('filtered')
                max_key = max(table_data[0])
                combined_dict = table_data[0].copy()

                for k, v in table_data[1].items():
                    combined_dict[max_key + 1 + k] = v

                combined_data = list(combined_dict.values())

                data_rows = [combined_data]
            else:
                print('all data')
                data_rows = [dr1]

            max_length = max(len(list(dr.values())) for dr in table_data)

            for i, dr in enumerate(table_data[1:], start=2):
                padding_length = max_length - len(list(dr.values()))
                drs = [''] * padding_length + list(dr.values())
                data_rows.append(drs)
            print(data_rows)

            row1 = [text.replace('\n', ' ') for text in list(headers[0].values())]
            row2 = list(headers[1].values())
            row3 = list(headers[2].values())
            row4 = [text.replace('\n', ' ') for text in list(headers[4].values())]

            print('row1', row1)
            print('row2', row2)
            print('row3', row3)
            print('row4', row4)

            tuples = []

            for col in row1:
                if col == 'Unloaded Dimension':
                    tuples.extend([(col, '', '', row4[0]), (col, '', '', row4[1])])
                elif col == 'Recommended Load':
                    tuples.extend([(col, row2[0], row3[0], subcol) for subcol in row4[2:]])
                else:
                    tuples.append((col, '', '', ''))

            columns = pd.MultiIndex.from_tuples(tuples)

            df = pd.DataFrame(data_rows, columns=columns)

        except Exception as e:
            print(f"An error occurred while processing tables: {str(e)}")
                
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")

finally:
    time.sleep(10)
    driver.quit()
    print("Data extraction and processing complete.")

all data
[['16.00-24 L-3', '11.25', '432', '1493', '685', '4450', '16PR 181A2', '3.5', '11780', '8320', '7360', '6260'], ['', '', '', '', '', '', '', '3.75', '12270', '8670', '7670', '6520'], ['', '', '', '', '', '', '', '4', '12740', '8990', '7960', '6770'], ['', '', '', '', '', '', '', '4.25', '13200', '9320', '8250', '7010'], ['', '', '', '', '', '', '20PR\n187A2', '4.5', '13870', '9800', '8670', '7370'], ['', '', '', '', '', '', '', '4.75', '14320', '10110', '8950', '7610'], ['', '', '', '', '', '', '', '5', '14750', '10420', '9220', '7840'], ['', '', '', '', '', '', '', '5.5', '15600', '11020', '9750', '8290'], ['', '', '', '', '', '', '24PR\n190A2', '5.75', '15790', '11150', '9870', '8390'], ['', '', '', '', '', '', '', '6', '16190', '11440', '10120', '8600'], ['', '', '', '', '', '', '', '6.25', '16580', '11710', '10360', '8810'], ['', '', '', '', '', '', '', '6.5', '16960', '11980', '10600', '9010']]
row1 ['Size', 'Rim', 'Unloaded Dimension', 'Loaded Static radius mm', 'Rolling

In [53]:
df

Unnamed: 0_level_0,Size,Rim,Unloaded Dimension,Unloaded Dimension,Loaded Static radius mm,Rolling Circumference mm,Load Index PR Symbols,Inflation Pressure bar,Recommended Load,Recommended Load,Recommended Load,Recommended Load
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Speed,Speed,Speed,Speed
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Off-the road,Off-the road,Off-the road,Off-the road
Unnamed: 0_level_3,Unnamed: 1_level_3,Unnamed: 2_level_3,SW mm,OD mm,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Static kmph,5 kmph,10 kmph,25 kmph
0,16.00-24 L-3,11.25,432.0,1493.0,685.0,4450.0,16PR 181A2,3.5,11780,8320,7360,6260
1,,,,,,,,3.75,12270,8670,7670,6520
2,,,,,,,,4.0,12740,8990,7960,6770
3,,,,,,,,4.25,13200,9320,8250,7010
4,,,,,,,20PR\n187A2,4.5,13870,9800,8670,7370
5,,,,,,,,4.75,14320,10110,8950,7610
6,,,,,,,,5.0,14750,10420,9220,7840
7,,,,,,,,5.5,15600,11020,9750,8290
8,,,,,,,24PR\n190A2,5.75,15790,11150,9870,8390
9,,,,,,,,6.0,16190,11440,10120,8600
