In [43]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import os

# URL of the webpage containing the tables
url = 'https://mangacodex.com/oricon_weekly/2008/04_Abril.php'

# Send a GET request to the URL. requests applies no timeout by default, so
# set one explicitly to keep the cell from hanging on an unresponsive server.
response = requests.get(url, timeout=30)

# One DataFrame per table found on the page. Defined before the status check
# so the combine step below works on every code path.
df_list = []

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content of the page
    soup = BeautifulSoup(response.text, 'html.parser')

    # Walk every <section class="table_grid"> and its direct-child <div>s
    for section in soup.find_all('section', class_='table_grid'):
        for div in section.find_all('div', recursive=False):
            # The date range label is the first nested <div> that carries a
            # style attribute; containers without one are skipped.
            date_range_div = div.find('div', style=True)
            if not date_range_div:
                continue
            date_range = date_range_div.get_text(strip=True)

            # Find the table within the div; skip containers without one
            table = div.find('table', class_='table')
            if not table:
                continue

            # Extract table headers
            headers = [th.get_text(strip=True) for th in table.find_all('th')]

            # Extract table rows, skipping the header row and any row that
            # contains no <td> cells
            rows = []
            for tr in table.find_all('tr')[1:]:
                cells = [td.get_text(strip=True) for td in tr.find_all('td')]
                if cells:
                    rows.append(cells)

            # Build the per-table DataFrame and tag every row with the
            # date range it belongs to (inserted as the first column)
            df = pd.DataFrame(rows, columns=headers)
            df.insert(0, 'Date Range', date_range)
            df_list.append(df)

    if not df_list:
        print("No tables were found on the page.")
else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")

# Combine all DataFrames into a single DataFrame. pd.concat raises ValueError
# on an empty list, and leaving final_df unassigned would let later cells
# silently reuse a stale value from an earlier run, so fall back to an empty
# DataFrame when nothing was scraped.
if df_list:
    final_df = pd.concat(df_list, ignore_index=True)
else:
    final_df = pd.DataFrame()

In [45]:
# Show the combined DataFrame as this cell's output
final_df

Unnamed: 0,Date Range,TITLE,WEEKLY,TOTAL
0,WEEKLY RANKING --- March 24 · 30 -- (*Since O...,Kimi ni Todoke #6,347.826,347.826
1,WEEKLY RANKING --- March 24 · 30 -- (*Since O...,Fullmetal Alchemist #19,317.252,317.252
2,WEEKLY RANKING --- March 24 · 30 -- (*Since O...,Nodame Cantabile #20,118.994,*118.994
3,WEEKLY RANKING --- March 24 · 30 -- (*Since O...,Bamboo Blade #8,104.365,104.365
4,WEEKLY RANKING --- March 24 · 30 -- (*Since O...,Tsukihime #6,98.344,98.344
...,...,...,...,...
295,WEEKLY RANKING --- April 28 · May 04,Chi’s Sweet Home #5,19805,58051
296,WEEKLY RANKING --- April 28 · May 04,Katekyo Hitman Reborn! #19,18710,470335
297,WEEKLY RANKING --- April 28 · May 04,Kaze no Daichi #46,18618,23767
298,WEEKLY RANKING --- April 28 · May 04,Kamen Rider Spirits #14,18556,77463


In [47]:
# Create the output directory. exist_ok=True keeps this cell re-runnable:
# without it os.makedirs raises FileExistsError once 'test' already exists.
os.makedirs('test', exist_ok=True)

# Write the combined DataFrame to CSV (pandas also writes the index as the
# first, unnamed column by default).
final_df.to_csv("test/oricon_charts_2008_04.csv")