In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re

In [4]:
def extract_oscar_table(
    url_oscars="https://es.wikipedia.org/wiki/Premios_%C3%93scar",
    start_ceremony=72,
    timeout=30,
):
    """Scrape the first 'wikitable' of a web page into a pandas DataFrame.

    The page is downloaded, the first ``<table class="wikitable">`` is
    selected, and rows are collected starting at the first row whose first
    ``<td>`` begins with the number ``start_ceremony``. Only the columns
    'ceremonia', 'mejor película', 'mejor director', 'mejor actor' and
    'mejor actriz' are kept (those of them that exist in the header).

    Args:
        url_oscars: Address of the page to download. Defaults to the Spanish
            Wikipedia article on the Oscars.
        start_ceremony: Ceremony number of the first row to keep.
        timeout: Value forwarded to ``requests.get(..., timeout=...)`` so a
            stalled connection cannot block the notebook forever.

    Returns:
        tuple: ``(DataFrame, "Success")`` when at least one row was
        extracted, otherwise ``(None, message)`` where ``message`` explains
        the failure (network error, non-200 status, no table, no matching
        header, or no rows).
    """
    desired_columns = ['ceremonia', 'mejor película', 'mejor director', 'mejor actor', 'mejor actriz']

    # Network problems are reported through the same (None, message) channel
    # as every other failure instead of escaping as an exception.
    try:
        res_oscar = requests.get(url_oscars, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error: Request failed: {exc}")
        return (None, f"Request failed: {exc}")

    if res_oscar.status_code != 200:
        print(f"Error: Request failed with status code {res_oscar.status_code}")
        return (None, f"Request failed with status code {res_oscar.status_code}")

    soup_oscar = BeautifulSoup(res_oscar.content, 'html.parser')

    tables = soup_oscar.find_all("table", {"class": "wikitable"})
    if not tables:
        print("Error: No tables found with class 'wikitable'.")
        return (None, "No tables found with class 'wikitable'")

    # Select the first table (adjust if necessary)
    table_oscar = tables[0]

    # Header positions come from every <th> in the table and are later applied
    # to the <td> cells of each row.
    # NOTE(review): this assumes the <th> order lines up with each row's <td>
    # order — confirm against the live page.
    columns = [th.getText().strip().lower() for th in table_oscar.find_all("th")]

    # Keep the name next to its position so the DataFrame is labelled with
    # exactly the columns that were found (a missing header used to make the
    # DataFrame constructor fail on a column-count mismatch).
    selected = [(col, columns.index(col)) for col in desired_columns if col in columns]
    if not selected:
        message = "None of the desired columns were found in the table header."
        print(f"Error: {message}")
        return (None, message)

    selected_names = [name for name, _ in selected]
    column_indices = [idx for _, idx in selected]
    highest_index = max(column_indices)

    data = []
    ceremony_found = False
    for row in table_oscar.find_all('tr'):
        cells = [td.getText().strip() for td in row.find_all("td")]

        # The starting row is the one whose first cell begins with the
        # requested ceremony number (e.g. "72.ª" for 72).
        if cells and not ceremony_found:
            leading_number = re.match(r"\d+", cells[0])
            if leading_number and int(leading_number.group()) == start_ceremony:
                ceremony_found = True

        if not ceremony_found:
            continue

        # A row must reach the highest selected position, otherwise indexing
        # it would raise IndexError; such rows are skipped.
        if len(cells) <= highest_index:
            continue

        filtered_cells = [cells[idx] for idx in column_indices]
        # Ignore rows whose selected cells are all empty.
        if any(filtered_cells):
            data.append(tuple(filtered_cells))

    if not data:
        message = f"No data found starting from ceremony number {start_ceremony}."
        print(message)
        return (None, message)

    df = pd.DataFrame(data, columns=selected_names)
    return (df, "Success")

# Run the scraper; the function hands back a (DataFrame-or-None, status) pair
oscar_results = extract_oscar_table()

# Report what came back before trying to unpack it
for line in (
    f"Result type: {type(oscar_results)}",
    "----------------------------------",
    f"Result content: {oscar_results}",
):
    print(line)

# Only unpack when the result really is a two-element tuple
is_pair = isinstance(oscar_results, tuple) and len(oscar_results) == 2
if not is_pair:
    print("Error: The returned result is not a tuple with exactly two elements.")
else:
    df_oscar, status = oscar_results

    # A missing DataFrame means extraction failed; the status says why
    if df_oscar is None:
        print(f"Error extracting table: {status}")
    else:
        # Preview the first rows of the extracted table
        print(df_oscar.head())

Result type: <class 'tuple'>
----------------------------------
Result content: (               ceremonia                                   mejor película  \
0        26 de marzo2000                                  American Beauty   
1        25 de marzo2001                                        Gladiator   
2     24 de marzode 2002                                 A Beautiful Mind   
3     23 de marzode 2003                                          Chicago   
4   29 de febrerode 2004    The Lord of the Rings: The Return of the King   
5   27 de febrerode 2005                              Million Dollar Baby   
6      5 de marzode 2006                                            Crash   
7   25 de febrerode 2007                                     The Departed   
8   24 de febrerode 2008                           No Country for Old Men   
9   22 de febrerode 2009                              Slumdog Millionaire   
10     7 de marzode 2010                                  The Hurt Locke

In [5]:
# Table: show the Oscars DataFrame unpacked from the extraction result
df_oscar

Unnamed: 0,ceremonia,mejor película,mejor director,mejor actor,mejor actriz
0,26 de marzo2000,American Beauty,S. Mendes(American Beauty),K. Spacey(American Beauty),H. Swank(Boys don't cry)
1,25 de marzo2001,Gladiator,S. Soderbergh(Traffic),R. Crowe(Gladiator),J. Roberts(Erin Brockovich)
2,24 de marzode 2002,A Beautiful Mind,R. Howard(A Beautiful Mind),D. Washington(Training Day),H. Berry(Monster's Ball)
3,23 de marzode 2003,Chicago,R. Polanski(The Pianist),A. Brody(The Pianist),N. Kidman(The Hours)
4,29 de febrerode 2004,The Lord of the Rings: The Return of the King,P. Jackson(The Lord of the Rings: The Return o...,S. Penn(Mystic River),C. Theron(Monster)
5,27 de febrerode 2005,Million Dollar Baby,C. Eastwood(Million Dollar Baby),J. Foxx(Ray),H. Swank(Million Dollar Baby)
6,5 de marzode 2006,Crash,A. Lee(Brokeback Mountain),P. S. Hoffman(Capote),R. Witherspoon(Walk the Line)
7,25 de febrerode 2007,The Departed,M. Scorsese(The Departed),F. Whitaker(The Last King of Scotland),H. Mirren(The Queen)
8,24 de febrerode 2008,No Country for Old Men,J. Coen E. Coen(No Country for Old Men),D. Day-Lewis(There Will Be Blood),M. Cotillard(La Môme)
9,22 de febrerode 2009,Slumdog Millionaire,D. Boyle(Slumdog Millionaire),S. Penn(Milk),K. Winslet(The Reader)
