In [90]:
import requests
import gspread
from bs4 import BeautifulSoup
from google.auth.transport.requests import Request
from oauth2client.service_account import ServiceAccountCredentials
from google.oauth2.credentials import Credentials

In [91]:
# Constants
# Wikipedia page that extract_fifa_data() downloads and parses.
WIKIPEDIA_URL = "https://en.wikipedia.org/wiki/List_of_FIFA_World_Cup_finals"
# Google Sheets REST "values append" URL template. Not referenced by the code
# visible in this notebook (the upload goes through gspread) -- presumably a
# leftover; confirm before removing.
SHEET_API_URL = "https://sheets.googleapis.com/v4/spreadsheets/{spreadsheet_id}/values/{range}:append"
# NOTE(review): not referenced by the visible code. The value equals the stem of
# the service-account JSON filename used in append_to_google_sheet(), so it looks
# like a key identifier rather than an OAuth access token -- confirm, and avoid
# committing credential identifiers to a shared notebook.
ACCESS_TOKEN = "ornate-keel-445806-n0-d0d367a02343"
# Key of the target spreadsheet; passed to gspread's open_by_key().
SPREADSHEET_ID = "1U-DnI5-vhmcLCi3qS4luILHciTJKRbyRtqE0vtO3KcA" 
# A1-notation range. Not referenced by the visible code (append_rows() is called
# without a range) -- presumably paired with SHEET_API_URL; confirm.
SHEET_RANGE = "Sheet1!A1:D1"

# Browser URL of the target spreadsheet (same key as SPREADSHEET_ID):
# https://docs.google.com/spreadsheets/d/1U-DnI5-vhmcLCi3qS4luILHciTJKRbyRtqE0vtO3KcA/edit?usp=sharing
# XPath kept for reference; presumably the header cell of the finals table on the
# Wikipedia page. extract_fifa_data() does not use it -- it selects the table by
# its position among the "wikitable" tables instead.
# //*[@id="mw-content-text"]/div[1]/table[4]/thead/tr/th[1]

In [92]:
def extract_fifa_data(table_index=1, max_rows=10, timeout=10):
    """Scrape World Cup final results from the Wikipedia page at WIKIPEDIA_URL.

    Downloads the page, selects one of its ``wikitable`` tables and reads the
    first four cells of each data row (the header row is skipped).

    Args:
        table_index: Zero-based position of the results table among the
            page's ``wikitable`` tables. Defaults to 1, the table this
            notebook has always read.
        max_rows: Maximum number of rows to read after the header row.
            ``None`` reads every remaining row. Defaults to 10.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list of ``[year, winner, score, runners_up]`` lists of stripped
        cell text, one per row that has at least four cells. An empty list is
        returned when the page cannot be fetched or the table is not present.
    """
    # A request without a timeout can hang the kernel indefinitely; network
    # failures are reported the same way as a bad status code (print + []).
    try:
        response = requests.get(WIKIPEDIA_URL, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        print(f"Failed to fetch Wikipedia page: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch Wikipedia page: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    tables = soup.find_all("table", {"class": "wikitable"})

    # Guard on the index that is actually used below. The previous check
    # required at least four tables while reading tables[1], so a page with
    # two or three wikitables was rejected even though the data was there.
    if not 0 <= table_index < len(tables):
        print("Specified table not found!")
        return []

    table = tables[table_index]

    # Row 0 is the header; take up to max_rows rows after it.
    stop = None if max_rows is None else 1 + max_rows
    rows = table.find_all("tr")[1:stop]

    extracted_data = []
    for i, row in enumerate(rows):
        # Both header-style (<th>) and regular (<td>) cells count as columns.
        cells = row.find_all(["th", "td"])
        if len(cells) >= 4:
            year, winner, score, runners_up = (
                cell.text.strip() for cell in cells[:4]
            )
            extracted_data.append([year, winner, score, runners_up])
        else:
            print(f"Skipping row {i+1}: Incomplete data.")
    return extracted_data
# extract_fifa_data()


In [93]:
def append_to_google_sheet(fifa_data, credentials_file=None):
    """Append rows to the first worksheet of the spreadsheet SPREADSHEET_ID.

    Authenticates with a service-account JSON key, opens the spreadsheet by
    key through gspread and appends ``fifa_data`` with
    ``value_input_option="RAW"``.

    Args:
        fifa_data: List of row lists to append. When empty, nothing is sent
            and no authentication is attempted.
        credentials_file: Path to the service-account JSON key file. When
            omitted, the ``GOOGLE_APPLICATION_CREDENTIALS`` environment
            variable is used if set; otherwise the path this notebook
            originally hard-coded is used, so existing calls keep working.

    Returns:
        None. The outcome is reported with ``print``; errors are caught and
        printed rather than raised.
    """
    import os  # local import so this cell does not depend on the imports cell

    # Nothing to upload: avoid a pointless authentication + API round trip.
    if not fifa_data:
        print("No data to append. Skipping Google Sheet update.")
        return

    # Resolve the key file outside the try block so the FileNotFoundError
    # message below always has a path to report.
    if credentials_file is None:
        credentials_file = os.environ.get(
            "GOOGLE_APPLICATION_CREDENTIALS",
            'C:/Users/hp/Downloads/ornate-keel-445806-n0-d0d367a02343.json',
        )

    try:
        # Authentication
        scope = ['https://www.googleapis.com/auth/spreadsheets']
        creds = ServiceAccountCredentials.from_json_keyfile_name(credentials_file, scope)

        # Authorization
        gc = gspread.authorize(creds)
        sheet = gc.open_by_key(SPREADSHEET_ID).sheet1

        sheet.append_rows(fifa_data, value_input_option="RAW")
        print("Data successfully appended to Google Sheet.")

    except FileNotFoundError:
        print(f"Credentials file not found: {credentials_file}")
    except gspread.exceptions.SpreadsheetNotFound:
        print(f"Spreadsheet not found. Check the SPREADSHEET_ID: {SPREADSHEET_ID}")
    except Exception as e:
        # Deliberate best-effort: report any other failure without raising.
        print(f"An error occurred: {e}")


In [94]:
if __name__ == "__main__":
    # Scrape first; only upload when the scrape produced at least one row.
    fifa_data = extract_fifa_data()
    if fifa_data:
        print("Extracted Data:", fifa_data)
        append_to_google_sheet(fifa_data)
    else:
        print("No data extracted. Exiting...")

Extracted Data: [['1930', 'Uruguay', '4–2', 'Argentina'], ['1934', 'Italy', '2–1 (a.e.t.)', 'Czechoslovakia'], ['1938', 'Italy', '4–2', 'Hungary'], ['1950', 'Uruguay', '2–1[n 3]', 'Brazil'], ['1954', 'West Germany', '3–2', 'Hungary'], ['1958', 'Brazil', '5–2', 'Sweden'], ['1962', 'Brazil', '3–1', 'Czechoslovakia'], ['1966', 'England', '4–2 (a.e.t.)', 'West Germany'], ['1970', 'Brazil', '4–1', 'Italy'], ['1974', 'West Germany', '2–1', 'Netherlands']]
Data successfully appended to Google Sheet.
