# In [None]:
# Import necessary libraries
import requests
from bs4 import BeautifulSoup

# Landing page of the ranking; it is parsed below and later code looks up the
# date dropdown in the resulting soup.
url = "https://www.transfermarkt.es/statistik/weltrangliste"

# Kept for any later code that references it; it is empty and a GET request
# carries no body, so it is not passed to the request below.
payload = {}
# Browser-like request headers, reused by every request in this script.
headers = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'Accept-Language': 'es-ES,es;q=0.9',
  'Referer': 'https://www.transfermarkt.es/'
}

# Make the initial request to get the page content.
# requests has no default timeout, so set one to avoid hanging forever on a
# stalled connection.
response = requests.get(url, headers=headers, timeout=30)
# Everything downstream depends on this page, so fail loudly on an HTTP error
# instead of parsing an error page and silently ending up with no dates.
response.raise_for_status()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Extract available dates from the dropdown menu
select = soup.find("select", {"name": "datum"})
# If the "datum" dropdown is not found, fall back to every <option> on the
# page; values that are not dates are discarded by the filter below.
options = select.find_all("option") if select is not None else soup.find_all("option")

dates = [opt.get("value") for opt in options if opt.get("value")]

# Filter dates for December only
from datetime import datetime


def _is_december(value):
    """Return True if *value* is a ``YYYY-MM-DD`` date whose month is December.

    Values that do not match the ``%Y-%m-%d`` format return False instead of
    raising, so unrelated <option> values cannot abort the script.
    """
    try:
        return datetime.strptime(value, "%Y-%m-%d").month == 12
    except ValueError:
        return False


selected_dates = [value for value in dates if _is_december(value)]

# Construct the ranking-table URL for each selected date
web_urls = [
    f'https://www.transfermarkt.es/statistik/weltrangliste/statistik/stat/ajax/yw1/datum/{date}/plus/0/galerie/0'
    for date in selected_dates
]

# Generate paginated URLs: pages 1 through 7 for every date.
# The loop variable is deliberately not called `url`, so the module-level
# `url` (the landing-page address) is not overwritten.
pages = []

for web_url in web_urls:
    pages.extend(f'{web_url}/page/{pag}' for pag in range(1, 8))

# Initialize lists to store extracted data (one entry per table row)
years = []
positions = []
countries = []
confeds = []
points = []

# Upper bound, in seconds, on each HTTP request. requests has no default
# timeout, so without this a stalled connection would hang the whole scrape.
REQUEST_TIMEOUT_SECONDS = 30


def _scrape_ranking_page(page_url):
    """Fetch one ranking page and append its table rows to the result lists.

    The year is taken from the ``/datum/YYYY-MM-DD/`` segment of *page_url*.
    Only rows with at least four ``<td>`` cells are used; their first four
    cells are stored as position, country, confederation and points.

    A response with an HTTP error status is reported and skipped rather than
    parsed. Connection errors and timeouts propagate to the caller.
    """
    # Same for every row of the page, so derive it once.
    year = page_url.split("/datum/")[1].split("-")[0]

    resp = requests.get(
        page_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS
    )
    if not resp.ok:
        # Do not scrape an error page as if it were ranking data.
        print(f"Skipping {page_url}: HTTP {resp.status_code}")
        return

    page_soup = BeautifulSoup(resp.content, "html.parser")
    for row in page_soup.find_all("tr"):
        tds = row.find_all("td")
        if len(tds) < 4:
            continue
        years.append(year)
        positions.append(tds[0].get_text(strip=True))
        countries.append(tds[1].get_text(strip=True))
        confeds.append(tds[2].get_text(strip=True))
        points.append(tds[3].get_text(strip=True))


# Loop through each page and extract the relevant data
for page in pages:
    _scrape_ranking_page(page)

# Additionally, extract data for the year 2022 (snapshot dated 2022-10-06;
# the year '2022' is derived from the URL like for the other pages)
urls2022 = [
    f'https://www.transfermarkt.es/statistik/weltrangliste/statistik/stat/ajax/yw1/datum/2022-10-06/plus/0/galerie/0/page/{pag}'
    for pag in range(1, 8)
]
for page in urls2022:
    _scrape_ranking_page(page)

# Assemble the scraped columns into a single table
import pandas as pd

# Column headers paired, in order, with the lists filled during scraping.
column_names = ('Year', 'Position', 'Country', 'Confederation', 'Points')
column_values = (years, positions, countries, confeds, points)

df = pd.DataFrame(dict(zip(column_names, column_values)))

# Export the table as "fifa_ranking.csv" in the current working directory
import os

project_dir = os.getcwd()
csv_route = os.path.join(project_dir, "fifa_ranking.csv")

# No index column is written; "utf-8-sig" writes UTF-8 with a leading BOM
# (presumably so spreadsheet tools detect the encoding).
df.to_csv(path_or_buf=csv_route, encoding="utf-8-sig", index=False)