In [19]:
# get all the imports

# all the imports 
import warnings

# Data manipulation
import numpy as np
import pandas as pd

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# HTTP requests and web scraping
import requests
from bs4 import BeautifulSoup
from io import StringIO

import urllib.parse

print("All imports work! Your environment is ready for data analysis and web scraping.")


All imports work! Your environment is ready for data analysis and web scraping.


In [20]:
#formatting for data set

pd.options.display.float_format = "{:,.2f}".format
pd.options.display.max_columns = None

pd.set_option("display.max_colwidth", None)

In [21]:
## pull the data set

fifa_data = "https://raw.githubusercontent.com/fern-1210/IronHack-w3-Modual1-Project3/main/Data/Clean/The%20Immaculate%20Data.csv"
fifa = pd.read_csv(fifa_data)

#view info 
display(fifa.shape)

(3684, 30)

In [22]:
#splitting name and building google url 


# split on whitespace
name_parts = fifa["Name"].str.split()

# first name = first part
fifa["First"] = name_parts.str[0]

# last name = last part (same as first if single token)
fifa["Last"] = name_parts.str[-1]


#function to create url
def make_search_url(first, last):
    """
    Build a Google search URL for a football player.

    Parameters:
    - first: first name (string)
    - last: last name (string)

    Returns:
    - A full Google search URL that searches for the player on zerozero with valor-de-mercado
    """
    # Build query first + last + zerozero + valor-de-mercado
    query = f"{first} {last} zerozero valor-de-mercado"
    
    # URL-encode the query so spaces and special characters are safe for URLs
    encoded = urllib.parse.quote_plus(query)
    
    
    return f"https://www.google.com/search?q={encoded}"

# Apply this function to every row in the DataFrame
fifa["SearchURL"] = fifa.apply(
    lambda row: make_search_url(row["First"], row["Last"]),
    axis=1  
)


fifa[["ID","Name","First","Last", "SearchURL" ]].head()
#fifa


Unnamed: 0,ID,Name,First,Last,SearchURL
0,176580,L. SuÃ¡rez,L.,SuÃ¡rez,https://www.google.com/search?q=L.+Su%C3%A1rez+zerozero+valor-de-mercado
1,194765,A. Griezmann,A.,Griezmann,https://www.google.com/search?q=A.+Griezmann+zerozero+valor-de-mercado
2,177003,L. ModriÄ‡,L.,ModriÄ‡,https://www.google.com/search?q=L.+Modri%C4%87+zerozero+valor-de-mercado
3,224334,M. AcuÃ±a,M.,AcuÃ±a,https://www.google.com/search?q=M.+Acu%C3%B1a+zerozero+valor-de-mercado
4,192985,K. De Bruyne,K.,Bruyne,https://www.google.com/search?q=K.+Bruyne+zerozero+valor-de-mercado


In [23]:
## import functions to work selenium 

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time
import os
from datetime import datetime


In [None]:
### Updated code with batching, checkpoints, and error handling

def extract_market_value_data(driver, player_id, player_name, search_url, max_records=21):
    """
    Extract complete market value history for a player from zerozero.pt
    
    Parameters:
    - driver: Selenium WebDriver instance
    - player_id: FIFA ID from original dataframe
    - player_name: Player name from original dataframe
    - search_url: Google search URL
    - max_records: Maximum number of records to extract (default 21 for 2018-2024)
    
    Returns:
    - DataFrame with player market value history including all requested columns
    """
    
    all_rows = []
    
    try:
        # Step 1: Navigate to Google search
        print(f"  â†’ Searching Google for {player_name}...")
        driver.get(search_url)
        time.sleep(2)
        
        # Step 2: Click first zerozero result
        try:
            wait = WebDriverWait(driver, 10)
            
            # Find first search result containing 'zerozero'
            first_result = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//div[@id='search']//a[contains(@href, 'zerozero')]")
                )
            )
            
            player_profile_url = first_result.get_attribute('href')
            print(f"  â†’ Found zerozero profile: {player_profile_url}")
            
            driver.get(player_profile_url)
            time.sleep(2)
            
        except TimeoutException:
            print(f"  âœ— No zerozero results found for {player_name}")
            # Return row with NO_DATA identifier
            return pd.DataFrame([{
                'ID': player_id,
                'Name': player_name,
                'SearchURL': search_url,
                'Player': 'NO_DATA',
                'Date': 'NO_DATA',
                'Year': None,
                'Month': 'NO_DATA',
                'Day': 'NO_DATA',
                'Club': 'NO_DATA',
                'MarketValue': None,
                'â‚¬ Unit': 'NO_DATA',
                'Avg Year MV': None
            }])
        
        # Step 3: Navigate to "Valor de Mercado" (Market Value) page
        try:
            # Look for the market value link in the player menu
            market_value_link = driver.find_element(
                By.XPATH, 
                "//a[contains(@href, 'valor-de-mercado')]"
            )
            market_value_link.click()
            time.sleep(2)
            print(f"  â†’ Clicked 'Valor de Mercado' tab")
            
        except NoSuchElementException:
            print(f"  âœ— Market value tab not found for {player_name}")
            # Return row with NO_DATA identifier
            return pd.DataFrame([{
                'ID': player_id,
                'Name': player_name,
                'SearchURL': search_url,
                'Player': 'NO_MV_TAB',
                'Date': 'NO_DATA',
                'Year': None,
                'Month': 'NO_DATA',
                'Day': 'NO_DATA',
                'Club': 'NO_DATA',
                'MarketValue': None,
                'â‚¬ Unit': 'NO_DATA',
                'Avg Year MV': None
            }])
        
        # Step 4: Parse the page with BeautifulSoup
        page_source = driver.page_source
        html_code = BeautifulSoup(page_source, 'html.parser')
        
        # Step 5: Extract market value table
        market_value_table = html_code.find("table", attrs={"class": "zz-datatable zztable stats"})
        
        if not market_value_table:
            print(f"  âœ— Market value table not found for {player_name}")
            # Return row with NO_DATA identifier
            return pd.DataFrame([{
                'ID': player_id,
                'Name': player_name,
                'SearchURL': search_url,
                'Player': 'NO_TABLE',
                'Date': 'NO_DATA',
                'Year': None,
                'Month': 'NO_DATA',
                'Day': 'NO_DATA',
                'Club': 'NO_DATA',
                'MarketValue': None,
                'â‚¬ Unit': 'NO_DATA',
                'Avg Year MV': None
            }])
        
        market_value_rows = market_value_table.find_all("tr")
        
        # Step 6: Extract player name from page (for verification)
        player_div = html_code.find("div", attrs={"class": "zz-enthdr-top"})
        scraped_player_name = player_name  # fallback
        
        if player_div:
            player_h2 = player_div.find("h2", attrs={"class": "zz-ty-page-main"})
            if player_h2:
                scraped_player_name = player_h2.get_text(strip=True)
        
        # Step 7: Parse each row of market value history (LIMITED TO max_records)
        records_extracted = 0
        for i in range(1, len(market_value_rows)):
            if records_extracted >= max_records:
                print(f"  â†’ Reached max records limit ({max_records}), stopping extraction")
                break
                
            row = market_value_rows[i]
            cells = row.find_all("td")
            
            if len(cells) >= 4:
                # Extract date
                date = cells[0].get_text(strip=True)
                
                # Extract year to check if it's in range (2018-2024)
                try:
                    year = int(date.split("-")[0])
                    # Skip if year is outside 2018-2024 range
                    if year < 2018 or year > 2024:
                        continue
                except:
                    pass  # If we can't parse year, include it anyway
                
                # Extract club name
                club_main = cells[1].get_text(strip=True)
                club_sub = cells[2].get_text(strip=True)
                club = f"{club_main} {club_sub}".strip()
                
                # Extract market value
                market_value_cell = cells[3].get_text(strip=True)
                
                # Split numeric value and currency unit (e.g., "21.00 M â‚¬")
                parts = market_value_cell.replace('â‚¬', '').strip().split()
                market_value = parts[0] if len(parts) > 0 else ""
                currency_unit = parts[1] if len(parts) > 1 else ""
                
                # Add row with FIFA ID, Name, SearchURL
                all_rows.append({
                    'ID': player_id,
                    'Name': player_name,
                    'SearchURL': search_url,
                    'Player': scraped_player_name,
                    'Date': date,
                    'Club': club,
                    'MarketValue': market_value,
                    'â‚¬ Unit': currency_unit
                })
                
                records_extracted += 1
        
        print(f"  âœ“ Extracted {len(all_rows)} market value records for {player_name}")
        
    except Exception as e:
        print(f"  âœ— Error processing {player_name}: {str(e)}")
        # Return row with ERROR identifier
        return pd.DataFrame([{
            'ID': player_id,
            'Name': player_name,
            'SearchURL': search_url,
            'Player': f'ERROR: {str(e)[:50]}',
            'Date': 'NO_DATA',
            'Year': None,
            'Month': 'NO_DATA',
            'Day': 'NO_DATA',
            'Club': 'NO_DATA',
            'MarketValue': None,
            'â‚¬ Unit': 'NO_DATA',
            'Avg Year MV': None
        }])
    
    # Convert to DataFrame
    if all_rows:
        df = pd.DataFrame(all_rows)
        
        # Clean and convert MarketValue to numeric
        df["MarketValue"] = pd.to_numeric(df["MarketValue"], errors="coerce")
        
        # Split Date into Year, Month, Day
        date_parts = df["Date"].str.split("-", expand=True)
        df["Year"] = pd.to_numeric(date_parts[0], errors="coerce")
        df["Month"] = date_parts[1]
        df["Day"] = date_parts[2]
        
        # Calculate average market value per year
        avg_year_mv = df.groupby("Year")["MarketValue"].transform("mean")
        df["Avg Year MV"] = avg_year_mv
        
        # Reorder columns as requested
        df = df[[
            'ID', 'Name', 'SearchURL', 'Player', 'Date', 'Year', 'Month', 'Day', 
            'Club', 'MarketValue', 'â‚¬ Unit', 'Avg Year MV'
        ]]
        
        return df
    else:
        # Return row with NO_RECORDS identifier
        return pd.DataFrame([{
            'ID': player_id,
            'Name': player_name,
            'SearchURL': search_url,
            'Player': 'NO_RECORDS',
            'Date': 'NO_DATA',
            'Year': None,
            'Month': 'NO_DATA',
            'Day': 'NO_DATA',
            'Club': 'NO_DATA',
            'MarketValue': None,
            'â‚¬ Unit': 'NO_DATA',
            'Avg Year MV': None
        }])


def scrape_all_players_market_values(fifa_df, delay=10, headless=False, max_records_per_player=21, 
                                     checkpoint_interval=10, batch_num=None):
    """
    Scrape market value histories for all players in the FIFA DataFrame
    
    Parameters:
    - fifa_df: DataFrame with ID, Name, SearchURL columns
    - delay: Seconds to wait between requests (default 10)
    - headless: Run browser in headless mode
    - max_records_per_player: Maximum records to extract per player (default 21)
    - checkpoint_interval: Save progress every N players (default 10)
    - batch_num: Batch number for naming files
    
    Returns:
    - Combined DataFrame with all players' market value histories
    """
    
    # Create checkpoint directory if it doesn't exist
    checkpoint_dir = "scraping_checkpoints"
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    
    # Setup Chrome driver
    options = webdriver.ChromeOptions()
    if headless:
        options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
    
    driver = webdriver.Chrome(options=options)
    
    all_player_data = []
    batch_label = f"_batch{batch_num}" if batch_num else ""
    
    try:
        for idx, row in fifa_df.iterrows():
            print(f"\n[{idx + 1}/{len(fifa_df)}] Processing: {row['Name']}")
            
            player_df = extract_market_value_data(
                driver=driver,
                player_id=row['ID'],
                player_name=row['Name'],
                search_url=row['SearchURL'],
                max_records=max_records_per_player
            )
            
            if not player_df.empty:
                all_player_data.append(player_df)
            
            # Save checkpoint every N players
            if (idx + 1) % checkpoint_interval == 0:
                temp_df = pd.concat(all_player_data, ignore_index=True)
                checkpoint_file = f'{checkpoint_dir}/checkpoint{batch_label}_{idx+1}_players.csv'
                temp_df.to_csv(checkpoint_file, index=False)
                print(f"  ðŸ’¾ Checkpoint saved: {checkpoint_file}")
            
            # Be polite - wait between requests
            time.sleep(delay)
    
    except KeyboardInterrupt:
        print("\nâš  Scraping interrupted by user")
    except Exception as e:
        print(f"\nâœ— Unexpected error: {str(e)}")
    finally:
        driver.quit()
        print("\nâœ“ Browser closed")
    
    # Combine all player DataFrames
    if all_player_data:
        final_df = pd.concat(all_player_data, ignore_index=True)
        print(f"\nâœ“ Successfully scraped {len(all_player_data)} players")
        print(f"âœ“ Total records: {len(final_df)}")
        return final_df
    else:
        print("\nâœ— No data scraped")
        return pd.DataFrame()


# ============================================================================
# BATCH PROCESSING SETUP
# ============================================================================

def split_dataframe_into_batches(df, num_batches=6):
    """
    Split DataFrame into equal batches
    
    Parameters:
    - df: DataFrame to split
    - num_batches: Number of batches to create
    
    Returns:
    - List of DataFrames
    """
    batch_size = len(df) // num_batches
    batches = []
    
    for i in range(num_batches):
        start_idx = i * batch_size
        # Last batch gets any remaining rows
        if i == num_batches - 1:
            end_idx = len(df)
        else:
            end_idx = (i + 1) * batch_size
        
        batch = df.iloc[start_idx:end_idx].copy()
        batches.append(batch)
        print(f"Batch {i+1}: {len(batch)} players (rows {start_idx} to {end_idx-1})")
    
    return batches

# ============================================================================
# USAGE EXAMPLE - PROCESS ALL BATCHES
# ============================================================================

print("="*80)
print("SPLITTING FIFA DATA INTO 6 BATCHES")
print("="*80)

# Split fifa dataframe into 6 batches
fifa_batches = split_dataframe_into_batches(fifa, num_batches=6)

print(f"\nTotal players: {len(fifa)}")
print(f"Batches created: {len(fifa_batches)}")
print("\n" + "="*80)

# Process each batch
all_batch_results = []

for batch_num, batch_df in enumerate(fifa_batches, start=1):
    print(f"\n{'='*80}")
    print(f"PROCESSING BATCH {batch_num}/6")
    print(f"{'='*80}")
    print(f"Players in this batch: {len(batch_df)}")
    print(f"Estimated time: {len(batch_df) * 10 / 60:.1f} minutes")
    
    # Ask for confirmation before starting each batch
    user_input = input(f"\nStart batch {batch_num}? (y/n): ")
    if user_input.lower() != 'y':
        print(f"Skipping batch {batch_num}")
        continue
    
    # Run the scraper for this batch
    result_df = scrape_all_players_market_values(
        batch_df, 
        delay=10, 
        headless=False, 
        max_records_per_player=21,
        checkpoint_interval=10,
        batch_num=batch_num
    )
    
    # Save batch results
    if not result_df.empty:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f'fifa_market_values_batch{batch_num}_{timestamp}.csv'
        result_df.to_csv(filename, index=False)
        print(f"\nâœ“ Batch {batch_num} saved to '{filename}'")
        
        all_batch_results.append(result_df)
        
        # Display batch summary
        print("\n" + "="*80)
        print(f"BATCH {batch_num} SUMMARY")
        print("="*80)
        print(f"Total players processed: {result_df['Name'].nunique()}")
        print(f"Total records: {len(result_df)}")
        print(f"Players with NO_DATA: {len(result_df[result_df['Player'].str.contains('NO_', na=False)])}")
        if 'Year' in result_df.columns:
            valid_years = result_df[result_df['Year'].notna()]
            if not valid_years.empty:
                print(f"Year range: {valid_years['Year'].min():.0f} - {valid_years['Year'].max():.0f}")
    
    print(f"\n{'='*80}")
    print(f"BATCH {batch_num} COMPLETE")
    print(f"{'='*80}\n")
    
    # Pause between batches
    if batch_num < len(fifa_batches):
        input("Press Enter to continue to next batch...")


# ============================================================================
# COMBINE ALL BATCHES
# ============================================================================

if all_batch_results:
    print("\n" + "="*80)
    print("COMBINING ALL BATCHES")
    print("="*80)
    
    final_combined_df = pd.concat(all_batch_results, ignore_index=True)
    
    # Save final combined file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    final_filename = f'fifa_market_values_ALL_BATCHES_{timestamp}.csv'
    final_combined_df.to_csv(final_filename, index=False)
    
    print(f"âœ“ All batches combined and saved to '{final_filename}'")
    print(f"\nFINAL SUMMARY:")
    print(f"Total players: {final_combined_df['Name'].nunique()}")
    print(f"Total records: {len(final_combined_df)}")
    print(f"Players with issues: {len(final_combined_df[final_combined_df['Player'].str.contains('NO_|ERROR', na=False)])}")
    
    # Show records per player
    print(f"\nRecords per player:")
    print(final_combined_df.groupby('Name').size().describe())
else:
    print("\nâœ— No batches were processed")


## Key Features Added:

### 1. **6 Batches** (614 players each):
#- Batch 1: rows 0-613
#- Batch 2: rows 614-1227
#- Batch 3: rows 1228-1841
#- Batch 4: rows 1842-2455
#- Batch 5: rows 2456-3069
#- Batch 6: rows 3070-3683

### 2. **Delay = 10 seconds** between each request

### 3. **NO_DATA Identifiers** for missing data:
#- `NO_DATA` - Player not found on Google
#- `NO_MV_TAB` - No market value tab found
#- `NO_TABLE` - No market value table found
#- `NO_RECORDS` - Table exists but no records
#- `ERROR: ...` - Exception occurred with error message

### 4. **Checkpoint Saving**:
#- Saves every 10 players
#- Stored in `scraping_checkpoints/` folder
#- Named: `checkpoint_batch{N}_{count}_players.csv`

### 5. **Batch Processing**:
#- Asks for confirmation before each batch
#- Saves each batch separately
#- Combines all batches at the end
#- Shows estimated time per batch



SPLITTING FIFA DATA INTO 6 BATCHES
Batch 1: 614 players (rows 0 to 613)
Batch 2: 614 players (rows 614 to 1227)
Batch 3: 614 players (rows 1228 to 1841)
Batch 4: 614 players (rows 1842 to 2455)
Batch 5: 614 players (rows 2456 to 3069)
Batch 6: 614 players (rows 3070 to 3683)

Total players: 3684
Batches created: 6


PROCESSING BATCH 1/6
Players in this batch: 614
Estimated time: 102.3 minutes

[1/614] Processing: L. SuÃ¡rez
  â†’ Searching Google for L. SuÃ¡rez...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/luis-suarez/504173
  âœ— Error processing L. SuÃ¡rez: name 'NoSuchElementException' is not defined

[2/614] Processing: A. Griezmann
  â†’ Searching Google for A. Griezmann...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/antoine-griezmann/115090/valor-de-mercado
  â†’ Clicked 'Valor de Mercado' tab
  â†’ Reached max records limit (21), stopping extraction
  âœ“ Extracted 21 market value records for A. Griezmann

[3/614] Processing: L. ModriÄ‡
  â†’

  temp_df = pd.concat(all_player_data, ignore_index=True)


  ðŸ’¾ Checkpoint saved: scraping_checkpoints/checkpoint_batch1_10_players.csv

[11/614] Processing: A. Vidal
  â†’ Searching Google for A. Vidal...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/arturo-vidal/35084
  â†’ Clicked 'Valor de Mercado' tab
  â†’ Reached max records limit (21), stopping extraction
  âœ“ Extracted 21 market value records for A. Vidal

[12/614] Processing: A. Florenzi
  â†’ Searching Google for A. Florenzi...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/alessandro-florenzi/147357
  â†’ Clicked 'Valor de Mercado' tab
  âœ“ Extracted 19 market value records for A. Florenzi

[13/614] Processing: P. Pogba
  â†’ Searching Google for P. Pogba...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/paul-pogba/106444
  âœ— Error processing P. Pogba: name 'NoSuchElementException' is not defined

[14/614] Processing: D. Alaba
  â†’ Searching Google for D. Alaba...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/davi

  temp_df = pd.concat(all_player_data, ignore_index=True)



[21/614] Processing: T. Delaney
  â†’ Searching Google for T. Delaney...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/thomas-delaney/103221
  â†’ Clicked 'Valor de Mercado' tab
  â†’ Reached max records limit (21), stopping extraction
  âœ“ Extracted 21 market value records for T. Delaney

[22/614] Processing: L. Messi
  â†’ Searching Google for L. Messi...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/lionel-messi/valor-de-mercado
  â†’ Clicked 'Valor de Mercado' tab
  âœ“ Extracted 20 market value records for L. Messi

[23/614] Processing: J. Kimmich
  â†’ Searching Google for J. Kimmich...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/joshua-kimmich/330909
  â†’ Clicked 'Valor de Mercado' tab
  âœ“ Extracted 18 market value records for J. Kimmich

[24/614] Processing: C. Tolisso
  â†’ Searching Google for C. Tolisso...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/corentin-tolisso/236421/valor-de-mercado
  â†’ Clicked

  temp_df = pd.concat(all_player_data, ignore_index=True)



[31/614] Processing: K. oke
  â†’ Searching Google for K. oke...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/koke/363108/valor-de-mercado
  â†’ Clicked 'Valor de Mercado' tab
  âœ“ Extracted 0 market value records for K. oke

[32/614] Processing: G. Wijnaldum
  â†’ Searching Google for G. Wijnaldum...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/gini-wijnaldum/32964/valor-de-mercado
  â†’ Clicked 'Valor de Mercado' tab
  âœ“ Extracted 4 market value records for G. Wijnaldum

[33/614] Processing: D. Alli
  â†’ Searching Google for D. Alli...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/dele-alli/236970
  â†’ Clicked 'Valor de Mercado' tab
  â†’ Reached max records limit (21), stopping extraction
  âœ“ Extracted 21 market value records for D. Alli

[34/614] Processing: A. Sandro
  â†’ Searching Google for A. Sandro...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/alex-sandro/76601
  â†’ Clicked 'Valor de Mercado' tab
  

  temp_df = pd.concat(all_player_data, ignore_index=True)



[41/614] Processing: N. KantÃ©
  â†’ Searching Google for N. KantÃ©...
  âœ— Error processing N. KantÃ©: name 'TimeoutException' is not defined

[42/614] Processing: M. Reus
  â†’ Searching Google for M. Reus...
  âœ— Error processing M. Reus: name 'TimeoutException' is not defined

[43/614] Processing: K. Walker
  â†’ Searching Google for K. Walker...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/kyle-walker/92774
  â†’ Clicked 'Valor de Mercado' tab
  âœ“ Extracted 17 market value records for K. Walker

[44/614] Processing: M. Sissoko
  â†’ Searching Google for M. Sissoko...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/moussa-sissoko/42537/valor-de-mercado
  â†’ Clicked 'Valor de Mercado' tab
  â†’ Reached max records limit (21), stopping extraction
  âœ“ Extracted 21 market value records for M. Sissoko

[45/614] Processing: A. Rabiot
  â†’ Searching Google for A. Rabiot...
  â†’ Found zerozero profile: https://www.zerozero.pt/jogador/adrien-rabiot/1

  temp_df = pd.concat(all_player_data, ignore_index=True)



âš  Scraping interrupted by user

âœ“ Browser closed

âœ“ Successfully scraped 50 players
âœ“ Total records: 841


  final_df = pd.concat(all_player_data, ignore_index=True)



âœ“ Batch 1 saved to 'fifa_market_values_batch1_20260128_174333.csv'

BATCH 1 SUMMARY
Total players processed: 50
Total records: 841
Players with NO_DATA: 3
Year range: 2018 - 2024

BATCH 1 COMPLETE


PROCESSING BATCH 2/6
Players in this batch: 614
Estimated time: 102.3 minutes
