In [7]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# Scrape the IPL 2025 "top buys" auction table from ESPN Cricinfo and save it
# to ipl_2025_top_buys.csv.
#
# The browser is always shut down in a `finally` block, and a wait timeout
# simply skips the parsing step. The cell deliberately does not call exit(),
# which in a notebook would take the kernel down with it.
from selenium.common.exceptions import TimeoutException

# Browser configuration: desktop user-agent string and the Blink
# "AutomationControlled" feature switched off.
options = webdriver.ChromeOptions()
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

page_html = None  # stays None when the table never appeared
try:
    driver.get("https://www.espncricinfo.com/auction/ipl-2025-auction-1460972/top-buys")
    try:
        # Block (up to 40 s) until a table carrying the 'ds-w-full' class exists.
        WebDriverWait(driver, 40).until(
            EC.presence_of_element_located((By.XPATH, "//table[contains(@class, 'ds-w-full')]"))
        )
        time.sleep(5)  # extra settling time after the table is first present
        page_html = driver.page_source
    except TimeoutException as e:
        print("Timeout Error: Could not find the expected elements.", e)
finally:
    # Runs on success, timeout and unexpected errors alike.
    driver.quit()

# Each kept row: [name, team, role, base price, final price] = first five <td> texts.
data = []
if page_html is not None:
    soup = BeautifulSoup(page_html, "html.parser")
    rows = soup.select("table tbody tr")
    for row in rows:
        columns = row.find_all("td")
        if len(columns) >= 5:  # ignore rows that do not carry all expected cells
            data.append([cell.get_text(strip=True) for cell in columns[:5]])

# Convert data to DataFrame and save to CSV
if data:
    df = pd.DataFrame(data, columns=["Player Name", "Team", "Role", "Base Price", "Final Price"])
    df.to_csv("ipl_2025_top_buys.csv", index=False)
    print("Scraping completed! Data saved to ipl_2025_top_buys.csv")
elif page_html is not None:
    print("No auction data found. The page structure might have changed.")

Scraping completed! Data saved to ipl_2025_top_buys.csv


In [4]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# Scrape the IPL 2025 "sold players" auction table from ESPN Cricinfo and save
# it to ipl_2025_sold_players.csv.
#
# Order of operations: wait for the table, then scroll to the bottom until the
# page height stops growing (bounded by MAX_SCROLL_ROUNDS), then parse.
# The browser is always closed in `finally`; no exit() call is used because
# that would shut down the notebook kernel.
from selenium.common.exceptions import TimeoutException

MAX_SCROLL_ROUNDS = 30  # safety cap so a page that keeps growing cannot loop forever

options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Runs in headless mode for efficiency
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Navigate to the Sold Players page
url = "https://www.espncricinfo.com/auction/ipl-2025-auction-1460972/sold-players"

page_html = None  # stays None when the table never appeared
try:
    driver.get(url)
    try:
        WebDriverWait(driver, 60).until(
            EC.presence_of_element_located((By.XPATH, "//table[contains(@class, 'ds-w-full')]"))
        )
    except TimeoutException as e:
        print("Timeout Error: Could not locate the table.", e)
    else:
        time.sleep(5)  # additional settling time once the table is present

        # Scroll to the bottom repeatedly; stop when the document height is stable.
        last_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(MAX_SCROLL_ROUNDS):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(5)  # Wait for rows to load
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        page_html = driver.page_source
finally:
    driver.quit()

# Each kept row: [player name, team, role, base price, final price].
data = []
if page_html is not None:
    soup = BeautifulSoup(page_html, "html.parser")
    rows = soup.select("table tbody tr")
    for row in rows:
        columns = row.find_all("td")
        if len(columns) >= 5:  # Ensure all necessary columns are present
            data.append([cell.get_text(strip=True) for cell in columns[:5]])

# Save the extracted data to a CSV file
if data:
    df = pd.DataFrame(data, columns=["Player Name", "Team", "Role", "Base Price", "Final Price"])
    df.to_csv("ipl_2025_sold_players.csv", index=False)
    print("Scraping completed! Data saved to 'ipl_2025_sold_players.csv'.")
elif page_html is not None:
    print("No data found. Please check the selectors or webpage structure.")

Scraping completed! Data saved to 'ipl_2025_sold_players.csv'.


In [2]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# Scrape the IPL 2025 "unsold players" auction table from ESPN Cricinfo and
# save it to ipl_2025_unsold_players.csv.
#
# The browser is always shut down in `finally`; a wait timeout just skips the
# parsing step instead of calling exit() (which would stop the notebook kernel).
from selenium.common.exceptions import TimeoutException

options = webdriver.ChromeOptions()
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

page_html = None  # stays None when the table never appeared
try:
    driver.get("https://www.espncricinfo.com/auction/ipl-2025-auction-1460972/unsold-players")
    try:
        WebDriverWait(driver, 40).until(
            EC.presence_of_element_located((By.XPATH, "//table[contains(@class, 'ds-w-full')]"))
        )
        time.sleep(5)  # Extra time to ensure full loading
        page_html = driver.page_source
    except TimeoutException as e:
        print("Timeout Error: Could not find the expected elements.", e)
finally:
    driver.quit()

# Each kept row: [name, role, base price] = first three <td> texts.
data = []
if page_html is not None:
    soup = BeautifulSoup(page_html, "html.parser")
    rows = soup.select("table tbody tr")
    for row in rows:
        columns = row.find_all("td")
        if len(columns) >= 3:  # rows with fewer cells are not player rows
            data.append([cell.get_text(strip=True) for cell in columns[:3]])

# Convert data to DataFrame and save to CSV
if data:
    df = pd.DataFrame(data, columns=["Player Name", "Role", "Base Price"])
    df.to_csv("ipl_2025_unsold_players.csv", index=False)
    print("Scraping completed! Data saved to ipl_2025_unsold_players.csv")
elif page_html is not None:
    print("No auction data found. The page structure might have changed.")

Scraping completed! Data saved to ipl_2025_unsold_players.csv


In [5]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# Scrape the per-team squad tables from the IPL 2025 auction overview page on
# ESPN Cricinfo and save one row per player to ipl_2025_squads.csv.
#
# The browser is always shut down in `finally`; a wait timeout just skips the
# parsing step instead of calling exit() (which would stop the notebook kernel).
from selenium.common.exceptions import TimeoutException

options = webdriver.ChromeOptions()
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Navigate to IPL 2025 auction squads page
url = "https://www.espncricinfo.com/auction/ipl-2025-auction-1460972"

page_html = None  # stays None when no table appeared
try:
    driver.get(url)
    try:
        WebDriverWait(driver, 40).until(
            EC.presence_of_element_located((By.XPATH, "//table[contains(@class, 'ds-w-full')]"))
        )
        time.sleep(5)  # Extra loading time
        page_html = driver.page_source
    except TimeoutException as e:
        print("Timeout Error: Could not find the expected elements.", e)
finally:
    driver.quit()

data = []
if page_html is not None:
    soup = BeautifulSoup(page_html, "html.parser")
    tables = soup.select("table.ds-w-full")

    for table in tables:
        # NOTE(review): the team name is taken from the nearest preceding <a>
        # and the purse summary from the nearest preceding <div>; confirm that
        # this matches the live page layout.
        team_link = table.find_previous("a")
        purse_block = table.find_previous("div")
        if team_link is None or purse_block is None:
            # find_previous returns None when nothing precedes the table.
            print("Skipping a table without a preceding team link or purse summary.")
            continue

        team_name = team_link.get_text(strip=True)

        # The summary text is split on '|'; the first four parts are used as
        # purse spent, purse left, total players and overseas players.
        purse_details = purse_block.get_text(strip=True).split("|")
        if len(purse_details) >= 4:
            purse_spent, purse_left, total_players, overseas_players = purse_details[:4]
        else:
            purse_spent = purse_left = total_players = overseas_players = "N/A"

        for row in table.select("tbody tr"):
            columns = row.find_all("td")
            if len(columns) >= 4:
                player_name, role, base_price, sold_price = (
                    cell.get_text(strip=True) for cell in columns[:4]
                )
                data.append([team_name, purse_spent, purse_left, total_players, overseas_players,
                             player_name, role, base_price, sold_price])

# Convert data to DataFrame and save to CSV
if data:
    df = pd.DataFrame(data, columns=["Team", "Purse Spent", "Purse Left", "Total Players", "Overseas Players",
                                     "Player Name", "Role", "Base Price", "Sold Price"])
    df.to_csv("ipl_2025_squads.csv", index=False)
    print("Scraping completed! Data saved to ipl_2025_squads.csv")
elif page_html is not None:
    print("No squad data found. The page structure might have changed.")

Scraping completed! Data saved to ipl_2025_squads.csv


In [5]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# Scrape the IPL 2025 "all players" auction table from ESPN Cricinfo and save
# it to ipl_2025_all_players.csv.
#
# Order of operations: wait for the table, scroll to the bottom until the page
# height stops growing (bounded by MAX_SCROLL_ROUNDS), then parse.
# The browser is always closed in `finally`; no exit() call is used because
# that would shut down the notebook kernel.
from selenium.common.exceptions import TimeoutException

MAX_SCROLL_ROUNDS = 30  # safety cap so a page that keeps growing cannot loop forever

options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Run without opening a browser
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# Navigate to the "All Players" page
url = "https://www.espncricinfo.com/auction/ipl-2025-auction-1460972/all-players"

page_html = None  # stays None when the table never appeared
try:
    driver.get(url)
    try:
        WebDriverWait(driver, 40).until(
            EC.presence_of_element_located((By.XPATH, "//table[contains(@class, 'ds-w-full')]"))
        )
    except TimeoutException as e:
        print("Timeout Error: Could not locate the table.", e)
    else:
        time.sleep(5)

        # Scroll to the bottom repeatedly; stop when the document height is stable.
        last_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(MAX_SCROLL_ROUNDS):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(5)  # Wait for rows to load
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        page_html = driver.page_source
finally:
    driver.quit()

# Each kept row: [player name, role, base price, sold price, team].
data = []
if page_html is not None:
    soup = BeautifulSoup(page_html, "html.parser")
    rows = soup.select("table tbody tr")
    for row in rows:
        columns = row.find_all("td")
        if len(columns) >= 5:
            data.append([cell.get_text(strip=True) for cell in columns[:5]])

# Save the extracted data to a CSV file
if data:
    df = pd.DataFrame(data, columns=["Player Name", "Role", "Base Price", "Sold Price", "Team"])
    df.to_csv("ipl_2025_all_players.csv", index=False)
    print("Scraping completed! Data saved to 'ipl_2025_all_players.csv'.")
elif page_html is not None:
    print("No data found. Please check the selectors or webpage structure.")

Scraping completed! Data saved to 'ipl_2025_all_players.csv'.


In [3]:
import pandas as pd

# Load the CSV file
input_file = "ipl_2025_all_players.csv"
output_file = "ipl_2025_all_players_cleaned.csv"


def clean_players_csv(input_file, output_file):
    """Re-write a scraped CSV, dropping malformed lines and un-nesting single-column data.

    The file is read with ``header=None`` so its own first line (the header row
    written by ``DataFrame.to_csv``) is carried along as ordinary data. It is
    then written back with ``header=False``, which keeps that original first
    line as the header of the output instead of adding a synthetic
    ``0,1,2,...`` line above it.

    If every line parsed into a single field, that field is split on commas
    into separate columns before writing; no hard-coded column names are
    applied, so any column count is accepted.

    Parameters:
        input_file: path of the CSV to read.
        output_file: path of the CSV to write.

    Returns:
        The DataFrame that was written (first row = original header line), or
        None if reading/processing failed; the error is printed in that case.
    """
    try:
        # on_bad_lines='skip' drops lines pandas cannot parse instead of raising.
        raw = pd.read_csv(input_file, header=None, on_bad_lines='skip')

        if raw.shape[1] == 1:
            print("Splitting single-column data...")
            table = raw[0].str.split(',', expand=True)
            message = "✅ Data cleaned and saved to"
        else:
            table = raw
            message = "✅ Data already in tabular format. Saved to"

        table.to_csv(output_file, index=False, header=False)
        print(message, output_file)
        return table

    except Exception as e:
        # Best-effort cell: report the problem rather than raising.
        print("❌ Error processing file:", e)
        return None


df = clean_players_csv(input_file, output_file)


✅ Data already in tabular format. Saved to ipl_2025_all_players_cleaned.csv


In [39]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Scrape the first <table> on the IPL 2025 "most runs" records page and save
# it to ipl_2025_most_runs.csv.
url = "https://www.espncricinfo.com/records/tournament/batting-most-runs-career/indian-premier-league-2025-16622"

driver = uc.Chrome()
try:
    driver.get(url)
    time.sleep(10)  # fixed pause before looking up the table

    # Find the table
    table = driver.find_element(By.TAG_NAME, "table")

    # Header texts come from <th> cells; the list is empty when there are none.
    headers = [th.text.strip() for th in table.find_elements(By.TAG_NAME, "th")]
    print("Headers:", headers)

    # Collect the <td> texts of every row that has at least one <td>.
    rows = []
    for tr in table.find_elements(By.TAG_NAME, "tr"):
        cells = [td.text.strip() for td in tr.find_elements(By.TAG_NAME, "td")]
        if cells:
            rows.append(cells)
finally:
    # Always release the browser, even if the table lookup above raised.
    driver.quit()

# Fallback: use first row as header if no <th> cells were found
if not headers and rows:
    headers = rows[0]
    rows = rows[1:]

if rows:
    df = pd.DataFrame(rows, columns=headers)
    df.to_csv("ipl_2025_most_runs.csv", index=False)
    print("✅ Data saved to ipl_2025_most_runs.csv")
else:
    # Do not write an empty file and report success.
    print("No table rows found; ipl_2025_most_runs.csv was not written.")


Headers: []
✅ Data saved to ipl_2025_most_runs.csv


In [40]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Scrape the first <table> on the IPL 2025 "most wickets" records page and
# save it to ipl_2025_most_wickets.csv.
url = "https://www.espncricinfo.com/records/tournament/bowling-most-wickets-career/indian-premier-league-2025-16622"

driver = uc.Chrome()
try:
    driver.get(url)
    time.sleep(10)  # fixed pause before looking up the table

    # Find the table
    table = driver.find_element(By.TAG_NAME, "table")

    # Header texts come from <th> cells; the list is empty when there are none.
    headers = [th.text.strip() for th in table.find_elements(By.TAG_NAME, "th")]
    print("Headers:", headers)

    # Collect the <td> texts of every row that has at least one <td>.
    rows = []
    for tr in table.find_elements(By.TAG_NAME, "tr"):
        cells = [td.text.strip() for td in tr.find_elements(By.TAG_NAME, "td")]
        if cells:
            rows.append(cells)
finally:
    # Always release the browser, even if the table lookup above raised.
    driver.quit()

# Fallback: use first row as header if no <th> cells were found
if not headers and rows:
    headers = rows[0]
    rows = rows[1:]

if rows:
    df = pd.DataFrame(rows, columns=headers)
    df.to_csv("ipl_2025_most_wickets.csv", index=False)
    print("✅ Data saved to ipl_2025_most_wickets.csv")
else:
    # Do not write an empty file and report success.
    print("No table rows found; ipl_2025_most_wickets.csv was not written.")


Headers: []
✅ Data saved to ipl_2025_most_wickets.csv


In [41]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Scrape the first <table> on the IPL 2024 "most runs" records page and save
# it to ipl_2024_most_runs.csv.
url = "https://www.espncricinfo.com/records/tournament/batting-most-runs-career/indian-premier-league-2024-15940"

driver = uc.Chrome()
try:
    driver.get(url)
    time.sleep(10)  # fixed pause before looking up the table

    # Find the table
    table = driver.find_element(By.TAG_NAME, "table")

    # Header texts come from <th> cells; the list is empty when there are none.
    headers = [th.text.strip() for th in table.find_elements(By.TAG_NAME, "th")]
    print("Headers:", headers)

    # Collect the <td> texts of every row that has at least one <td>.
    rows = []
    for tr in table.find_elements(By.TAG_NAME, "tr"):
        cells = [td.text.strip() for td in tr.find_elements(By.TAG_NAME, "td")]
        if cells:
            rows.append(cells)
finally:
    # Always release the browser, even if the table lookup above raised.
    driver.quit()

# Fallback: use first row as header if no <th> cells were found
if not headers and rows:
    headers = rows[0]
    rows = rows[1:]

if rows:
    df = pd.DataFrame(rows, columns=headers)
    df.to_csv("ipl_2024_most_runs.csv", index=False)
    print("✅ Data saved to ipl_2024_most_runs.csv")
else:
    # Do not write an empty file and report success.
    print("No table rows found; ipl_2024_most_runs.csv was not written.")


Headers: []
✅ Data saved to ipl_2024_most_runs.csv


In [42]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Scrape the first <table> on the IPL 2024 "most wickets" records page and
# save it to ipl_2024_most_wickets.csv.
url = "https://www.espncricinfo.com/records/tournament/bowling-most-wickets-career/indian-premier-league-2024-15940"

driver = uc.Chrome()
try:
    driver.get(url)
    time.sleep(10)  # fixed pause before looking up the table

    # Find the table
    table = driver.find_element(By.TAG_NAME, "table")

    # Header texts come from <th> cells; the list is empty when there are none.
    headers = [th.text.strip() for th in table.find_elements(By.TAG_NAME, "th")]
    print("Headers:", headers)

    # Collect the <td> texts of every row that has at least one <td>.
    rows = []
    for tr in table.find_elements(By.TAG_NAME, "tr"):
        cells = [td.text.strip() for td in tr.find_elements(By.TAG_NAME, "td")]
        if cells:
            rows.append(cells)
finally:
    # Always release the browser, even if the table lookup above raised.
    driver.quit()

# Fallback: use first row as header if no <th> cells were found
if not headers and rows:
    headers = rows[0]
    rows = rows[1:]

if rows:
    df = pd.DataFrame(rows, columns=headers)
    df.to_csv("ipl_2024_most_wickets.csv", index=False)
    print("✅ Data saved to ipl_2024_most_wickets.csv")
else:
    # Do not write an empty file and report success.
    print("No table rows found; ipl_2024_most_wickets.csv was not written.")


Headers: []
✅ Data saved to ipl_2024_most_wickets.csv


In [43]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Scrape the first <table> on the IPL 2024 "most valuable players" page and
# save it to ipl_2024_mvp.csv.
url = "https://www.espncricinfo.com/series/indian-premier-league-2024-1410320/most-valuable-players"

driver = uc.Chrome()
try:
    driver.get(url)
    time.sleep(10)  # fixed pause before looking up the table

    # Find the table
    table = driver.find_element(By.TAG_NAME, "table")

    # Header texts come from <th> cells; the list is empty when there are none.
    headers = [th.text.strip() for th in table.find_elements(By.TAG_NAME, "th")]
    print("Headers:", headers)

    # Collect the <td> texts of every row that has at least one <td>.
    rows = []
    for tr in table.find_elements(By.TAG_NAME, "tr"):
        cells = [td.text.strip() for td in tr.find_elements(By.TAG_NAME, "td")]
        if cells:
            rows.append(cells)
finally:
    # Always release the browser, even if the table lookup above raised.
    driver.quit()

# Fallback: use first row as header if no <th> cells were found
if not headers and rows:
    headers = rows[0]
    rows = rows[1:]

if rows:
    df = pd.DataFrame(rows, columns=headers)
    df.to_csv("ipl_2024_mvp.csv", index=False)
    print("✅ Data saved to ipl_2024_mvp.csv")
else:
    # Do not write an empty file and report success.
    print("No table rows found; ipl_2024_mvp.csv was not written.")


Headers: ['Player', 'Team', 'Total Impact', 'Impact/Mat', 'Matches', 'Runs', 'Wkts']
✅ Data saved to ipl_2024_mvp.csv
