In [2]:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time

# Landing page for IPL statistics; fetched once with a plain HTTP request as a
# quick reachability check.
url = 'https://www.iplt20.com/stats/'

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

# requests applies no timeout by default, so without one this cell could block
# forever if the server never answers.
response = requests.get(url, headers=headers, timeout=30)

# A bare `response` expression is only rendered when it is the last statement
# of a cell, which is not the case here -- report the status explicitly.
print(f"GET {url} -> HTTP {response.status_code}")



# One entry per scraped table row (a list of cell texts), accumulated across
# every season.
all_data = []

for season in range(2008, 2026):
    url = f"https://www.iplt20.com/stats/{season}"
    print(f"Scraping IPL {season} stats...")

    # A fresh browser is started for each season and is always shut down in
    # the `finally` below, so a failure part-way through cannot leave an
    # orphaned Chrome process behind.
    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # Fixed pause to give the page time to load before querying it.
        time.sleep(5)

        # Click the "View All" link. This is best-effort: on any failure the
        # error is reported and the page is scraped in whatever state it is in.
        try:
            view_all_btn = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//a[contains(text(),'View All')]"))
            )
            # Click via JavaScript instead of WebElement.click().
            driver.execute_script("arguments[0].click();", view_all_btn)
            time.sleep(5)  # wait for table to load
        except Exception as e:
            print(f"View All button not found for {season}: {e}")

        # Snapshot the rendered DOM; parsing happens after the browser closes.
        html = driver.page_source
    finally:
        driver.quit()

    soup = BeautifulSoup(html, "html.parser")

    # Only the first <table> in the document is used.
    table = soup.find("table")
    if not table:
        print(f"No table found for {season}")
        continue

    # find() returns None when there is no <tbody>; skip the season instead of
    # crashing on `None.find_all`.
    tbody = table.find("tbody")
    if tbody is None:
        print(f"No table body found for {season}")
        continue
    rows = tbody.find_all("tr")

    # Team names are collected separately, from the whole page.
    # NOTE(review): they are matched to table rows purely by position, which
    # assumes one team-name div per row in the same order -- TODO confirm.
    team_names = [t.get_text(strip=True) for t in soup.find_all("div", class_="st-ply-tm-name ng-binding")]

    for idx, row in enumerate(rows):
        cols = [c.get_text(strip=True) for c in row.find_all("td")]
        if not cols:
            # Rows without any <td> cells carry no data.
            continue
        team = team_names[idx] if idx < len(team_names) else ""
        cols.insert(2, team)         # insert Team at index 2
        cols.append(str(season))     # add Season at the end
        all_data.append(cols)

# Column layout of every row in `all_data`: "Team" sits at index 2 (where the
# scraping loop inserts it) and "Season" is last (where the loop appends it).
columns = ["Pos", "Player","Team","Matches", "Innings", "Not Outs", "Runs",
           "HS", "Avg", "BF", "SR", "100s", "50s", "4s", "6s","Season"]

# Single source of truth for the output file, so the success message below can
# never name a different file than the one actually written.
output_path = "ipl_stats_all_seasons_raw.csv"

# Build one DataFrame from all seasons and save it as a single CSV.
# pandas raises ValueError here if any row's length differs from `columns`.
df = pd.DataFrame(all_data, columns=columns)
df.to_csv(output_path, index=False)

print(f"✅ All seasons combined into {output_path} with {len(df)} rows")

Scraping IPL 2008 stats...
Scraping IPL 2009 stats...
Scraping IPL 2010 stats...
Scraping IPL 2011 stats...
Scraping IPL 2012 stats...
Scraping IPL 2013 stats...
Scraping IPL 2014 stats...
Scraping IPL 2015 stats...
Scraping IPL 2016 stats...
Scraping IPL 2017 stats...
Scraping IPL 2018 stats...
Scraping IPL 2019 stats...
Scraping IPL 2020 stats...
Scraping IPL 2021 stats...
Scraping IPL 2022 stats...
Scraping IPL 2023 stats...
Scraping IPL 2024 stats...
Scraping IPL 2025 stats...
✅ All seasons combined into ipl_stats_all_seasons.csv with 2664 rows


In [2]:
import pandas as pd

# Inclusive range of seasons for which a per-season results file is read.
FIRST_SEASON = 2008
LAST_SEASON = 2025

# Per-season inputs are named ipl_results_<year>.csv. Deriving the names from
# the season range avoids a hand-maintained list: adding a season is a
# one-number change and a filename typo is impossible.
files = [f"ipl_results_{season}.csv" for season in range(FIRST_SEASON, LAST_SEASON + 1)]

# Load each season into its own frame; a missing file raises FileNotFoundError.
dfs = [pd.read_csv(path) for path in files]

# Stack all seasons and renumber the index 0..n-1 before writing one CSV.
final_df = pd.concat(dfs, ignore_index=True)
final_df.to_csv("ipl_results_all_seasons.csv", index=False)

print("✔ Combined CSV created successfully!")


✔ Combined CSV created successfully!
