In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

# URL
url = "https://www.espncricinfo.com/series/lanka-premier-league-2024-1421415/jaffna-kings-vs-kandy-falcons-qualifier-2-1428481/ball-by-ball-commentary"

# Chrome setup
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage") 

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) 
driver.set_page_load_timeout(60)  # avoid long waits
driver.get(url)
time.sleep(6)  # wait for page load

data = []

# gradually scroll and capture
previous_count = 0
while True:
    # find all visible balls
    balls = driver.find_elements(By.CSS_SELECTOR, "span.ds-text-tight-s.ds-font-regular.ds-mb-1.lg\\:ds-mb-0.lg\\:ds-mr-3.ds-block.ds-text-center.ds-text-typo-mid1")
    
    # scrape new ones only
    for i in range(previous_count, len(balls)):
        ball = balls[i]
        ball_no = ball.text.strip()
        try:
            parent = ball.find_element(By.XPATH, "../../..")
            desc_div = parent.find_element(By.CSS_SELECTOR, "div.ds-ml-4.lg\\:ds-ml-3.ds-text-typo-mid1")
            desc = desc_div.text.strip().replace("\n", " ")
            print(f"{ball_no} ‚ûú {desc}")
            data.append({"Ball": ball_no, "Commentary": desc})
        except Exception as e:
            print(f"Skipping {ball_no}: {e}")
    
    previous_count = len(balls)
    
    # scroll down to load more
    driver.execute_script("window.scrollBy(0, 1500);")
    time.sleep(3)
    
    # break when no new balls load
    new_balls = driver.find_elements(By.CSS_SELECTOR, "span.ds-text-tight-s.ds-font-regular.ds-mb-1.lg\\:ds-mb-0.lg\\:ds-mr-3.ds-block.ds-text-center.ds-text-typo-mid1")
    if len(new_balls) == previous_count:
        break

# close driver
driver.quit()

# convert to DataFrame
df = pd.DataFrame(data)
df.to_csv("Q2.csv", index=False, encoding="utf-8-sig")
print("\n‚úÖ Saved to Q2.csv")

Anjitha Methmadu


19.6 ‚ûú Asitha Fernando to Ramesh Mendis, 2 runs Jaffna hold their nerve! What a game, what a finish! Low full-toss, darted on the pads, and all Mendis can do is hack this to deep midwicket. They run back for the second, but the throw is safely into Kusal Mendis's hands. Jaffna win by one run!
19.5 ‚ûú Asitha Fernando to Ramesh Mendis, SIX runs And he's found it! Low full-toss and Ramesh Mendis has absolutely smoked this straight down the ground, through the line and all the way
19.4 ‚ûú Asitha Fernando to Ramesh Mendis, 2 runs Cracking yorker on off, dug out down the ground to long-on. They scramble for a desperate two but the throw misses the stumps
19.3 ‚ûú Asitha Fernando to Ramesh Mendis, FOUR runs Finds the boundary! Full on off, just misses the yorker length. Clears the front leg, frees the arms, high elbow lofted straight drive and there's no chance for long-off
19.2 ‚ûú Asitha Fernando to Ramesh Mendis, no run low full-toss chipped to long-on on the bounce. Mendis turns down 

In [26]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
import re

# ESPN Cricinfo Match URL
url = "https://www.espncricinfo.com/series/lanka-premier-league-2024-1421415/jaffna-kings-vs-kandy-falcons-qualifier-2-1428481/ball-by-ball-commentary"

# Selenium setup
options = Options()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
driver.get(url)
time.sleep(6)  # Wait for page to fully load

data = []
previous_count = 0

print("\nüèè Starting ball-by-ball scraping...\n")

while True:
    # Find all visible ball numbers
    balls = driver.find_elements(By.CSS_SELECTOR, 
        "span.ds-text-tight-s.ds-font-regular.ds-mb-1.lg\\:ds-mb-0.lg\\:ds-mr-3.ds-block.ds-text-center.ds-text-typo-mid1")

    # Process newly loaded balls
    for i in range(previous_count, len(balls)):
        ball = balls[i]
        ball_no = ball.text.strip()

        try:
            parent = ball.find_element(By.XPATH, "../../..")
            desc_div = parent.find_element(By.CSS_SELECTOR, "div.ds-ml-4.lg\\:ds-ml-3.ds-text-typo-mid1")
            commentary = desc_div.text.strip().replace("\n", " ")

            # --- Parse fields ---
            over, ball_part = ball_no.split(".") if "." in ball_no else (ball_no, "")
            bowler = batter = runs = extra = ""

            # Pattern: "Bowler to Batter, Something"
            match = re.match(r"(.+?) to (.*?), (.+)", commentary)
            if match:
                bowler = match.group(1).strip()
                batter = match.group(2).strip()
                after_comma = match.group(3).strip()
                parts = after_comma.split()
                if len(parts) >= 2:
                    runs, extra = parts[0], parts[1]
                elif len(parts) == 1:
                    runs = parts[0]

            # Store structured data
            data.append({
                "Over": over,
                "Ball": ball_part,
                "Bowler": bowler,
                "Batter": batter,
                "Runs": runs,
                "Extra": extra,
                "Full_Commentary": commentary
            })

            # --- Display each ball neatly ---
            print(f"Ball: {over}.{ball_part}")
            print(f"  üèè Bowler: {bowler}")
            print(f"  üß¢ Batter: {batter}")
            print(f"  üíØ Runs: {runs}")
            print(f"  ‚ö° Extra: {extra}")
            print(f"  üìã Commentary: {commentary}\n")

        except Exception as e:
            print(f"‚ö†Ô∏è Skipping {ball_no}: {e}")

    previous_count = len(balls)

    # Scroll to load next batch of balls
    driver.execute_script("window.scrollBy(0, 1500);")
    time.sleep(3)

    # Stop when no new balls appear
    new_balls = driver.find_elements(By.CSS_SELECTOR, 
        "span.ds-text-tight-s.ds-font-regular.ds-mb-1.lg\\:ds-mb-0.lg\\:ds-mr-3.ds-block.ds-text-center.ds-text-typo-mid1")
    if len(new_balls) == previous_count:
        break

# Close the browser
driver.quit()

# Save to CSV
df = pd.DataFrame(data)
df.to_csv("lpl_commentary_structured.csv", index=False, encoding="utf-8-sig")

print("\n‚úÖ Scraping completed successfully!")
print("üìÇ Data saved to: lpl_commentary_structured.csv")



üèè Starting ball-by-ball scraping...

Ball: 19.6
  üèè Bowler: Asitha Fernando
  üß¢ Batter: Ramesh Mendis
  üíØ Runs: 2
  ‚ö° Extra: runs
  üìã Commentary: Asitha Fernando to Ramesh Mendis, 2 runs Jaffna hold their nerve! What a game, what a finish! Low full-toss, darted on the pads, and all Mendis can do is hack this to deep midwicket. They run back for the second, but the throw is safely into Kusal Mendis's hands. Jaffna win by one run!

Ball: 19.5
  üèè Bowler: Asitha Fernando
  üß¢ Batter: Ramesh Mendis
  üíØ Runs: SIX
  ‚ö° Extra: runs
  üìã Commentary: Asitha Fernando to Ramesh Mendis, SIX runs And he's found it! Low full-toss and Ramesh Mendis has absolutely smoked this straight down the ground, through the line and all the way

Ball: 19.4
  üèè Bowler: Asitha Fernando
  üß¢ Batter: Ramesh Mendis
  üíØ Runs: 2
  ‚ö° Extra: runs
  üìã Commentary: Asitha Fernando to Ramesh Mendis, 2 runs Cracking yorker on off, dug out down the ground to long-on. They scramble for 