In [1]:
import requests
from bs4 import BeautifulSoup

In [3]:
url = 'https://www.espncricinfo.com/records/tournament/team-match-results/icc-men-s-t20-world-cup-2022-23-14450'
response = requests.get(url)
response.raise_for_status()

In [5]:
# 1.b Parser Code
# Step 1: Create a list to store match summary links
match_summary_links = []

# Step 2: Parse the HTML content
soup = BeautifulSoup(response.content, 'html.parser')

# Step 3: Selecting all rows we need from the target table
all_rows = soup.select('table.ds-w-full.ds-table.ds-table-xs.ds-table-auto > tbody > tr')

# Looping through each row to get the links
for row in all_rows:
    tds = row.find_all('td')
    row_url = "https://www.espncricinfo.com" + tds[6].find('a')['href']
    match_summary_links.append(row_url)

# Display the collected match summary links
print("Match Summary Links:")
for link in match_summary_links:
    print(link)


Match Summary Links:
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/england-vs-pakistan-final-1298179/full-scorecard
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/england-vs-india-2nd-semi-final-1298178/full-scorecard
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/new-zealand-vs-pakistan-1st-semi-final-1298177/full-scorecard
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/india-vs-zimbabwe-42nd-match-group-2-1298176/full-scorecard
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/bangladesh-vs-pakistan-41st-match-group-2-1298175/full-scorecard
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/netherlands-vs-south-africa-40th-match-group-2-1298174/full-scorecard
https://www.espncricinfo.com/series/icc-men-s-t20-world-cup-2022-23-1298134/england-vs-sri-lanka-39th-match-group-1-1298173/full-scorecard
https://www.espncricinf

In [13]:
def parse_match_details(match_url):
    response = requests.get(match_url)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the "Match Flow" section
    match_flow = soup.find('span', class_='ds-text-title-xs ds-font-bold ds-text-typo', string=lambda x: x and "Match Flow" in x)

    if match_flow is None:
        print("Match Flow not found in", match_url)
        return None

    # Get parent div and its sibling divs
    match_flow_div = match_flow.find_parent('div')
    sibling_divs = match_flow_div.find_parent('div').find_next_siblings('div')

    # Navigate to the first team's innings
    first_div_element = sibling_divs[0].find('div')
    first_ul_element = first_div_element.find('ul')
    team1_li = first_ul_element.find('li')
    team1 = team1_li.find('span').text.replace(" innings", "")

    # Navigate to the second team's innings
    second_div_element = first_div_element.find_next_sibling('div')
    second_ul_element = second_div_element.find('ul')
    team2_li = second_ul_element.find('li')
    team2 = team2_li.find('span').text.replace(" innings", "")

    # Combine the team names into a match info string
    match_info = f'{team1} Vs {team2}'

    # Extracting bowling summary
    bowling_summary = []

    tables = soup.select('div > table.ds-table')
    first_inning_rows = tables[1].select('tbody > tr')
    second_inning_rows = tables[3].select('tbody > tr')

    # Process first innings
    for row in first_inning_rows:
        tds = row.find_all('td')
        if len(tds) >= 11:
            bowling_summary.append({
                "match": match_info,
                "bowlingTeam": team2,
                "bowlerName": tds[0].find('a').text.replace(' ', ''),
                "overs": tds[1].text,
                "maiden": tds[2].text,
                "runs": tds[3].text,
                "wickets": tds[4].text,
                "economy": tds[5].text,
                "0s": tds[6].text,
                "4s": tds[7].text,
                "6s": tds[8].text,
                "wides": tds[9].text,
                "noBalls": tds[10].text
            })

    # Process second innings
    for row in second_inning_rows:
        tds = row.find_all('td')
        if len(tds) >= 11:
            bowling_summary.append({
                "match": match_info,
                "bowlingTeam": team1,
                "bowlerName": tds[0].find('a').text.replace(' ', ''),
                "overs": tds[1].text,
                "maiden": tds[2].text,
                "runs": tds[3].text,
                "wickets": tds[4].text,
                "economy": tds[5].text,
                "0s": tds[6].text,
                "4s": tds[7].text,
                "6s": tds[8].text,
                "wides": tds[9].text,
                "noBalls": tds[10].text
            })

    return bowling_summary

for match_url in match_summary_links:
    match_data = parse_match_details(match_url)
    print(match_data)

[{'match': 'Pakistan Vs England', 'bowlingTeam': 'England', 'bowlerName': 'Ben Stokes', 'overs': '4', 'maiden': '0', 'runs': '32', 'wickets': '1', 'economy': '8.00', '0s': '6', '4s': '1', '6s': '0', 'wides': '2', 'noBalls': '1'}, {'match': 'Pakistan Vs England', 'bowlingTeam': 'England', 'bowlerName': 'Chris Woakes', 'overs': '3', 'maiden': '0', 'runs': '26', 'wickets': '0', 'economy': '8.66', '0s': '7', '4s': '2', '6s': '1', 'wides': '2', 'noBalls': '0'}, {'match': 'Pakistan Vs England', 'bowlingTeam': 'England', 'bowlerName': 'Sam Curran', 'overs': '4', 'maiden': '0', 'runs': '12', 'wickets': '3', 'economy': '3.00', '0s': '15', '4s': '0', '6s': '0', 'wides': '0', 'noBalls': '0'}, {'match': 'Pakistan Vs England', 'bowlingTeam': 'England', 'bowlerName': 'Adil Rashid', 'overs': '4', 'maiden': '1', 'runs': '22', 'wickets': '2', 'economy': '5.50', '0s': '10', '4s': '1', '6s': '0', 'wides': '1', 'noBalls': '0'}, {'match': 'Pakistan Vs England', 'bowlingTeam': 'England', 'bowlerName': 'Chri

In [15]:
all_match_data = []
for match_url in match_summary_links:
    match_data = parse_match_details(match_url)
    if match_data:
        all_match_data.append(match_data)

# Save the output to a JSON string
json_output = json.dumps(all_match_data, indent=4)

# Save the JSON output to a file
with open('t20_world_cup_bowling_summary.json', 'w') as json_file:
    json_file.write(json_output)

print("Match details saved to t20_world_cup_bowling_summary.json")

Match details saved to t20_world_cup_bowling_summary.json


In [23]:
import csv
csv_file = 't20_world_cup_batting_summary.csv'
with open(csv_file, mode='w', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=all_match_data[0].keys())
    writer.writeheader()  
    writer.writerows(all_match_data) 

print(f"Match details saved to {csv_file}")

IndexError: list index out of range