# Building an Automated IPL Scorecard Generator (Web Scrapping)

The primary objective of this project was to research and collect data on the IPL 2023 matches from the official CricBuzz website. To achieve this, the project utilized the BeautifulSoup and Pandas libraries in Python for web scraping and organizing the acquired data into CSV files.

In [1]:
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
import numpy as np
import re

In [2]:
url = 'https://www.cricbuzz.com/cricket-series/5945/indian-premier-league-2023/matches'
page = requests.get(url)
soup = bs(page.text)
n = 1
ipl_matches = []
for i,x in enumerate(soup.find_all('a', class_="text-hvr-underline")):
  if n > 74:
    break
  else:
    url = f"https://www.cricbuzz.com/live-cricket-scorecard/{'/'.join(x.get('href').split('/')[2:])}"
    ipl_matches.append(url)
    n+=1

In [3]:
len(ipl_matches)

74

In [4]:
ipl_matches[43]

'https://www.cricbuzz.com/live-cricket-scorecard/66320/dc-vs-gt-44th-match-indian-premier-league-2023'

In [5]:
page = requests.get(ipl_matches[2])
soup = bs(page.text)

In [6]:
soup = bs(page.text)
for i in soup.find('div', class_="cb-col cb-col-100 cb-bg-white").find_all('div'):
  try:
    print(i.find('div', id='innings_1').text)
    continue
  except:
    continue

   Lucknow Super Giants Innings 193-6 (20 Ov)   Batter  R B 4s 6s SR      Rahul (c)    c Axar b Chetan Sakariya   ►    8 12 0 1 66.67        K Mayers    b Axar   ►    73 38 2 7 192.11        Deepak Hooda    c Warner b Kuldeep Yadav   ►    17 18 0 0 94.44        Krunal Pandya    not out  15 13 0 1 115.38        Stoinis    c Sarfaraz Khan b Khaleel Ahmed   ►    12 10 0 1 120.00        Pooran (wk)    c Prithvi Shaw b Khaleel Ahmed   ►    36 21 2 3 171.43        Ayush Badoni    c Sarfaraz Khan b Chetan Sakariya   ►    18 7 1 2 257.14        K Gowtham    not out  6 1 0 1 600.00      Extras  8   (b 1, lb 2, w 5, nb 0, p 0)   Total  193    (6 wkts, 20 Ov)     Did not Bat   Mark Wood , Jaydev Unadkat , Ravi Bishnoi , Avesh Khan     Fall of Wickets19-1 (Rahul, 3.6), 98-2 (Deepak Hooda, 10.6), 100-3 (K Mayers, 11.3), 117-4 (Stoinis, 14.1), 165-5 (Pooran, 18.3), 187-6 (Ayush Badoni, 19.5)   Bowler O M R W NB WD ECO     Khaleel Ahmed   4 0 30 2 0 1 7.50        Mukesh Kumar   4 0 34 0 0 1 8.50     

In [18]:
l = 0

for url in ipl_matches:
  page = requests.get(url)
  soup = bs(page.text)
  # print('\nMatch no.',l,'\n')
  try:
    for i in soup.find('div', id=f'innings_{1}').find_all('div')[11::9]:
        if i.text.strip().split('  ')[2].strip()[0] == '(':
          l+=1
          break 
        else:
          pass
  except:
    print('Match is still pending hold your!!!')
    break

Match is still pending hold your!!!


In [8]:
def get_stats(m):
  batter, runs, balls, fours, sixes, sr, wicket, dismissal, catches = [], [], [], [], [], [], [], [], []
  t1, t2 = ' '.join(soup.find('h1').text.split()[:-7]).strip(',').split(' vs ')

  for i in soup.find('div', id=f'innings_{m}').find_all('div')[11::9]:
    stat0 = i.text.strip().split('  ')[2].strip()
    
    if stat0[0] == '(':
      break 
    else:
      if stat0.split()[0] == 'lbw':
        dismissal.append('lbw')
        catches.append('-')
        wicket.append(''.join(stat0.split('b')[-1]))
      elif stat0.split()[0] == 'b':
          wicket.append(stat0.split('b')[-1])
          dismissal.append('bowled')
          catches.append('-')
      elif stat0.split()[0] == 'c':
        if re.search('sub', stat0):
          catches.append(stat0.split(')')[-1].split('b')[0].strip())
        else:
          catches.append(' '.join(stat0.split('b')[0].split()[1:]))
        dismissal.append('caught')
        wicket.append(''.join(stat0.split('b')[-1]))
      else:
        dismissal.append(stat0)
        catches.append('-')
        wicket.append('-')

      n = len(i.text.strip().split('  ')) - 1
      batting = i.text.strip().split('  ')
      batter.append(batting[0])

      stat1 = batting[n].split()

      runs.append(stat1[0])
      balls.append(stat1[1])
      fours.append(stat1[2])
      sixes.append(stat1[3])
      sr.append(stat1[4])

  for i in soup.find('div', id=f'innings_{m}').find_all('div'):
    if re.search('Bowler', i.text):
      card = i.text.strip()
      bowlers = card.split('  ')[2::][::5]

      stat2 = ''.join(card.split('  ')[3::][::5]).split()

      overs = stat2[::7]
      maidens = stat2[1::7]
      Runs = stat2[2::7]
      wickets = stat2[3::7]
      no_balls = stat2[4::7]
      wide = stat2[5::7]
      economy = stat2[6::7]
      break
  if m == 1:
    team1 = t2
    team2 = t1
  else:
    team1 = t1
    team2 = t2

  batting = pd.DataFrame({'Innings': m,'Team': team1,'Batter': batter, 'Runs': runs, 'Balls': balls, '4s': fours, '6s': sixes, 'SR': sr,'Dismissal': dismissal, 'Wicket by': wicket, 'Catch': catches})
  batting.index = np.arange(1, len(batting) + 1)
  bowling = pd.DataFrame({'Team': team2,'Bowlers':bowlers, 'Overs': overs, 'Maidens': maidens, 'Runs': Runs, 'Wickets': wickets, 'No Balls': no_balls, 'Wide': wide, 'Economy': economy})
  bowling.index = np.arange(1, len(bowling) + 1)
  innings = pd.concat([batting,bowling], axis=1).replace(np.nan, '-')
  m+=1
  return innings

In [9]:
page = requests.get(ipl_matches[0])
soup = bs(page.text)

get_stats(1)

Unnamed: 0,Innings,Team,Batter,Runs,Balls,4s,6s,SR,Dismissal,Wicket by,Catch,Team.1,Bowlers,Overs,Maidens,Runs.1,Wickets,No Balls,Wide,Economy
1,1,Chennai Super Kings,Conway,1,6,0,0,16.67,bowled,Shami,-,Gujarat Titans,Shami,4,0,29,2,1,0,7.20
2,1,Chennai Super Kings,Ruturaj Gaikwad,92,50,4,9,184.0,caught,Alzarri Joseph,Shu,Gujarat Titans,Hardik Pandya (c),3,0,28,0,0,0,9.30
3,1,Chennai Super Kings,Moeen,23,17,4,1,135.29,caught,Rashid Khan,W Saha,Gujarat Titans,Joshua Little,4,0,41,1,0,0,10.20
4,1,Chennai Super Kings,Stokes,7,6,1,0,116.67,caught,Rashid Khan,W Saha,Gujarat Titans,Rashid Khan,4,0,26,2,0,0,6.50
5,1,Chennai Super Kings,Rayudu,12,12,0,1,100.0,bowled,Joshua Little,-,Gujarat Titans,Alzarri Joseph,4,0,33,2,0,0,8.20
6,1,Chennai Super Kings,Shivam Dube,19,18,0,1,105.56,caught,Shami,Rashid Khan,Gujarat Titans,Yash Dayal,1,0,14,0,0,0,14.00
7,1,Chennai Super Kings,Ravindra Jadeja,1,2,0,0,50.0,caught,Alzarri Joseph,Vijay Shankar,-,-,-,-,-,-,-,-,-
8,1,Chennai Super Kings,Dhoni (c & wk),14,7,1,1,200.0,not out,-,-,-,-,-,-,-,-,-,-,-
9,1,Chennai Super Kings,Santner,1,3,0,0,33.33,not out,-,-,-,-,-,-,-,-,-,-,-


In [10]:
get_stats(2)

Unnamed: 0,Innings,Team,Batter,Runs,Balls,4s,6s,SR,Dismissal,Wicket by,Catch,Team.1,Bowlers,Overs,Maidens,Runs.1,Wickets,No Balls,Wide,Economy
1,2,Gujarat Titans,Wriddhiman Saha (wk),25,16,2,2,156.25,caught,Rajvardhan Hangargekar,Shivam Du,Chennai Super Kings,Deepak Chahar,4,0,29,0,0,0,7.20
2,2,Gujarat Titans,Shubman Gill,63,36,6,3,175.0,caught,Tushar Deshpande,Ruturaj Gaikwad,Chennai Super Kings,Tushar Deshpande,3.2,0,51,1,1,1,15.30
3,2,Gujarat Titans,Sai Sudharsan,22,17,3,0,129.41,caught,Rajvardhan Hangargekar,Dhoni,Chennai Super Kings,Rajvardhan Hangargekar,4,0,36,3,1,3,9.00
4,2,Gujarat Titans,Hardik Pandya (c),8,11,0,0,72.73,bowled,Ravindra Jadeja,-,Chennai Super Kings,Mitchell Santner,4,0,32,0,0,0,8.00
5,2,Gujarat Titans,Vijay Shankar,27,21,2,1,128.57,caught,Rajvardhan Hangargekar,Santner,Chennai Super Kings,Ravindra Jadeja,4,0,28,1,0,0,7.00
6,2,Gujarat Titans,Rahul Tewatia,15,14,1,1,107.14,not out,-,-,-,-,-,-,-,-,-,-,-
7,2,Gujarat Titans,Rashid Khan,10,3,1,1,333.33,not out,-,-,-,-,-,-,-,-,-,-,-


In [11]:
innings = pd.concat([get_stats(1), get_stats(2)]).reset_index(drop=True)
innings

Unnamed: 0,Innings,Team,Batter,Runs,Balls,4s,6s,SR,Dismissal,Wicket by,Catch,Team.1,Bowlers,Overs,Maidens,Runs.1,Wickets,No Balls,Wide,Economy
0,1,Chennai Super Kings,Conway,1,6,0,0,16.67,bowled,Shami,-,Gujarat Titans,Shami,4,0,29,2,1,0,7.20
1,1,Chennai Super Kings,Ruturaj Gaikwad,92,50,4,9,184.0,caught,Alzarri Joseph,Shu,Gujarat Titans,Hardik Pandya (c),3,0,28,0,0,0,9.30
2,1,Chennai Super Kings,Moeen,23,17,4,1,135.29,caught,Rashid Khan,W Saha,Gujarat Titans,Joshua Little,4,0,41,1,0,0,10.20
3,1,Chennai Super Kings,Stokes,7,6,1,0,116.67,caught,Rashid Khan,W Saha,Gujarat Titans,Rashid Khan,4,0,26,2,0,0,6.50
4,1,Chennai Super Kings,Rayudu,12,12,0,1,100.0,bowled,Joshua Little,-,Gujarat Titans,Alzarri Joseph,4,0,33,2,0,0,8.20
5,1,Chennai Super Kings,Shivam Dube,19,18,0,1,105.56,caught,Shami,Rashid Khan,Gujarat Titans,Yash Dayal,1,0,14,0,0,0,14.00
6,1,Chennai Super Kings,Ravindra Jadeja,1,2,0,0,50.0,caught,Alzarri Joseph,Vijay Shankar,-,-,-,-,-,-,-,-,-
7,1,Chennai Super Kings,Dhoni (c & wk),14,7,1,1,200.0,not out,-,-,-,-,-,-,-,-,-,-,-
8,1,Chennai Super Kings,Santner,1,3,0,0,33.33,not out,-,-,-,-,-,-,-,-,-,-,-
9,2,Gujarat Titans,Wriddhiman Saha (wk),25,16,2,2,156.25,caught,Rajvardhan Hangargekar,Shivam Du,Chennai Super Kings,Deepak Chahar,4,0,29,0,0,0,7.20


In [12]:
path = '/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data'

In [13]:
for j in range(len(ipl_matches)):
  matches = '-'.join(ipl_matches[j].split('/')[-1].split('-')[:3])
  t = j+1
  print(f'{path}/Match {t} {matches.upper()}')

/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 1 CSK-VS-GT
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 2 PBKS-VS-KKR
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 3 LSG-VS-DC
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 4 RR-VS-SRH
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 5 MI-VS-RCB
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 6 CSK-VS-LSG
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 7 DC-VS-GT
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 8 PBKS-VS-RR
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 9 KKR-VS-RCB
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 10 SRH-VS-LSG
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 11 RR-VS-DC
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Matc

In [14]:
t1, t2 = ' '.join(soup.find('h1').text.split()[:-7]).strip(',').split(' vs ')

matches = '-'.join(ipl_matches[2].split('/')[-1].split('-')[:3])

In [15]:
soup.find('div', ng_repeat="ts in match.full_commentary.timestamps")

In [16]:
paths = []
for j in range(len(ipl_matches)):
  matches = '-'.join(ipl_matches[j].split('/')[-1].split('-')[:3])
  t = j+1
  print(f'{path}/Match {t} {matches.upper()}')
  paths.append(f'{path}/Match {t} {matches.upper()}')

/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 1 CSK-VS-GT
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 2 PBKS-VS-KKR
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 3 LSG-VS-DC
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 4 RR-VS-SRH
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 5 MI-VS-RCB
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 6 CSK-VS-LSG
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 7 DC-VS-GT
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 8 PBKS-VS-RR
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 9 KKR-VS-RCB
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 10 SRH-VS-LSG
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Match 11 RR-VS-DC
/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data/Matc

In [17]:
for j, url in enumerate(ipl_matches):
  try:
    path = '/content/drive/MyDrive/Colab Notebooks/# Akashik Records/IPL Data'
    matches = '-'.join(ipl_matches[j].split('/')[-1].split('-')[:3])
    t = j+1
    page = requests.get(url)
    soup = bs(page.text)
    innings = pd.concat([get_stats(1), get_stats(2)]).reset_index().rename(columns={'index': 'Batting Order'})
    innings.to_csv(f'{path}/Tata-IPL-2023-Match-#{t}-{matches.upper()}.csv', index=False)
  except:
    print("The match is still pending.")
    break

The match is still pending.
