-
Notifications
You must be signed in to change notification settings - Fork 0
/
ScorecardCricketMatch
66 lines (57 loc) · 2.96 KB
/
ScorecardCricketMatch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy as np
def _clean_player_name(raw_name):
    """Return a player name with the captain tag and punctuation removed.

    Everything from the first "(c)" onward is dropped, then every run of
    non-word characters is collapsed to a single space and the result is
    stripped.
    """
    return re.sub(r"\W+", ' ', raw_name.split("(c)")[0]).strip()


def extract_batting_data(series_id, match_id, timeout=30):
    """Scrape the batting scorecard of one match into a DataFrame.

    Args:
        series_id: Series identifier used in the scorecard URL.
        match_id: Match identifier used in the scorecard URL.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame with columns Name, Desc, Runs, Balls, 4s, 6s, SR and
        Team, one row per batsman. Batsmen listed in the table footers are
        included with Desc "DNB" and zeroed figures. Team is 1 for entries
        taken from the first batting table/footer and 2 for the second.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    url = 'https://www.espncricinfo.com/series/{}/scorecard/{}'.format(
        series_id, match_id)
    page = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty result.
    page.raise_for_status()
    bs = BeautifulSoup(page.content, 'lxml')

    columns = ["Name", "Desc", "Runs", "Balls", "4s", "6s", "SR", "Team"]
    # Rows are gathered in a plain list and turned into a frame once at the
    # end: DataFrame.append no longer exists in pandas >= 2.0 and copied the
    # whole frame on every call.
    records = []

    # The 1st and 3rd <tbody> elements are read as the batting tables of the
    # two innings (the slice skips the tables in between).
    for i, table in enumerate(bs.find_all('tbody')[0:4:2]):
        team = i + 1
        # Only every other <tr> is read; presumably the rows in between are
        # not batsman rows -- TODO confirm against the live markup.
        for row in table.find_all('tr')[::2]:
            cols = [cell.text.strip() for cell in row.find_all('td')]
            # Rows without both a name and a description cell cannot be a
            # batsman entry; the "Extras" row is not a batsman either.
            if len(cols) < 2 or cols[0] == 'Extras':
                continue
            name = _clean_player_name(cols[0])
            if len(cols) > 7:
                # cols[4] is deliberately not copied into the output.
                records.append([name, cols[1], cols[2], cols[3],
                                cols[5], cols[6], cols[7], team])
            else:
                # Short row: keep the batsman but zero the figures.
                records.append([name, cols[1], 0, 0, 0, 0, 0, team])

    # Footers carry the names recorded as "DNB". Use however many footers
    # are present (at most two) rather than indexing blindly.
    for i, footer in enumerate(bs.find_all("tfoot")[:2]):
        team = i + 1
        for block in footer.find_all("div"):
            for span in block.find_all('span'):
                name = _clean_player_name(span.text.strip())
                if name:
                    records.append([name, "DNB", 0, 0, 0, 0, 0, team])

    return pd.DataFrame(records, columns=columns)
def extract_bowling_data(series_id, match_id, timeout=30):
    """Scrape the bowling scorecard of one match into a DataFrame.

    Args:
        series_id: Series identifier used in the scorecard URL.
        match_id: Match identifier used in the scorecard URL.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame with columns Name, Overs, Maidens, Runs, Wickets, Econ,
        Dots, 4s, 6s, Wd, Nb and Team, one row per bowler. Team is 2 for
        rows from the first bowling table and 1 for rows from the second.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    url = 'https://www.espncricinfo.com/series/{}/scorecard/{}'.format(
        series_id, match_id)
    page = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty result.
    page.raise_for_status()
    bs = BeautifulSoup(page.content, 'lxml')

    columns = ['Name', 'Overs', 'Maidens', 'Runs', 'Wickets',
               'Econ', 'Dots', '4s', '6s', 'Wd', 'Nb', 'Team']
    # Every column except the trailing Team is copied from a table cell.
    stat_count = len(columns) - 1
    # Rows are gathered in a plain list and turned into a frame once at the
    # end: DataFrame.append no longer exists in pandas >= 2.0 and copied the
    # whole frame on every call.
    records = []

    # The 2nd and 4th <tbody> elements are read as the bowling tables of the
    # two innings (the slice skips the tables in between).
    for i, table in enumerate(bs.find_all('tbody')[1:4:2]):
        # The first bowling table is labelled team 2 and the second team 1,
        # the reverse of the table order.
        team = 2 if i == 0 else 1
        for row in table.find_all('tr'):
            cols = [cell.text.strip() for cell in row.find_all('td')]
            # Skip rows that do not carry a full set of bowling figures
            # instead of failing with IndexError on them.
            if len(cols) < stat_count:
                continue
            records.append(cols[:stat_count] + [team])

    return pd.DataFrame(records, columns=columns)
# Scrape the batting and bowling tables of one scorecard (series 18693,
# match 1144999) and save them as two sheets of a single Excel workbook.
match_ref = {'series_id': 18693, 'match_id': 1144999}
a = extract_batting_data(**match_ref)
b = extract_bowling_data(**match_ref)
with pd.ExcelWriter('output.xlsx') as writer:
    # Batting goes to the first sheet, bowling to the second.
    for frame, sheet in ((a, 'Sheet_name_1'), (b, 'Sheet_name_2')):
        frame.to_excel(writer, sheet_name=sheet)