In [1]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy as np
import json

In [2]:
SERIES_ID = 8604
MATCH_ID = 287879
# PLAYERS_SHEET = "Eng-Pak Test Players"
# TEAMS_SHEET = "Eng-Pak 3rd Test Selected Teams"
MATCH_TYPE = "t20"

In [3]:
with open("match_info.json", "r") as read_file:
    match_info = json.load(read_file)

with open("points.json", "r") as read_file:
    points = json.load(read_file)

points = points[match_info["match_type"]]

In [4]:
points

{'run': 1,
 'boundary_bonus': 1,
 'six_bonus': 2,
 'fifty_bonus': 4,
 'century_bonus': 8,
 'duck': -3,
 'wicket': 25,
 'four_wkt': 4,
 'five_wkt': 8,
 'maiden': 4,
 'catch': 8,
 'stumping': 12,
 'run_out_throw': 8,
 'run_out_catch': 4,
 'capt_factor': 2,
 'vc_factor': 1.5,
 'in_xi': 4,
 'econ_thresholds': [2.5, 3.5, 4.51, 7, 8.1, 9.01],
 'sr_thresholds': [40, 50, 60]}

In [24]:
if MATCH_TYPE == "test":
    RUN = 1
    BOUNDARY_BONUS = 1
    SIX_BONUS = 2
    FIFTY_BONUS = 4
    CENTURY_BONUS = 8
    DUCK = -4

    WICKET = 25
    FOUR_WKT = 4
    FIVE_WKT = 8

    CATCH = 8
    STUMPING = 12
    RUN_OUT_THROW = 8
    RUN_OUT_CATCH = 4

    CAPT_FACTOR = 2
    VC_FACTOR = 1.5
    IN_XI = 4

In [25]:
if MATCH_TYPE == "odi":
    RUN = 1
    BOUNDARY_BONUS = 1
    SIX_BONUS = 2
    FIFTY_BONUS = 4
    CENTURY_BONUS = 8
    DUCK = -3

    WICKET = 25
    FOUR_WKT = 4
    FIVE_WKT = 8
    MAIDEN = 4

    CATCH = 8
    STUMPING = 12
    RUN_OUT_THROW = 8
    RUN_OUT_CATCH = 4

    CAPT_FACTOR = 2
    VC_FACTOR = 1.5
    IN_XI = 4

In [113]:
if MATCH_TYPE == "odi":
    RUN = 1
    BOUNDARY_BONUS = 1
    SIX_BONUS = 2
    FIFTY_BONUS = 4
    CENTURY_BONUS = 8
    DUCK = -3

    WICKET = 25
    FOUR_WKT = 4
    FIVE_WKT = 8
    MAIDEN = 4

    CATCH = 8
    STUMPING = 12
    RUN_OUT_THROW = 8
    RUN_OUT_CATCH = 4

    CAPT_FACTOR = 2
    VC_FACTOR = 1.5
    IN_XI = 4
    
    ECON_THRESHOLDS = [2.5, 3.5, 4.51, 7, 8.1, 9.01]
    
    SR_THRESHOLDS = [60, 50, 40]

In [114]:
if MATCH_TYPE == "t20":
    RUN = 1
    BOUNDARY_BONUS = 1
    SIX_BONUS = 2
    FIFTY_BONUS = 8
    CENTURY_BONUS = 16
    DUCK = -2

    WICKET = 25
    FOUR_WKT = 8
    FIVE_WKT = 16
    MAIDEN = 8

    CATCH = 8
    STUMPING = 12
    RUN_OUT_THROW = 8
    RUN_OUT_CATCH = 4

    CAPT_FACTOR = 2
    VC_FACTOR = 1.5
    IN_XI = 4
    
    ECON_THRESHOLDS = [4, 5, 6.01, 9, 10.1, 11.01]
    
    SR_THRESHOLDS = [50, 60, 70]

In [9]:
def extract_batting_data(series_id, match_id):
    URL = 'https://www.espncricinfo.com/series/'+ str(series_id) + '/scorecard/' + str(match_id)
    page = requests.get(URL)
    bs = BeautifulSoup(page.content, 'lxml')

    table_body=bs.find_all('tbody')
    batsmen_df = pd.DataFrame(columns=["Name","Desc","Runs", "Balls", "4s", "6s", "SR", "Team"])
    # for i, table in enumerate(table_body[0:4:2]):
    for i, table in enumerate(table_body[::2]):
        rows = table.find_all('tr')
        for row in rows[::2]:
            cols=row.find_all('td')
            cols=[x.text.strip() for x in cols]
            if cols[0] == 'Extras':
                continue
            if len(cols) == 1:
                continue
            if len(cols) >= 8:
                batsmen_df = batsmen_df.append(pd.Series(
                [re.sub(r"\W+", ' ', cols[0].split("(c)")[0]).strip(), cols[1], 
                int(cols[2]), int(cols[3]), int(cols[5]), int(cols[6]), float(cols[7]), i % 2], 
                index=batsmen_df.columns ), ignore_index=True)
    return batsmen_df

In [10]:
def extract_bowling_data(series_id, match_id):
    URL = 'https://www.espncricinfo.com/series/'+ str(series_id) + '/scorecard/' + str(match_id)
    page = requests.get(URL)
    bs = BeautifulSoup(page.content, 'lxml')

    table_body=bs.find_all('tbody')
    bowler_df = pd.DataFrame(columns=['Name', 'Overs', 'Maidens', 'Runs', 'Wickets',
                                      'Econ', 'Wd', 'Nb','Team'])
    for i, table in enumerate(table_body[1::2]):
        rows = table.find_all('tr')
        for row in rows:
            cols=row.find_all('td')
            cols=[x.text.strip() for x in cols]
            if len(cols) == 11:
                bowler_df = bowler_df.append(pd.Series([cols[0], float(cols[1]), int(cols[2]), int(cols[3]), int(cols[4]), float(cols[5]), 
                                                         float(cols[9]), int(cols[10]), (i + 1) % 2], 
                                                       index=bowler_df.columns ), ignore_index=True)
    return bowler_df

In [21]:
def find_xis(series_id, match_id):
    URL = 'https://www.espncricinfo.com/series/'+ str(series_id) + '/scorecard/' + str(match_id)
    page = requests.get(URL)
    bs = BeautifulSoup(page.content, 'lxml')

    table_body=bs.find_all('tbody')
    xis = []
    table_foot = bs.find_all('tfoot')

    for i, table in enumerate(table_foot):
        rows = table.find_all('tr')
        if len(rows) == 3:
            dnb_names = rows[1].find_all('span')
            dnb_names = [x.text.replace(u'\xa0', '').strip(' ,') for x in dnb_names]
            dnb_names = [re.sub(r"\W+", ' ', i.split("(c)")[0]).strip() for i in dnb_names]
            dnb_names = [i for i in dnb_names if i]
            xis.extend(dnb_names)
 
    for i, table in enumerate(table_body):
        rows = table.find_all('tr')
        for row in rows:
            cols=row.find_all('td')
            cols=[x.text.strip() for x in cols]
            if len(cols) >= 8:
                xis.append(re.sub(r"\W+", ' ', cols[0].split("(c)")[0]).strip())
            if len(cols) == 2:
                if cols[1] == 'Batsman' or cols[1] == 'Allrounder' or cols[1] == 'Bowler' or cols[1] == 'Wicketkeeper':
                    name = re.sub(r"\W+", ' ', cols[0].split("(c)")[0]).strip()
                    xis.append(names_dict[name])
    return set(xis)

In [13]:
batting_data = extract_batting_data(series_id = SERIES_ID, match_id = MATCH_ID)
bowling_data = extract_bowling_data(series_id = SERIES_ID, match_id = MATCH_ID)

In [19]:
[batting_data.columns.values.tolist()] + batting_data.values.tolist()

[['Name', 'Desc', 'Runs', 'Balls', '4s', '6s', 'SR', 'Team'],
 ['G Gambhir', 'c Mohammad Asif b Umar Gul', 75, 54, 8, 2, 138.88, 0],
 ['YK Pathan', 'c Shoaib Malik b Mohammad Asif', 15, 8, 1, 1, 187.5, 0],
 ['RV Uthappa', 'c Shahid Afridi b Sohail Tanvir', 8, 11, 1, 0, 72.72, 0],
 ['Yuvraj Singh', 'c & b Umar Gul', 14, 19, 1, 0, 73.68, 0],
 ['MS Dhoni', 'b Umar Gul', 6, 10, 0, 0, 60.0, 0],
 ['RG Sharma', 'not out', 30, 16, 2, 1, 187.5, 0],
 ['IK Pathan', 'not out', 3, 3, 0, 0, 100.0, 0],
 ['Mohammad Hafeez', 'c Uthappa b Singh', 1, 3, 0, 0, 33.33, 1],
 ['Imran Nazir', 'run out (Uthappa)', 33, 14, 4, 2, 235.71, 1],
 ['Kamran Akmal', 'b Singh', 0, 3, 0, 0, 0.0, 1],
 ['Younis Khan', 'c YK Pathan b Joginder Sharma', 24, 24, 4, 0, 100.0, 1],
 ['Shoaib Malik', 'c Sharma b IK Pathan', 8, 17, 0, 0, 47.05, 1],
 ['Misbah ul Haq', 'c Sreesanth b Joginder Sharma', 43, 38, 0, 4, 113.15, 1],
 ['Shahid Afridi', 'c Sreesanth b IK Pathan', 0, 1, 0, 0, 0.0, 1],
 ['Yasir Arafat', 'b IK Pathan', 15, 11, 2

In [20]:
[bowling_data.columns.values.tolist()] + bowling_data.values.tolist()

[['Name', 'Overs', 'Maidens', 'Runs', 'Wickets', 'Econ', 'Wd', 'Nb', 'Team'],
 ['Mohammad Asif', 3.0, 0, 25, 1, 8.33, 1.0, 0, 1],
 ['Sohail Tanvir', 4.0, 0, 29, 1, 7.25, 2.0, 0, 1],
 ['Shahid Afridi', 4.0, 0, 30, 0, 7.5, 0.0, 0, 1],
 ['Mohammad Hafeez', 3.0, 0, 25, 0, 8.33, 0.0, 0, 1],
 ['Umar Gul', 4.0, 0, 28, 3, 7.0, 1.0, 1, 1],
 ['Yasir Arafat', 2.0, 0, 19, 0, 9.5, 0.0, 0, 1],
 ['RP Singh', 4.0, 0, 26, 3, 6.5, 0.0, 1, 0],
 ['S Sreesanth', 4.0, 1, 44, 1, 11.0, 2.0, 0, 0],
 ['Joginder Sharma', 3.3, 0, 20, 2, 5.71, 2.0, 0, 0],
 ['YK Pathan', 1.0, 0, 5, 0, 5.0, 0.0, 0, 0],
 ['IK Pathan', 4.0, 0, 16, 3, 4.0, 1.0, 0, 0],
 ['Harbhajan Singh', 3.0, 0, 36, 0, 12.0, 1.0, 0, 0]]

In [102]:
# for index, row in batting_data.iterrows():
#     name = row["Name"]
#     dismissal = row["Desc"]
#     runs = row["Runs"]
#     balls = row['Balls']
#     fours = row["4s"]
#     sixes = row['6s']
#     strike_rate = row['SR']
    
#     if runs == 0:
# #         role = roles_dict[name]
#         role = "RANDOM"
#         # no deduction for bowler ducks
#         if role == "Bowler":
#             point_change = 0
#         else:
#             point_change = DUCK
#     else:
#         point_change = runs
    
#     role = roles_dict[name]
#     # Strike Rate
#     if role != "Bowler":
#         if (MATCH_TYPE == "odi" and balls >= 20) or (MATCH_TYPE == "t20" and balls >= 10):
#             sr_ind = np.searchsorted(SR_THRESHOLDS, strike_rate, side='right')
#             point_change += [-6, -4, -2, 0][sr_ind]

#     if runs >= 100:
#         point_change += CENTURY_BONUS
#     elif runs >= 50:
#         point_change += FIFTY_BONUS
    
#     point_change += sixes*SIX_BONUS
#     point_change += fours*BOUNDARY_BONUS
        
#     points_dict[name] += point_change

    # Fielding
#     if dismissal.find("sub (") == -1:
#         # caught and bowled
#         if dismissal.find("c & b") == 0:
#             fielder = dismissal.split("c & b")[1].strip()
#             fielder_com_name = [name for name in xis if fielder in name]
#             points_dict[fielder_com_name[0]] += CATCH
#         # catch
#         elif dismissal.find("c") == 0:
#             fielder = dismissal.split("c ")[1].split("b ")[0].strip()
#             fielder = re.sub(r"\W+", ' ', fielder).strip()
#             fielder_com_name = [name for name in xis if fielder in name]
#             points_dict[fielder_com_name[0]] += CATCH
#         # stumping
#         if dismissal.find("st") == 0:
#             fielder = dismissal.split("st ")[1].split("b ")[0].strip()
#             fielder = re.sub(r"\W+", ' ', fielder).strip()
#             fielder_com_name = [name for name in xis if fielder in name]
#             points_dict[fielder_com_name[0]] += STUMPING
#         # run out
#         if dismissal.find("run out") == 0:
#             fielders = [x.strip() for x in dismissal.split("run out")[1].replace('(', '').replace(')', '').split("/")]
#             fielders = [re.sub(r"\W+", ' ', i).strip() for i in fielders]
#             if len(fielders) >= 3:
#                 fielders = fielders[-2:]
#             if len(fielders) == 1:
#                 thrower = fielders[0]
#                 catcher = fielders[0]
#             else:
#                 thrower = fielders[0]
#                 catcher = fielders[1]
#             thrower_com_name = [name for name in xis if thrower in name]
#             catcher_com_name = [name for name in xis if catcher in name]
#             points_dict[thrower_com_name[0]] += RUN_OUT_THROW
#             points_dict[catcher_com_name[0]] += RUN_OUT_CATCH

In [112]:
for index, row in bowling_data.iterrows():
    name = row["Name"]
    wickets = row["Wickets"]
    overs = row["Overs"]
    econ_rate = row["Econ"]

    point_change = WICKET*wickets
    
    ## Economy rate
    if (MATCH_TYPE == "odi" and overs >= 5) or (MATCH_TYPE == "t20" and overs >= 2):
        econ_ind = np.searchsorted(ECON_THRESHOLDS, econ_rate, side='right')
        point_change += [6, 4, 2, 0, -2, -4, -6][econ_ind]
        
    # four and five wicket haul
    if wickets == 4:
        point_change += FOUR_WKT
    if wickets >= 5:
        point_change += FIVE_WKT
        
    print(name, point_change)
    print()
#     points_dict[name] += point_change

Mohammad Asif 25

Sohail Tanvir 25

Shahid Afridi 0

Mohammad Hafeez 0

Umar Gul 75

Yasir Arafat -2

RP Singh 75

S Sreesanth 21

Joginder Sharma 52

YK Pathan 0

IK Pathan 79

Harbhajan Singh -6

