In [1]:
# Maps each team's ESPNcricinfo batting/bowling averages page (key) to the
# team label (value) used for MongoDB collection names and feature columns.
# NOTE(review): id=13533 is presumably the tournament id and team=NNNN the
# ESPNcricinfo team id -- confirm against the site.
url_dict = {'https://stats.espncricinfo.com/ci/engine/records/averages/batting_bowling_by_team.html?id=13533;team=4346;type=tournament':'MumbaiIndians',
           'https://stats.espncricinfo.com/ci/engine/records/averages/batting_bowling_by_team.html?id=13533;team=4343;type=tournament':'ChennaiSuperKings',
           'https://stats.espncricinfo.com/ci/engine/records/averages/batting_bowling_by_team.html?id=13533;team=5143;type=tournament':'SunrisersHyderabad',
           'https://stats.espncricinfo.com/ci/engine/records/averages/batting_bowling_by_team.html?id=13533;team=4344;type=tournament':'DelhiCapitals',
           'https://stats.espncricinfo.com/ci/engine/records/averages/batting_bowling_by_team.html?id=13533;team=4342;type=tournament':'PunjabLions',
           'https://stats.espncricinfo.com/ci/engine/records/averages/batting_bowling_by_team.html?id=13533;team=4341;type=tournament':'KolkataKnightRiders',
           'https://stats.espncricinfo.com/ci/engine/records/averages/batting_bowling_by_team.html?id=13533;team=4345;type=tournament':'RajasthanRoyals',
            'https://stats.espncricinfo.com/ci/engine/records/averages/batting_bowling_by_team.html?id=13533;team=4340;type=tournament':'RoyalChallengersBangalore'
           }

In [2]:
import numpy as np
import pandas as pd
import requests
from pymongo import MongoClient
from retry_requests import retry
from requests import Session
# Suffix appended to a team label to form that team's MongoDB collection name
# (e.g. 'MumbaiIndians' + 'batbowl').
str_ = 'batbowl'


In [3]:

def _mongo_insert(data, team_name):
    """Insert every row of ``data`` into the team's collection in the ``IPL2020`` database.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to store; each row becomes one document via ``to_dict('records')``.
    team_name : str
        Team label; the collection name is ``team_name + str_``.

    Notes
    -----
    Documents are appended. Nothing is cleared first, so calling this twice
    for the same team stores the rows twice.
    """
    # The context manager closes the client even when insert_many raises;
    # the original only closed it on the success path.
    with MongoClient("localhost", 27017) as client:
        collection = client.IPL2020[team_name + str_]
        collection.insert_many(data.to_dict('records'))

def scrape_table(url,team_name):
    """Download a team's batting and bowling tables and store them in MongoDB.

    The first HTML table on the page is treated as batting and the second as
    bowling. Bowling columns that clash with batting names are renamed
    (``Runs``->``BRuns``, ``Inns``->``BInns``, ``SR``->``BSR``, ``Ave``->``BAve``),
    ``BBI`` is dropped, and the two tables are stacked row-wise, so a row
    holds either batting or bowling figures and NaN for the other set.

    Parameters
    ----------
    url : str
        Page to scrape.
    team_name : str
        Team label passed to ``_mongo_insert`` to pick the collection.

    Raises
    ------
    requests.HTTPError
        If the page still returns an error status after the retries.
    """
    # Local import keeps this cell self-contained; only needed to hand the
    # downloaded HTML to pandas as a file-like object.
    from io import StringIO

    my_session = retry(Session(), retries=100, backoff_factor=0.2)
    # requests timeouts are in seconds; the previous value of 10000 would
    # have let a single stalled attempt hang for hours.
    response = my_session.get(url, timeout=30)
    response.raise_for_status()
    # Parse the body fetched through the retrying session. Passing the URL to
    # read_html would download the page a second time without any retries.
    df = pd.read_html(StringIO(response.text), header = 0)

    batting = df[0]
    bowling = df[1].rename(columns={'Runs':'BRuns','Inns':'BInns','SR':'BSR','Ave':'BAve'})
    bowling = bowling.drop(['BBI'],axis=1)
    f_df = pd.concat([batting, bowling])

    # '-' marks a missing figure in the scraped tables.
    f_df = f_df.replace('-', np.nan)
    # Every column after the first is numeric once '*' markers are stripped
    # (raw string: '\*' is an invalid escape in a normal literal).
    f_df.iloc[:,1:26]=f_df.iloc[:,1:26].replace({r'\*':''},regex=True).astype(float)

    _mongo_insert(f_df,team_name)
    


In [None]:
# Scrape and store the tables for every team in url_dict.
# Re-running this cell appends duplicate documents: _mongo_insert only calls
# insert_many and never clears the collection first.
for url, team in url_dict.items():
    scrape_table(url,team)
    

Total runs, balls, wickets

In [4]:
def totals_constant():
    """Compute league-wide baseline rates from every team's stored data.

    For each team in ``url_dict`` the stored rows are summed into runs scored,
    balls bowled (``6 * Overs``), wickets taken and runs conceded; those team
    totals are then summed across the league.

    Returns
    -------
    tuple of six floats, in this order:
        runs / wickets, wickets / balls, runs / balls,
        runs conceded / wickets, wickets / balls, runs conceded / balls.
    """
    per_team = []
    for team in url_dict.values():
        frame = fetch_aggregated_data(team)
        runs_scored = frame.Runs.sum()
        balls_bowled = 6*(frame.Overs.sum())
        wickets_taken = frame.Wkts.sum()
        runs_conceded = frame.BRuns.sum()
        per_team.append([runs_scored, balls_bowled, wickets_taken, runs_conceded])

    league = pd.DataFrame(per_team, columns=['Runs','balls','Wickets','RunsConcded'])

    runs = league.Runs.sum()
    balls = league.balls.sum()
    wickets = league.Wickets.sum()
    conceded = league.RunsConcded.sum()

    return (
        runs / wickets,
        wickets / balls,
        runs / balls,
        conceded / wickets,
        wickets / balls,
        conceded / balls,
    )
    



def fetch_aggregated_data(team_name):
    """Load a team's stored rows from MongoDB with derived rate fields added.

    The aggregation adds four fields to every document:

    * ``MRA``      -- (``50`` + ``100``) / ``Inns``
    * ``outrate``  -- (``Inns`` - ``NO``) / ``BF``
    * ``BRPI``     -- (4 * ``4s`` + 6 * ``6s``) / ``Inns``
    * ``Boutrate`` -- ``Wkts`` / (``Overs`` * 6)

    Parameters
    ----------
    team_name : str
        Team label; the collection read is ``team_name + str_``.

    Returns
    -------
    pandas.DataFrame
        One row per stored document, including the derived fields.
    """
    # NOTE(review): if Overs is stored in scorecard notation (3.4 meaning
    # 3 overs and 4 balls), Overs * 6 is not an exact ball count -- confirm.
    pipeline = [
        {
            '$addFields': {
                'MRA': {
                    '$divide': [
                        {'$add': ['$50', '$100']},
                        '$Inns'
                    ]
                },
                'outrate': {
                    '$divide': [
                        {'$subtract': ['$Inns', '$NO']},
                        '$BF'
                    ]
                },
                'BRPI': {
                    '$divide': [
                        {
                            '$add': [
                                {'$multiply': ['$4s', 4]},
                                {'$multiply': ['$6s', 6]}
                            ]
                        },
                        '$Inns'
                    ]
                },
                'Boutrate': {
                    '$divide': [
                        '$Wkts',
                        {'$multiply': ['$Overs', 6]}
                    ]
                }
            }
        }
    ]

    # Close the client deterministically; the cursor must be fully read
    # before the connection goes away, hence list(...) inside the block.
    with MongoClient('mongodb://localhost:27017/?readPreference=primary&appname=MongoDB%20Compass&ssl=false') as client:
        documents = list(client['IPL2020'][team_name+str_].aggregate(pipeline))

    return pd.DataFrame(documents)


In [5]:
# League-wide baselines read as globals by agr_batting, agr_bowling, rI_bat
# and rI_bowl. Order matches the tuple returned by totals_constant().
Bat_gen_avg, Bat_gen_outrate , Bat_gen_sr, Bowl_gen_avg, Bowl_gen_outrate , Bowl_gen_sr = totals_constant()

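$$\mathrm{AGR}_{\mathrm{bat}} = \left(t_{\mathrm{batsman}} - \mathrm{Bat\_gen\_sr} \cdot n_b\right) + \mathrm{Bat\_gen\_avg} \cdot n_b \cdot \left(\mathrm{Bat\_gen\_outrate} - \mathrm{outrate}\right)$$

where $t_{\mathrm{batsman}}$ is the runs scored by the batsman and $n_b$ is the number of balls faced.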

In [6]:
def rI_bat():
    """Return each team's mean batting index.

    The index for a row is its batting AGR divided by ``10 * Bat_gen_avg``;
    the team value is the mean over that team's rows.

    Returns
    -------
    dict
        Team label -> mean batting index, in ``url_dict`` order.
    """
    return {
        team: (agr_batting(fetch_aggregated_data(team)) / (10* Bat_gen_avg)).mean()
        for team in url_dict.values()
    }

def rI_bowl():
    """Return each team's mean bowling index.

    The index for a row is its bowling AGR divided by ``10 * Bat_gen_avg``;
    the team value is the mean over that team's rows.

    Returns
    -------
    dict
        Team label -> mean bowling index, in ``url_dict`` order.
    """
    # NOTE(review): the bowling index is normalised by the *batting* baseline
    # (Bat_gen_avg), same as rI_bat -- confirm this is intended and not a
    # copy-paste of the batting version (Bowl_gen_avg exists).
    return {
        team: (agr_bowling(fetch_aggregated_data(team)) / (10* Bat_gen_avg)).mean()
        for team in url_dict.values()
    }


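$$\mathrm{AGR}_{\mathrm{bowl}} = \left(\mathrm{Bowl\_gen\_sr} \cdot t_b - t_{\mathrm{runs}}\right) + \mathrm{Bowl\_gen\_avg} \cdot t_b \cdot \left(\mathrm{Bowl\_gen\_outrate} - \mathrm{outrate}\right)$$

where $t_b$ is the number of balls bowled and $t_{\mathrm{runs}}$ is the runs conceded by the bowler.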

In [7]:
def agr_batting(data):
    """Return the batting AGR for each row of ``data``.

    Combines the runs scored above the league rate for the balls faced with
    a term for being dismissed less (or more) often than the league rate.
    Reads the globals ``Bat_gen_sr``, ``Bat_gen_avg`` and ``Bat_gen_outrate``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide ``Runs``, ``BF`` and ``outrate`` columns.
    """
    runs_above_par = data.Runs - (Bat_gen_sr * data.BF)
    outrate_gap = Bat_gen_outrate - data.outrate
    dismissal_term = Bat_gen_avg * data.BF * outrate_gap
    return runs_above_par + dismissal_term
        

def agr_bowling(data):
    """Return the bowling AGR for each row of ``data``.

    Combines the runs saved relative to the league rate for the balls bowled
    (``Overs * 6``) with a term for taking wickets less (or more) often than
    the league rate, exactly as the expression below computes it.
    Reads the globals ``Bowl_gen_sr``, ``Bowl_gen_avg`` and ``Bowl_gen_outrate``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide ``Overs``, ``BRuns`` and ``Boutrate`` columns.
    """
    balls_bowled = data.Overs * 6
    runs_saved = Bowl_gen_sr * balls_bowled - data.BRuns
    outrate_gap = Bowl_gen_outrate - data.Boutrate
    wicket_term = Bowl_gen_avg * balls_bowled * outrate_gap
    return runs_saved + wicket_term

In [8]:
import operator
from collections import OrderedDict

In [9]:
# Team -> batting index, ordered from lowest to highest index value.
# Used below as the lookup table for the team1bat / team2bat features.
sort_bat_dict= dict(sorted(rI_bat().items(), key=operator.itemgetter(1))) 
print("Sorted Dictionary by value: ", sort_bat_dict)

Sorted Dictionary by value:  {'KolkataKnightRiders': -0.1429884056497032, 'DelhiCapitals': -0.09568159714717964, 'RajasthanRoyals': -0.08782838652354537, 'RoyalChallengersBangalore': -0.03603592723019618, 'SunrisersHyderabad': -0.021699009247779087, 'PunjabLions': 0.019528966136976307, 'ChennaiSuperKings': 0.026760242674982237, 'MumbaiIndians': 0.10870125659946274}


In [10]:
# Team -> bowling index, ordered from lowest to highest index value.
# Used below as the lookup table for the team1bowl / team2bowl features.
sort_bowl_dict= dict(sorted(rI_bowl().items(), key=operator.itemgetter(1))) 
print("Sorted Dictionary by value: ", sort_bowl_dict)

Sorted Dictionary by value:  {'DelhiCapitals': -0.07786219049160013, 'MumbaiIndians': -0.07627751857081548, 'PunjabLions': -0.02574726131346169, 'SunrisersHyderabad': -0.007227276121737493, 'ChennaiSuperKings': -0.0008597099517881068, 'RoyalChallengersBangalore': 0.0490952463760612, 'KolkataKnightRiders': 0.06782003313709033, 'RajasthanRoyals': 0.06876273852859732}


In [11]:
# Download the tournament's match-results page; read_html returns a list of
# tables. The chained assignment also binds the same list to the global `df`.
results = df = pd.read_html('https://stats.espncricinfo.com/ci/engine/records/team/match_results.html?id=13533;type=tournament', header = 0)


In [12]:
# Keep only the two team columns and the winner from the first table.
# `results` is rebound from a list of tables to a DataFrame, so this cell
# cannot be re-run without first re-running the download cell above.
results = results[0][['Team 1','Team 2','Winner']]
results

Unnamed: 0,Team 1,Team 2,Winner
0,Mum Indians,Super Kings,Super Kings
1,Capitals,Kings XI,tied
2,Sunrisers,RCB,RCB
3,Royals,Super Kings,Royals
4,KKR,Mum Indians,Mum Indians
5,Kings XI,RCB,Kings XI
6,Super Kings,Capitals,Capitals
7,KKR,Sunrisers,KKR
8,Royals,Kings XI,Royals
9,RCB,Mum Indians,tied


In [13]:
# Maps the short team names used on the match-results page to the team
# labels used as values in url_dict.
team_dict = {'Mum Indians':'MumbaiIndians' , 'Super Kings':'ChennaiSuperKings' ,'Sunrisers' : 'SunrisersHyderabad' ,
             'Capitals': 'DelhiCapitals' , 'Kings XI': 'PunjabLions', 'KKR': 'KolkataKnightRiders', 'Royals':'RajasthanRoyals' ,'RCB' :'RoyalChallengersBangalore' }

In [14]:
# Translate short names to team labels in all three columns. Values with no
# entry in team_dict (e.g. 'tied' in Winner) are left unchanged.
results = results.replace({"Team 1": team_dict,"Team 2": team_dict,"Winner": team_dict })

In [15]:
results['Team 1'].unique()

array(['MumbaiIndians', 'DelhiCapitals', 'SunrisersHyderabad',
       'RajasthanRoyals', 'KolkataKnightRiders', 'PunjabLions',
       'ChennaiSuperKings', 'RoyalChallengersBangalore'], dtype=object)

In [16]:
results

Unnamed: 0,Team 1,Team 2,Winner
0,MumbaiIndians,ChennaiSuperKings,ChennaiSuperKings
1,DelhiCapitals,PunjabLions,tied
2,SunrisersHyderabad,RoyalChallengersBangalore,RoyalChallengersBangalore
3,RajasthanRoyals,ChennaiSuperKings,RajasthanRoyals
4,KolkataKnightRiders,MumbaiIndians,MumbaiIndians
5,PunjabLions,RoyalChallengersBangalore,PunjabLions
6,ChennaiSuperKings,DelhiCapitals,DelhiCapitals
7,KolkataKnightRiders,SunrisersHyderabad,KolkataKnightRiders
8,RajasthanRoyals,PunjabLions,RajasthanRoyals
9,RoyalChallengersBangalore,MumbaiIndians,tied


In [17]:
# Attach each side's batting and bowling index as numeric feature columns.
# Series.map leaves NaN for a team missing from the lookup; the previous ''
# fallback would have put a string into a numeric column and silently turned
# it into object dtype.
results['team1bat'] = results['Team 1'].map(sort_bat_dict)
results['team1bowl'] = results['Team 1'].map(sort_bowl_dict)
results['team2bat'] = results['Team 2'].map(sort_bat_dict)
results['team2bowl'] = results['Team 2'].map(sort_bowl_dict)


In [18]:
# One-hot encode both team columns; 'Team 1' and 'Team 2' are replaced by
# 'Team 1_<label>' / 'Team 2_<label>' indicator columns. `results` is rebound,
# so re-running this cell alone fails because the source columns are gone.
results = pd.get_dummies(results, columns=['Team 1','Team 2'])


In [19]:
results

Unnamed: 0,Winner,team1bat,team1bowl,team2bat,team2bowl,Team 1_ChennaiSuperKings,Team 1_DelhiCapitals,Team 1_KolkataKnightRiders,Team 1_MumbaiIndians,Team 1_PunjabLions,...,Team 1_RoyalChallengersBangalore,Team 1_SunrisersHyderabad,Team 2_ChennaiSuperKings,Team 2_DelhiCapitals,Team 2_KolkataKnightRiders,Team 2_MumbaiIndians,Team 2_PunjabLions,Team 2_RajasthanRoyals,Team 2_RoyalChallengersBangalore,Team 2_SunrisersHyderabad
0,ChennaiSuperKings,0.108701,-0.076278,0.02676,-0.00086,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
1,tied,-0.095682,-0.077862,0.019529,-0.025747,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,RoyalChallengersBangalore,-0.021699,-0.007227,-0.036036,0.049095,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0
3,RajasthanRoyals,-0.087828,0.068763,0.02676,-0.00086,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,MumbaiIndians,-0.142988,0.06782,0.108701,-0.076278,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
5,PunjabLions,0.019529,-0.025747,-0.036036,0.049095,0,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
6,DelhiCapitals,0.02676,-0.00086,-0.095682,-0.077862,1,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
7,KolkataKnightRiders,-0.142988,0.06782,-0.021699,-0.007227,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
8,RajasthanRoyals,-0.087828,0.068763,0.019529,-0.025747,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
9,tied,-0.036036,0.049095,0.108701,-0.076278,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0


In [20]:
# Positional split with no shuffling: rows 0-39 train, rows 40-59 test, so
# the split depends on the row order of the match-results table.
train = results[0:40] 
test = results[40:60]


In [21]:
from sklearn.linear_model import LogisticRegression

In [22]:
train.columns

Index(['Winner', 'team1bat', 'team1bowl', 'team2bat', 'team2bowl',
       'Team 1_ChennaiSuperKings', 'Team 1_DelhiCapitals',
       'Team 1_KolkataKnightRiders', 'Team 1_MumbaiIndians',
       'Team 1_PunjabLions', 'Team 1_RajasthanRoyals',
       'Team 1_RoyalChallengersBangalore', 'Team 1_SunrisersHyderabad',
       'Team 2_ChennaiSuperKings', 'Team 2_DelhiCapitals',
       'Team 2_KolkataKnightRiders', 'Team 2_MumbaiIndians',
       'Team 2_PunjabLions', 'Team 2_RajasthanRoyals',
       'Team 2_RoyalChallengersBangalore', 'Team 2_SunrisersHyderabad'],
      dtype='object')

In [23]:
# Model inputs: the four index features plus the one-hot team indicators.
# The list is defined once so the train and test selections cannot drift
# apart (it was previously duplicated verbatim for X and X_test).
feature_columns = ['team1bat', 'team1bowl', 'team2bat', 'team2bowl',
       'Team 1_ChennaiSuperKings', 'Team 1_DelhiCapitals',
       'Team 1_KolkataKnightRiders', 'Team 1_MumbaiIndians',
       'Team 1_PunjabLions', 'Team 1_RajasthanRoyals',
       'Team 1_RoyalChallengersBangalore', 'Team 1_SunrisersHyderabad',
       'Team 2_ChennaiSuperKings', 'Team 2_DelhiCapitals',
       'Team 2_KolkataKnightRiders', 'Team 2_MumbaiIndians',
       'Team 2_PunjabLions', 'Team 2_RajasthanRoyals',
       'Team 2_RoyalChallengersBangalore', 'Team 2_SunrisersHyderabad']

X = train[feature_columns]
# Targets stay one-column DataFrames, as in the original cell.
Y = train[['Winner']]
X_test = test[feature_columns]
Y_test = test[['Winner']]


In [24]:
X_test

Unnamed: 0,team1bat,team1bowl,team2bat,team2bowl,Team 1_ChennaiSuperKings,Team 1_DelhiCapitals,Team 1_KolkataKnightRiders,Team 1_MumbaiIndians,Team 1_PunjabLions,Team 1_RajasthanRoyals,Team 1_RoyalChallengersBangalore,Team 1_SunrisersHyderabad,Team 2_ChennaiSuperKings,Team 2_DelhiCapitals,Team 2_KolkataKnightRiders,Team 2_MumbaiIndians,Team 2_PunjabLions,Team 2_RajasthanRoyals,Team 2_RoyalChallengersBangalore,Team 2_SunrisersHyderabad
40,0.02676,-0.00086,0.108701,-0.076278,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
41,-0.142988,0.06782,-0.095682,-0.077862,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0
42,0.019529,-0.025747,-0.021699,-0.007227,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1
43,-0.036036,0.049095,0.02676,-0.00086,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0
44,-0.087828,0.068763,0.108701,-0.076278,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0
45,-0.142988,0.06782,0.019529,-0.025747,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0
46,-0.021699,-0.007227,-0.095682,-0.077862,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0
47,0.108701,-0.076278,-0.036036,0.049095,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0
48,0.02676,-0.00086,-0.142988,0.06782,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
49,0.019529,-0.025747,-0.087828,0.068763,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0


In [25]:
X

Unnamed: 0,team1bat,team1bowl,team2bat,team2bowl,Team 1_ChennaiSuperKings,Team 1_DelhiCapitals,Team 1_KolkataKnightRiders,Team 1_MumbaiIndians,Team 1_PunjabLions,Team 1_RajasthanRoyals,Team 1_RoyalChallengersBangalore,Team 1_SunrisersHyderabad,Team 2_ChennaiSuperKings,Team 2_DelhiCapitals,Team 2_KolkataKnightRiders,Team 2_MumbaiIndians,Team 2_PunjabLions,Team 2_RajasthanRoyals,Team 2_RoyalChallengersBangalore,Team 2_SunrisersHyderabad
0,0.108701,-0.076278,0.02676,-0.00086,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0
1,-0.095682,-0.077862,0.019529,-0.025747,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
2,-0.021699,-0.007227,-0.036036,0.049095,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0
3,-0.087828,0.068763,0.02676,-0.00086,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0
4,-0.142988,0.06782,0.108701,-0.076278,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
5,0.019529,-0.025747,-0.036036,0.049095,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
6,0.02676,-0.00086,-0.095682,-0.077862,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
7,-0.142988,0.06782,-0.021699,-0.007227,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1
8,-0.087828,0.068763,0.019529,-0.025747,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0
9,-0.036036,0.049095,0.108701,-0.076278,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0


In [26]:
len(X.columns)

20

In [27]:
# Y = pd.get_dummies(Y.Winner)

In [28]:
Y

Unnamed: 0,Winner
0,ChennaiSuperKings
1,tied
2,RoyalChallengersBangalore
3,RajasthanRoyals
4,MumbaiIndians
5,PunjabLions
6,DelhiCapitals
7,KolkataKnightRiders
8,RajasthanRoyals
9,tied


In [29]:
# Multiclass logistic regression on the match features; the label is the
# winning team (or 'tied').
clf = LogisticRegression()
# Y is a one-column DataFrame; scikit-learn expects a 1-D target and warns
# when handed a column vector, so flatten it explicitly.
clf.fit(X, Y.values.ravel())

  return f(*args, **kwargs)


LogisticRegression()

In [30]:
clf.score(X_test,Y_test)

0.3

In [31]:
# Per-class probabilities for every test row, then the highest class
# probability for the first test row.
r = clf.predict_proba(X_test)
r[0].max()

0.26444681663291847

In [32]:
from sklearn.svm import SVC
# RBF-kernel support vector classifier trained on the same features.
svclassifier = SVC(kernel='rbf')
# Flatten the one-column DataFrame target to 1-D to avoid scikit-learn's
# column-vector conversion warning.
svclassifier.fit(X, Y.values.ravel())

  return f(*args, **kwargs)


SVC()

In [33]:
svclassifier.score(X_test,Y_test)

0.25

In [34]:
svclassifier.predict(X_test)

array(['MumbaiIndians', 'DelhiCapitals', 'SunrisersHyderabad',
       'ChennaiSuperKings', 'MumbaiIndians', 'MumbaiIndians',
       'DelhiCapitals', 'MumbaiIndians', 'DelhiCapitals', 'MumbaiIndians',
       'DelhiCapitals', 'SunrisersHyderabad', 'tied',
       'RoyalChallengersBangalore', 'RoyalChallengersBangalore',
       'MumbaiIndians', 'DelhiCapitals', 'SunrisersHyderabad',
       'SunrisersHyderabad', 'DelhiCapitals'], dtype=object)

In [35]:
clf.predict_proba(X_test)

array([[0.04512163, 0.12308424, 0.0525652 , 0.26444682, 0.04425319,
        0.11841904, 0.10040615, 0.10722063, 0.14448312],
       [0.03120251, 0.31297425, 0.18157755, 0.16652763, 0.07789986,
        0.04615263, 0.09092606, 0.04263091, 0.05010861],
       [0.06840485, 0.04496013, 0.16070192, 0.15791784, 0.15711802,
        0.04625424, 0.04825622, 0.2672433 , 0.04914349],
       [0.21478276, 0.1589381 , 0.08201408, 0.04522215, 0.08311903,
        0.08435567, 0.19674418, 0.04818674, 0.0866373 ],
       [0.03679046, 0.10780923, 0.08465453, 0.23910965, 0.03978322,
        0.18904624, 0.09161012, 0.09321496, 0.11798159],
       [0.03667061, 0.06271794, 0.2165288 , 0.10503592, 0.10140893,
        0.10504019, 0.10320493, 0.09441586, 0.17497682],
       [0.07065896, 0.315986  , 0.04825459, 0.10493719, 0.08242614,
        0.08438886, 0.08672978, 0.10655042, 0.10006808],
       [0.05548174, 0.0434815 , 0.03326787, 0.32212145, 0.0660978 ,
        0.033667  , 0.33224892, 0.03395373, 0.07967998],


In [36]:
# Evaluate the SVC on the held-out matches.
y_pred = svclassifier.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(Y_test, y_pred))
# Some classes are never predicted or never occur in the test rows, which
# makes precision/recall undefined for them. zero_division=0 reports those
# as 0.0 (the same values as before) without the repeated warnings.
print(classification_report(Y_test, y_pred, zero_division=0))

[[1 1 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 1 0]
 [0 1 0 0 0 0 1 0 0]
 [0 3 0 2 0 0 0 0 0]
 [0 0 0 1 0 0 0 1 0]
 [0 0 0 2 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0]
 [0 1 0 1 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0]]
                           precision    recall  f1-score   support

        ChennaiSuperKings       1.00      0.33      0.50         3
            DelhiCapitals       0.00      0.00      0.00         2
      KolkataKnightRiders       0.00      0.00      0.00         2
            MumbaiIndians       0.33      0.40      0.36         5
              PunjabLions       0.00      0.00      0.00         2
          RajasthanRoyals       0.00      0.00      0.00         2
RoyalChallengersBangalore       0.00      0.00      0.00         0
       SunrisersHyderabad       0.50      0.50      0.50         4
                     tied       0.00      0.00      0.00         0

                 accuracy                           0.25        20
                macro avg       0.20      0.14      0.15        20
    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [37]:
import pickle
# Persist the fitted logistic-regression model. The with-block guarantees
# the file is flushed and closed; the bare open() left the handle dangling.
with open('IPL_clf.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)