## Importing Libraries

In [1]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_columns', None)

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pickle
from sklearn.metrics import mean_absolute_error

## Uploading the Featured T20Is Data file

In [2]:
df = pd.read_csv('../Material/T20Is_Featured.csv')

In [3]:
df

Unnamed: 0,id,innings,batting_team,bowling_team,overs,over,ball,total_runs,player_dismissed,total,total_score,prev_30_runs,prev_30_wickets,total_wickets,prev_30_dot_balls,prev_30_boundaries
0,211028,1,England,Australia,0.1,0,1,0,0,179,0,0,0,0,1,0
1,211028,1,England,Australia,0.2,0,2,1,0,179,1,1,0,0,1,0
2,211028,1,England,Australia,0.3,0,3,0,0,179,1,1,0,0,2,0
3,211028,1,England,Australia,0.4,0,4,0,0,179,1,1,0,0,3,0
4,211028,1,England,Australia,0.5,0,5,0,0,179,1,1,0,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161909,1348651,2,New Zealand,India,11.3,11,3,0,0,66,66,31,3,8,13,2
161910,1348651,2,New Zealand,India,11.4,11,4,0,0,66,66,30,3,8,14,2
161911,1348651,2,New Zealand,India,11.5,11,5,0,1,66,66,28,4,9,15,2
161912,1348651,2,New Zealand,India,11.6,11,6,0,0,66,66,27,4,9,16,2


In [4]:
df = pd.get_dummies(data=df, columns=['batting_team', 'bowling_team'])

In [5]:
df.columns

Index(['id', 'innings', 'overs', 'over', 'ball', 'total_runs',
       'player_dismissed', 'total', 'total_score', 'prev_30_runs',
       'prev_30_wickets', 'total_wickets', 'prev_30_dot_balls',
       'prev_30_boundaries', 'batting_team_Australia',
       'batting_team_Bangladesh', 'batting_team_England', 'batting_team_India',
       'batting_team_New Zealand', 'batting_team_Pakistan',
       'batting_team_South Africa', 'batting_team_Sri Lanka',
       'batting_team_West Indies', 'bowling_team_Australia',
       'bowling_team_Bangladesh', 'bowling_team_England', 'bowling_team_India',
       'bowling_team_New Zealand', 'bowling_team_Pakistan',
       'bowling_team_South Africa', 'bowling_team_Sri Lanka',
       'bowling_team_West Indies'],
      dtype='object')

In [6]:
df = df[['id', 'batting_team_Australia',
       'batting_team_Bangladesh', 'batting_team_England', 'batting_team_India',
       'batting_team_New Zealand', 'batting_team_Pakistan',
       'batting_team_South Africa', 'batting_team_Sri Lanka',
       'batting_team_West Indies', 'bowling_team_Australia',
       'bowling_team_Bangladesh', 'bowling_team_England', 'bowling_team_India',
       'bowling_team_New Zealand', 'bowling_team_Pakistan',
       'bowling_team_South Africa', 'bowling_team_Sri Lanka',
       'bowling_team_West Indies',
        'overs', 'total_score', 'total_wickets', 'prev_30_runs',
       'prev_30_wickets', 'prev_30_dot_balls',
       'prev_30_boundaries', 'total']]

### train_test_split

In [7]:
X = df.drop(labels=['total', 'id'], axis=1)
y = df['total'].values
X

Unnamed: 0,batting_team_Australia,batting_team_Bangladesh,batting_team_England,batting_team_India,batting_team_New Zealand,batting_team_Pakistan,batting_team_South Africa,batting_team_Sri Lanka,batting_team_West Indies,bowling_team_Australia,bowling_team_Bangladesh,bowling_team_England,bowling_team_India,bowling_team_New Zealand,bowling_team_Pakistan,bowling_team_South Africa,bowling_team_Sri Lanka,bowling_team_West Indies,overs,total_score,total_wickets,prev_30_runs,prev_30_wickets,total_wickets.1,prev_30_dot_balls,prev_30_boundaries
0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.1,0,0,0,0,0,1,0
1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.2,1,0,1,0,0,1,0
2,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.3,1,0,1,0,0,2,0
3,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.4,1,0,1,0,0,3,0
4,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.5,1,0,1,0,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161909,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,11.3,66,8,31,3,8,13,2
161910,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,11.4,66,8,30,3,8,14,2
161911,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,11.5,66,9,28,4,9,15,2
161912,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,11.6,66,9,27,4,9,16,2


In [8]:
# split into train test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y) 

In [9]:
X_train = X_train.values
X_test = X_test.values
X_train = np.asarray(X_train).astype(np.float32)
X_test = np.asarray(X_test).astype(np.float32)

In [10]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(121435, 26) (40479, 26) (121435,) (40479,)


## Training LR Model

In [11]:
LR_model = LinearRegression()
LR_model.fit(X_train, y_train)

LinearRegression()

In [12]:
# Creating a pickle file for the classifier
filename = '../Material/lr-model.pkl'
pickle.dump(LR_model, open(filename, 'wb'))

In [13]:
prediction = LR_model.predict(X_test)
meanError = int(mean_absolute_error(y_test, prediction))
print(meanError)

18


In [14]:
# Function to predict the scores
def score_prediction(Bat_Team, Bowl_Team, overs, total_score, total_wickets,
                     prev_30_runs, prev_30_wickets, prev_30_dot_balls, prev_30_boundaries):
    temp_array = list()
    
    if Bat_Team == 'AUS':
        temp_array += [1,0,0,0,0,0,0,0,0]
    elif Bat_Team == 'BAN':
        temp_array += [0,1,0,0,0,0,0,0,0]
    elif Bat_Team == 'ENG':
        temp_array += [0,0,1,0,0,0,0,0,0]
    elif Bat_Team == 'IND':
        temp_array += [0,0,0,1,0,0,0,0,0]
    elif Bat_Team == 'NZ':
        temp_array += [0,0,0,0,1,0,0,0,0]
    elif Bat_Team == 'PAK':
        temp_array += [0,0,0,0,0,1,0,0,0]
    elif Bat_Team == 'SF':
        temp_array += [0,0,0,0,0,0,1,0,0]
    elif Bat_Team == 'SL':
        temp_array += [0,0,0,0,0,0,0,1,0]
    elif Bat_Team == 'WI':
        temp_array += [0,0,0,0,0,0,0,0,1]
    
    
    if Bowl_Team == 'AUS':
        temp_array += [1,0,0,0,0,0,0,0,0]
    elif Bowl_Team == 'BAN':
        temp_array += [0,1,0,0,0,0,0,0,0]
    elif Bowl_Team == 'ENG':
        temp_array += [0,0,1,0,0,0,0,0,0]
    elif Bowl_Team == 'IND':
        temp_array += [0,0,0,1,0,0,0,0,0]
    elif Bowl_Team == 'NZ':
        temp_array += [0,0,0,0,1,0,0,0,0]
    elif Bowl_Team == 'PAK':
        temp_array += [0,0,0,0,0,1,0,0,0]
    elif Bowl_Team == 'SF':
        temp_array += [0,0,0,0,0,0,1,0,0]
    elif Bowl_Team == 'SL':
        temp_array += [0,0,0,0,0,0,0,1,0]
    elif Bowl_Team == 'WI':
        temp_array += [0,0,0,0,0,0,0,0,1]
            
    
    temp_array += [overs, total_score, total_wickets, prev_30_runs, prev_30_wickets, prev_30_dot_balls, prev_30_boundaries]
    data = np.array([temp_array])
    my_prediction = int(LR_model.predict(data))
    
    print('Predicted Score: ',my_prediction)
    print('Predicted Score Range: ',my_prediction - meanError, 'to',my_prediction + meanError)
        

In [15]:
Bat_Team = 'AUS'

Bowl_Team = 'IND'

overs = 10.3

total_score = 67

total_wickets = 2

prev_30_runs = 37

prev_30_wickets = 1

prev_30_dot_balls = 8

prev_30_boundaries = 5

score_prediction(Bat_Team, Bowl_Team, overs, total_score, total_wickets, prev_30_runs, prev_30_wickets, prev_30_dot_balls, prev_30_boundaries)

ValueError: X has 25 features, but LinearRegression is expecting 26 features as input.