# **Importing the libraries**

In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import pickle
from sklearn.metrics import mean_absolute_error

# **Loading the data**

In [16]:
# Load the ball-by-ball dataset (with engineered features) from the working directory.
df = pd.read_csv(filepath_or_buffer="Data_with_features.csv")

In [17]:
# Peek at the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,id,innings,batting_team,bowling_team,overs,over,ball,total_runs,player_dismissed,total,total_score,prev_30_runs,prev_30_wickets,total_wickets,prev_30_dot_balls,prev_30_boundaries,total_dot_balls,total_boundaries
0,211028,1,England,Australia,0.1,0,1,0,0,179,0,0,0,0,1,0,1,0
1,211028,1,England,Australia,0.2,0,2,1,0,179,1,1,0,0,1,0,1,0
2,211028,1,England,Australia,0.3,0,3,0,0,179,1,1,0,0,2,0,2,0
3,211028,1,England,Australia,0.4,0,4,0,0,179,1,1,0,0,3,0,3,0
4,211028,1,England,Australia,0.5,0,5,0,0,179,1,1,0,0,4,0,4,0


In [18]:
# Preview the column names that one-hot encoding the two team columns produces;
# the result is not assigned, so df is left untouched by this cell.
pd.get_dummies(df, columns=['batting_team', 'bowling_team']).columns

Index(['id', 'innings', 'overs', 'over', 'ball', 'total_runs',
       'player_dismissed', 'total', 'total_score', 'prev_30_runs',
       'prev_30_wickets', 'total_wickets', 'prev_30_dot_balls',
       'prev_30_boundaries', 'total_dot_balls', 'total_boundaries',
       'batting_team_Australia', 'batting_team_Bangladesh',
       'batting_team_England', 'batting_team_India',
       'batting_team_New Zealand', 'batting_team_Pakistan',
       'batting_team_South Africa', 'batting_team_Sri Lanka',
       'batting_team_West Indies', 'bowling_team_Australia',
       'bowling_team_Bangladesh', 'bowling_team_England', 'bowling_team_India',
       'bowling_team_New Zealand', 'bowling_team_Pakistan',
       'bowling_team_South Africa', 'bowling_team_Sri Lanka',
       'bowling_team_West Indies'],
      dtype='object')

In [19]:
# Replace the two team columns with one-hot indicator columns.
df = pd.get_dummies(df, columns=['batting_team', 'bowling_team'])

In [20]:
# Show the column names of the encoded frame.
df.columns

Index(['id', 'innings', 'overs', 'over', 'ball', 'total_runs',
       'player_dismissed', 'total', 'total_score', 'prev_30_runs',
       'prev_30_wickets', 'total_wickets', 'prev_30_dot_balls',
       'prev_30_boundaries', 'total_dot_balls', 'total_boundaries',
       'batting_team_Australia', 'batting_team_Bangladesh',
       'batting_team_England', 'batting_team_India',
       'batting_team_New Zealand', 'batting_team_Pakistan',
       'batting_team_South Africa', 'batting_team_Sri Lanka',
       'batting_team_West Indies', 'bowling_team_Australia',
       'bowling_team_Bangladesh', 'bowling_team_England', 'bowling_team_India',
       'bowling_team_New Zealand', 'bowling_team_Pakistan',
       'bowling_team_South Africa', 'bowling_team_Sri Lanka',
       'bowling_team_West Indies'],
      dtype='object')

In [21]:
# Keep only the modelling columns, in a fixed order: `id`, the one-hot batting
# and bowling team indicators, the match-state features, and finally `total`.
team_names = ['Australia', 'Bangladesh', 'England', 'India', 'New Zealand',
              'Pakistan', 'South Africa', 'Sri Lanka', 'West Indies']
batting_columns = [f'batting_team_{team}' for team in team_names]
bowling_columns = [f'bowling_team_{team}' for team in team_names]
state_columns = ['overs', 'total_score', 'total_wickets', 'total_dot_balls',
                 'total_boundaries', 'prev_30_runs', 'prev_30_wickets',
                 'prev_30_dot_balls', 'prev_30_boundaries']
df = df[['id', *batting_columns, *bowling_columns, *state_columns, 'total']]

# **train_test_split**

In [22]:
# y is the `total` column; X is every remaining column except `total` and `id`.
y = df['total'].to_numpy()
X = df.drop(columns=['total', 'id'])

In [23]:
# Hold out 25% of the rows as a test set; the fixed random_state makes the
# split repeatable.
# NOTE(review): the rows look like ball-by-ball snapshots that share an `id`
# and `total`, so a row-level split may put balls from the same innings in both
# train and test - consider a grouped split on `id`; confirm against the data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

In [24]:
# Convert both feature splits to float32 NumPy arrays.
# np.asarray accepts a DataFrame as well as an ndarray, so this cell can be
# re-run safely (calling `.values` on an already-converted ndarray would raise
# AttributeError).
X_train = np.asarray(X_train, dtype=np.float32)
X_test = np.asarray(X_test, dtype=np.float32)

In [25]:
# Sanity-check the shapes of the four split arrays.
print(*(split.shape for split in (X_train, X_test, y_train, y_test)))

(89686, 27) (29896, 27) (89686,) (29896,)


# **Training LR model**

In [26]:
# Fit a linear regression model on the training split.
LR_model = LinearRegression()
LR_model.fit(X_train,y_train)

In [27]:
# Persist the fitted regression model to a pickle file.
filename = 'lr-model.pkl'
# The context manager guarantees the file handle is flushed and closed,
# even if pickling raises.
with open(filename, 'wb') as model_file:
    pickle.dump(LR_model, model_file)

In [28]:
# Predict on the held-out rows and report the mean absolute error.
prediction = LR_model.predict(X_test)
mean_absolute_error(y_true=y_test, y_pred=prediction)

18.305054252977705

In [29]:
# Function to predict scores :

def score_prediction(Bat_Team,Bowl_Team,overs,total_score,total_wickets, total_dot_balls, total_boundaries, prev_runs_30,prev_wickets_30,prev_30_dot_balls,prev_30_boundaries):
    """Predict the innings total for a match state and print it with a range.

    The feature vector is assembled in the same order as the training columns:
    nine one-hot batting-team flags, nine one-hot bowling-team flags, then the
    nine numeric match-state values.

    Parameters
    ----------
    Bat_Team, Bowl_Team : str
        Team codes; each must be one of
        'AUS', 'BAN', 'ENG', 'IND', 'NZ', 'PAK', 'SA', 'SL', 'WI'.
    overs : float
        Overs bowled so far (presumably in over.ball notation such as 10.3,
        as in the training data - confirm).
    total_score, total_wickets, total_dot_balls, total_boundaries : number
        Running totals for the innings so far.
    prev_runs_30, prev_wickets_30, prev_30_dot_balls, prev_30_boundaries : number
        The same statistics over the previous 30 balls.

    Returns
    -------
    None
        The prediction and its range are printed, not returned.

    Raises
    ------
    ValueError
        If either team code is not recognised.
    """
    # Position in this tuple == position of the team's one-hot column.
    team_codes = ('AUS', 'BAN', 'ENG', 'IND', 'NZ', 'PAK', 'SA', 'SL', 'WI')

    def _one_hot(code, role):
        # Fail early with a clear message instead of building a short feature
        # vector that only fails later inside predict().
        if code not in team_codes:
            raise ValueError(
                f"Unknown {role} {code!r}; expected one of {', '.join(team_codes)}"
            )
        return [int(code == known) for known in team_codes]

    temp_array = (
        _one_hot(Bat_Team, 'Bat_Team')
        + _one_hot(Bowl_Team, 'Bowl_Team')
        + [overs, total_score, total_wickets, total_dot_balls, total_boundaries,
           prev_runs_30, prev_wickets_30, prev_30_dot_balls, prev_30_boundaries]
    )
    data = np.array([temp_array])

    # predict() returns a one-element array; index it explicitly because
    # int() on a non-0-d array is deprecated in recent NumPy releases.
    my_prediction = int(LR_model.predict(data)[0])

    # Half-width of the reported range (the test-set MAE is about 18 runs).
    error_margin = 18

    print('Predicted score: ',my_prediction)
    print('Predicted score range: ',my_prediction - error_margin, 'to' , my_prediction + error_margin)

In [30]:
# Example match state passed to score_prediction.
Bat_Team, Bowl_Team = 'AUS', 'IND'
overs = 10.3
total_score = 67          # current score
total_wickets = 2         # current wicket
total_dot_balls = 27      # total dot balls
total_boundaries = 5      # total boundaries
prev_runs_30 = 37         # runs in last 30 balls
prev_wickets_30 = 1       # wickets in last 30 balls
prev_30_dot_balls = 8     # dots in last 30 balls
prev_30_boundaries = 5    # boundaries in last 30 balls

score_prediction(
    Bat_Team,
    Bowl_Team,
    overs,
    total_score,
    total_wickets,
    total_dot_balls,
    total_boundaries,
    prev_runs_30,
    prev_wickets_30,
    prev_30_dot_balls,
    prev_30_boundaries,
)

Predicted score:  142
Predicted score range:  124 to 160


  my_prediction = int(LR_model.predict(data))


In [31]:
# Without the extra features, the predicted score is 145 (range 127 to 163).
# With the addition of 2 features, the predicted score changes to 142 (range 124 to 160).