# **Defining Custom Accuracy**

In [None]:
def custom_accuracy(y_test,y_pred,thresold):
    right = 0
    l = len(y_pred)
    for i in range(0,l):
        if(abs(round(y_pred[i])-y_test[i]) <= thresold):
            right += 1
    return ((right/l)*100)

In [None]:
#dataset.iloc[:,[11,3,4,13,14]]

# **Importing the preprocessed dataset and selecting required columns**

In [None]:
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('final_PreProcess10.csv')

dataset=dataset[dataset['innings']==1]
#filter the overs
dataset=dataset[dataset['ball']>9.6]


# Selection of required Columns
X = dataset.iloc[:,[11,3,4,13,14]].values
y = dataset.iloc[:, 5].values

# **Splitting the dataset into the Training set and Test set and Feature Scaling**

In [None]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [14]:
from joblib import dump, load
dump(sc, 'std_scaler.bin', compress=True)

['std_scaler.bin']

# **Random K-Fold Cross validation for Ridge regression model on the Cricket dataset**

In [5]:
# random search Ridge regression model on the Cricket dataset
from scipy.stats import loguniform
from scipy.stats import uniform
from pandas import read_csv
from sklearn.linear_model import Ridge
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import RandomizedSearchCV

model = Ridge()

# define evaluation
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)


# define search space
space = dict()
space['solver'] = ['svd', 'cholesky', 'lsqr', 'sag']
# space['alpha'] = loguniform(1e-5, 100)
space['alpha'] = uniform(1e-5, 100)
space['fit_intercept'] = [True, False]
space['normalize'] = [True, False]


# define search
search = RandomizedSearchCV(model, space, n_iter=500, scoring='neg_mean_absolute_error', n_jobs=-1, cv=cv, random_state=1)


# execute search
result = search.fit(X_train, y_train)


# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)





Best Score: -10.865399368073536
Best Hyperparameters: {'alpha': 1.8576302177409518, 'fit_intercept': True, 'normalize': False, 'solver': 'sag'}


# **Fitting the Model with updated parameters**

In [6]:
# Fitting the Model
y_pred = search.predict(X_test)
score = search.score(X_test,y_test)*100
print("Custom accuracy:" , custom_accuracy(y_test,y_pred,10))

Custom accuracy: 60.43768430855192


# **Random K-Fold Cross validation for Linear regression model on the Cricket dataset**

In [7]:
from scipy.stats import loguniform
from pandas import read_csv
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import RandomizedSearchCV

model = LinearRegression()

# define evaluation
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)


# define search space
space = dict()
space['fit_intercept'] = [True, False]
space['normalize'] = [True, False]


# define search
search = RandomizedSearchCV(model, space, n_iter=500, scoring='neg_mean_absolute_error', n_jobs=-1, cv=cv, random_state=1)


# execute search
result = search.fit(X_train, y_train)


# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)



Best Score: -10.865744742058437
Best Hyperparameters: {'normalize': True, 'fit_intercept': True}


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




In [8]:
# Fitting the Model
y_pred = search.predict(X_test)
score = search.score(X_test,y_test)*100
print("Custom accuracy:" , custom_accuracy(y_test,y_pred,10))

Custom accuracy: 60.44544466863262


# **Random K-Fold Cross validation for Lasso regression model on the Cricket dataset**

In [9]:
from scipy.stats import loguniform
from pandas import read_csv
from sklearn.linear_model import Lasso
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import RandomizedSearchCV

model = Lasso()

# define evaluation
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)


# define search space
space = dict()
space['tol'] = loguniform(1e-5, 100)
space['fit_intercept'] = [True, False]
space['normalize'] = [True, False]
space['selection'] = ['cyclic','random']


# define search
search = RandomizedSearchCV(model, space, n_iter=500, scoring='neg_mean_absolute_error', n_jobs=-1, cv=cv, random_state=1)


# execute search
result = search.fit(X_train, y_train)


# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: -11.166146069919716
Best Hyperparameters: {'fit_intercept': True, 'normalize': False, 'selection': 'cyclic', 'tol': 1.012396647506536e-05}




In [10]:
# Fitting the Model
y_pred = search.predict(X_test)
score = search.score(X_test,y_test)*100
print("Custom accuracy:" , custom_accuracy(y_test,y_pred,10))

Custom accuracy: 57.993170883128975


In [None]:
from scipy.stats import loguniform
from pandas import read_csv
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV

model = RandomForestRegressor()

# define evaluation
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)


# define search space / Grid Space 

space = dict()
space['criterion'] = ['mae','mse']
space['warm_start'] = [True, False]
space['ccp_alpha'] = [1e-5,1e-4,1e-3,1e-2,1e-1,1,10,100]

# define search
search = GridSearchCV(model, space, refit = True, verbose = 3,n_jobs=-1)


# execute search
result = search.fit(X_train, y_train)


# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Fitting 5 folds for each of 32 candidates, totalling 160 fits


In [None]:
# Fitting the Model
y_pred = search.predict(X_test)
score = search.score(X_test,y_test)*100
print("Custom accuracy:" , custom_accuracy(y_test,y_pred,10))

In [None]:
# Splitting the dataset into the Training set and Test set
# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# # Feature Scaling
# from sklearn.preprocessing import StandardScaler
# sc = StandardScaler()
# X_train = sc.fit_transform(X_train)
# X_test = sc.transform(X_test)

# Training the dataset
from sklearn.linear_model import LinearRegression
lin = LinearRegression()
lin.fit(X_train,y_train)

# Testing the dataset on trained model
y_pred = lin.predict(X_test)
score = lin.score(X_test,y_test)*100
print("R square value:" , score)
print("Custom accuracy:" , custom_accuracy(y_test,y_pred,10))

# Testing with a custom input
import numpy as np
new_prediction = lin.predict(sc.transform(np.array([[100,5,19.3,50,2]])))
print("Prediction score:" , new_prediction)

R square value: 75.33244760640731
Custom accuracy: 60.515287909358996
Prediction score: [108.7913039]


In [None]:
# l=[]
# for i in range(0,20):
#   l.append(custom_accuracy(y_test,y_pred,i))
# for i,ele in enumerate(l):
#   print(i,ele)

## Random Forest Regressor

In [None]:
# Training the dataset
from sklearn.ensemble import RandomForestRegressor
reg = RandomForestRegressor(n_estimators=100,max_features=None)
reg.fit(X_train,y_train)

# Testing the dataset on trained model
y_pred = reg.predict(X_test)
score = reg.score(X_test,y_test)*100
print("R square value:" , score)
print("Custom accuracy:" , custom_accuracy(y_test,y_pred,20))

# Testing with a custom input
import numpy as np
new_prediction = reg.predict(sc.transform(np.array([[100,0,13,50,50]])))
print("Prediction score:" , new_prediction)

In [None]:
l=[]
for i in range(0,20):
  l.append(custom_accuracy(y_test,y_pred,i))
for i,ele in enumerate(l):
  print(i,ele)

In [10]:
import numpy as np
new_prediction = reg.predict(sc.transform(np.array([[100,9,19,3,1]])))
print("Prediction score:" , new_prediction)

Prediction score: [105.02]


In [None]:
#import gzip, pickle, pickletools

In [None]:
# # optimize pickle size

# filepath = "random_forest_optimised.pkl"
# with gzip.open(filepath, "wb") as f:
#     pickled = pickle.dumps(clf)
#     optimized_pickle = pickletools.optimize(pickled)
#     f.write(optimized_pickle)

In [None]:
# #opening the pickle file
# with gzip.open(filepath, 'rb') as f:
#     p = pickle.Unpickler(f)
#     clf = p.load()

In [18]:
import pickle

pickle.dump(reg, open('random_forest_regressor.pkl','wb'))

In [None]:
dataset.tail(10)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,striker,non_striker,Wicket,overs,run_sum,innings,runs_off_bat,player_dismissed,required_striker,required_non_striker,runs
100181,100181,193495,AR Patel,SS Iyer,4,18.3,156.0,1,1,,8,0,145
100182,100182,193496,SS Iyer,AR Patel,4,18.4,156.0,1,0,,8,0,146
100183,100183,193497,AR Patel,SS Iyer,4,18.5,156.0,1,2,,10,0,148
100184,100184,193498,AR Patel,SS Iyer,4,18.6,156.0,1,0,,10,0,148
100185,100185,193499,SS Iyer,AR Patel,4,19.1,156.0,1,1,,11,0,149
100186,100186,193500,AR Patel,SS Iyer,5,19.2,156.0,1,0,AR Patel,0,0,149
100187,100187,193501,SS Iyer,K Rabada,5,19.3,156.0,1,0,,0,0,149
100188,100188,193502,SS Iyer,K Rabada,5,19.4,156.0,1,0,,0,0,149
100189,100189,193503,SS Iyer,K Rabada,5,19.5,156.0,1,6,,6,0,155
100190,100190,193504,SS Iyer,K Rabada,6,19.6,156.0,1,1,K Rabada,1,0,156


In [None]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.linear_model import Lasso,Ridge,BayesianRidge,ElasticNet,HuberRegressor,LinearRegression,LogisticRegression,SGDRegressor
from sklearn.metrics import mean_squared_error

models = [['DecisionTree :',DecisionTreeRegressor()],
           ['Linear Regression :', LinearRegression()],
           ['RandomForest :',RandomForestRegressor()],
           ['KNeighbours :', KNeighborsRegressor(n_neighbors = 2)],
           #['SVM :', SVR()],
           ['AdaBoostClassifier :', AdaBoostRegressor()],
           ['GradientBoostingClassifier: ', GradientBoostingRegressor()],
           ['Xgboost: ', XGBRegressor()],
           ['CatBoost: ', CatBoostRegressor(logging_level='Silent')],
           ['Lasso: ', Lasso()],
           ['Ridge: ', Ridge()],
           ['BayesianRidge: ', BayesianRidge()],
           ['ElasticNet: ', ElasticNet()],
           ['HuberRegressor: ', HuberRegressor()]]

print("Results...")


for name,model in models:
    model = model
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print(name, (np.sqrt(mean_squared_error(y_test, predictions))))
    print("Custom accuracy:" , custom_accuracy(y_test,y_pred,5))
    print(model.predict(sc.transform(np.array([[128,4,16.6,71,4]]))))

Results...
DecisionTree : 9.961547714437772
Custom accuracy: 49.10757356488181
[157.]
Linear Regression : 9.656075381415805
Custom accuracy: 49.10757356488181
[161.14098297]
RandomForest : 6.90872621058927
Custom accuracy: 49.10757356488181
[165.28]
KNeighbours : 6.4867454255915975
Custom accuracy: 49.10757356488181
[168.]
AdaBoostClassifier : 12.509737796523995
Custom accuracy: 49.10757356488181
[162.9109136]
GradientBoostingClassifier:  9.252298596484929
Custom accuracy: 49.10757356488181
[162.88930094]
Xgboost:  9.265018120994084
Custom accuracy: 49.10757356488181
[163.56276]
CatBoost:  8.114021434105535
Custom accuracy: 49.10757356488181
[166.33697438]
Lasso:  9.96113152834177
Custom accuracy: 49.10757356488181
[160.29346897]
Ridge:  9.656145953875901
Custom accuracy: 49.10757356488181
[161.14103671]
BayesianRidge:  9.65610794174292
Custom accuracy: 49.10757356488181
[161.1410078]
ElasticNet:  14.596770634425454
Custom accuracy: 49.10757356488181
[162.55344914]
HuberRegressor:  9.7