In [18]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('final.csv')

In [62]:
df.tail(50)

Unnamed: 0,BattingTeam,BowlingTeam,Venue,Overs,CurrentScore,Wicket,Target,Overleft,Wicketleft
17841,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",8,69,1,138,12,9
17842,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",9,77,2,130,11,8
17843,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",10,84,2,123,10,8
17844,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",11,90,3,117,9,7
17845,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",12,106,3,101,8,7
17846,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",13,115,3,92,7,7
17847,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",14,117,4,90,6,6
17848,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",15,123,4,84,5,6
17849,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",16,150,4,57,4,6
17850,Royal Challengers Bangalore,Lucknow Super Giants,"Eden Gardens, Kolkata",17,165,4,42,3,6


In [33]:
df['Overleft'] = 20 - df['Overs']
df['Wicketleft'] = 10 - df['Wicket']
df['Target'] = df['Target']-df['CurrentScore']

In [93]:
categorical = OneHotEncoder(sparse_output=False, drop='first')
scaler = MinMaxScaler()

In [94]:
X = df.drop(['Target','Overs','Wicket'],axis=1)
y = df['Target']

In [95]:
xtrain, xtest, ytrain, ytest = train_test_split(X,y)

In [96]:
xtrain.head()

Unnamed: 0,BattingTeam,BowlingTeam,Venue,CurrentScore,Overleft,Wicketleft
14444,Kolkata Knight Riders,Rajasthan Royals,Dubai International Cricket Stadium,10,18,10
12951,Rajasthan Royals,Royal Challengers Bangalore,Sawai Mansingh Stadium,101,7,9
289,Kolkata Knight Riders,Mumbai Indians,"Eden Gardens, Kolkata",40,14,7
6460,Rajasthan Royals,Mumbai Indians,Sawai Mansingh Stadium,72,11,9
11192,Sunrisers Hyderabad,Punjab Kings,"Rajiv Gandhi International Stadium, Uppal",29,14,9


In [109]:
categorical = Pipeline(steps = [
    ('onehot', categorical)
])

categorical_cols = ['BattingTeam','BowlingTeam','Venue']
num_cols = ['CurrentScore','Overleft','Wicketleft']

preprocessor = ColumnTransformer(
    transformers= [
        ('cat', categorical, categorical_cols),
        ('num', scaler, num_cols)
    ]
)

model = LinearRegression()

In [110]:
master = Pipeline(
    steps=[
        ('preprocess', preprocessor),
        ('model', model)
    ]
)

In [111]:
master.fit(xtrain, ytrain)

In [112]:
ypred = master.predict(xtest)

In [113]:
from sklearn.metrics import root_mean_squared_error,mean_squared_error,adjusted_rand_score
print(mean_squared_error(ytest,ypred))
print(root_mean_squared_error(ytest,ypred))
adjusted_rand_score(ytest,ypred)

426.6182735527747
20.654739735779163




-4.197777132584835e-06

In [105]:
master.predict(pd.DataFrame({
    'BattingTeam':['Kolkata Knight Riders'],
    'BowlingTeam':['Chennai Super Kings'],
    'Venue':['MA Chidambaram Stadium, Chepauk, Chennai'],
    'Overs':[19],
    'CurrentScore':[100],	
    'Wicket':[9],
    'Overleft':[1],
    'Wicketleft':[1]
}))

array([-9.93899139])

In [103]:
xtrain.head()

Unnamed: 0,BattingTeam,BowlingTeam,Venue,CurrentScore,Overleft,Wicketleft
14444,Kolkata Knight Riders,Rajasthan Royals,Dubai International Cricket Stadium,10,18,10
12951,Rajasthan Royals,Royal Challengers Bangalore,Sawai Mansingh Stadium,101,7,9
289,Kolkata Knight Riders,Mumbai Indians,"Eden Gardens, Kolkata",40,14,7
6460,Rajasthan Royals,Mumbai Indians,Sawai Mansingh Stadium,72,11,9
11192,Sunrisers Hyderabad,Punjab Kings,"Rajiv Gandhi International Stadium, Uppal",29,14,9


In [104]:
master.predict(pd.DataFrame({
    'BattingTeam':['Kolkata Knight Riders'],
    'BowlingTeam':['Chennai Super Kings'],
    'Venue':['Punjab Cricket Association IS Bindra Stadium, Mohali'],
    'Overs':[12],
    'CurrentScore':[85],	
    'Wicket':[5],
    'Overleft':[8],
    'Wicketleft':[5]
}))

array([55.7126971])

In [59]:
import joblib

joblib.dump(master,'Lnr_Reg1.pkl')

['Lnr_Reg1.pkl']