In [0]:
# Import Dependencies
import pandas as pd

# Xgboost is for another type of machine learning model based on decision trees
import xgboost as xgb

# The other two models are Support Vector Machines and Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Import keras for neural networks
from keras.layers import Dense, Dropout
from keras.models import Sequential

# Importing display for displaying our results
from IPython.display import display

# Improting files for uploading files
from google.colab import files

# Import Train_Test Split and Label Encoder from sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

##Models we are gonna Use :
- Neural Network
- Logistic Regression
- XGBoost
- Support Vector Machines


In [3]:
# Uploading the dataset
files.upload()

Saving odi_win_lose.csv to odi_win_lose.csv


{'odi_win_lose.csv': b'Unnamed: 0,Scorecard,Team 1,Team 2,Margin,Ground,Match Date,Winner,Host_Country,Venue_Team1,Venue_Team2,Innings_Team1,Innings_Team2\n0,ODI # 1,Australia,England,Winner2ndInning,Melbourne,"Jan 5, 1971",Australia,Australia,Home,Away,Second,First\n1,ODI # 2,England,Australia,Winner2ndInning,Manchester,"Aug 24, 1972",England,England,Home,Away,Second,First\n2,ODI # 3,England,Australia,Winner2ndInning,Lord\'s,"Aug 26, 1972",Australia,England,Home,Away,First,Second\n3,ODI # 4,England,Australia,Winner2ndInning,Birmingham,"Aug 28, 1972",England,England,Home,Away,Second,First\n4,ODI # 5,New Zealand,Pakistan,Winner1stInning,Christchurch,"Feb 11, 1973",New Zealand,New Zealand,Home,Away,First,Second\n5,ODI # 6,England,New Zealand,Winner2ndInning,Swansea,"Jul 18, 1973",England,England,Home,Away,Second,First\n6,ODI # 8,England,West Indies,Winner2ndInning,Leeds,"Sep 5, 1973",England,England,Home,Away,Second,First\n7,ODI # 9,England,West Indies,Winner2ndInning,The Oval,"Sep 7, 19

##Columns to select - 
- Team 1
- Team 2
- Ground
- Host Country
- Venue_Team1
- Venue_Team2
- Innings_Team1
- Innings_Team2
- Winner




In [0]:
# Reading the dataset and selecting the appropriate features
dataframe_raw = pd.read_csv('odi_win_lose.csv')
dataframe = pd.DataFrame(dataframe_raw, columns=['Team 1', 'Team 2', 'Ground', 'Host_Country', 'Venue_Team1', 'Venue_Team2', 'Innings_Team1', 'Innings_Team2'])


In [0]:
# generating the target labels as 0, 1

# Storing the contents of the main dataframe into an array
data_x = dataframe_raw.values

# Creating an empty target variable
y_raw = []
for i in range(data_x.shape[0]):
  
  # If Winner is equals to Team1
  if (data_x[i][7] == data_x[i][2]):
    y_raw.append(0)
  # Else If Winner is equals to Team2
  elif (data_x[i][7] == data_x[i][3]):
    y_raw.append(1)

In [17]:
# Printing y_raw samples
print(len(y_raw))

7494


In [0]:
# Define all the label Encoders
le1 = LabelEncoder()
le2 = LabelEncoder()
le3 = LabelEncoder()
le4 = LabelEncoder()

# Label Encoding all the features
dataframe['Team 1'] = le1.fit_transform(dataframe['Team 1'])
dataframe['Team 2'] = le1.fit_transform(dataframe['Team 2'])
dataframe['Host_Country'] = le1.fit_transform(dataframe['Host_Country'].astype(str))
dataframe['Ground'] = le2.fit_transform(dataframe['Ground'])
dataframe['Venue_Team1'] = le3.fit_transform(dataframe['Venue_Team1'])
dataframe['Venue_Team2'] = le3.fit_transform(dataframe['Venue_Team2'])
dataframe['Innings_Team1'] = le4.fit_transform(dataframe['Innings_Team1'])
dataframe['Innings_Team2'] = le4.fit_transform(dataframe['Innings_Team2'])

In [0]:
# Finally defining our input and targets
x = dataframe.values
y = y_raw

In [0]:
# Splitting into Train and Test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3) 

In [66]:
# Building all the models = nn, svm, regression, xgboost

# Building keras_nn
nn = Sequential()
nn.add(Dense(units=32, input_dim=8, activation='relu'))
nn.add(Dropout(0.2))
nn.add(Dense(units=16, activation='relu'))
nn.add(Dense(units=1, activation='softmax'))

nn.compile(loss='sparse_categorical_crossentropy', optimizer ='adam', metrics = ['accuracy'])
print(nn.summary())


# Building Support Vector Machine
svc = SVC()

# Building Logistic Regression Model
regression = LogisticRegression()

# Building XGBoost Model
xgboost = xgb.XGBClassifier()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_27 (Dense)             (None, 32)                288       
_________________________________________________________________
dropout_9 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_28 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_29 (Dense)             (None, 1)                 17        
Total params: 833
Trainable params: 833
Non-trainable params: 0
_________________________________________________________________
None


In [55]:
# Train Support Vector Machine
svc.fit(x_train,y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [56]:
# Train Logistic Regression Model
regression.fit(x_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [57]:
# Train xgboost
xgboost.fit(x_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [62]:
xgboost.score(x_test, y_test)

0.6198310360160071

In [63]:
regression.score(x_test, y_test)

0.5393508225878169

In [67]:
nn.fit(x_test, y_test, epochs=10)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f99d2c03588>