In [None]:
from pandas import read_csv
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold

In [None]:
# load dataset
# Read the CSV into a DataFrame, using the file's first column as the row index.
dataframe = pd.read_csv("for_neural_net.csv", index_col=0)
# NumPy array of the frame's values; `dataset` is rebuilt in a later cell once
# the frame's columns have been adjusted.
dataset = dataframe.values

In [None]:
# Display the (rows, columns) shape of the array taken from the freshly loaded frame.
dataset.shape

In [None]:
# Copy the 'team1_win' column under the name 'y'. When 'y' does not exist yet,
# it is appended as the last column of the frame.
dataframe = dataframe.assign(y=dataframe['team1_win'])
dataframe

In [None]:
# Columns removed before modelling. 'team1_win' has already been copied to 'y'.
# NOTE(review): the other names look outcome-derived or categorical, which would
# explain their removal — confirm the reason each one is excluded.
columns_to_drop = [
    'team1_win',
    'winner',
    'win_by_runs',
    'win_by_wickets',
    'avgTeam1_runs_greater',
    'avgTeam1_wickets_greater',
    'toss_winner',
    'umpire1',
]
dataframe = dataframe.drop(columns=columns_to_drop)

In [None]:
# Re-extract the values as a NumPy array from the current state of `dataframe`,
# replacing the array taken right after loading.
dataset = dataframe.values

In [None]:
# Display the (rows, columns) shape of the rebuilt array.
dataset.shape

In [None]:
# split into input (X) and output (Y) variables
# Every column except the last is treated as a feature and cast to float;
# the last column is the label.
X = dataset[:, :-1].astype(float)
Y = dataset[:, -1]

In [None]:
# encode class values as integers
encoder = LabelEncoder()
# fit_transform learns the distinct label values and returns Y mapped to
# integer codes in a single call.
encoded_Y = encoder.fit_transform(Y)
encoded_Y

In [None]:
# baseline model
def create_baseline():
    """Build and compile the baseline binary classifier.

    The network is sized from the notebook-level feature matrix ``X``:
    a ReLU layer with one unit per feature, a ReLU layer with half as many
    units (rounded down, minimum one), and a single sigmoid output unit.

    Returns:
        A compiled Keras ``Sequential`` model using binary cross-entropy
        loss, the Adam optimizer, and accuracy as the reported metric.
    """
    n_features = X.shape[1]
    # Dense expects an integer unit count. `/` is true division in Python 3 and
    # would hand it a float, so use floor division; keep at least one unit so a
    # single-feature input does not produce an empty layer.
    n_hidden = max(1, n_features // 2)

    # create model
    model = Sequential()
    model.add(Dense(n_features, input_dim=n_features, activation='relu'))
    model.add(Dense(n_hidden, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


In [None]:
# Evaluate the baseline model with 10-fold stratified cross-validation.
# The features in X are passed as-is: no scaling/standardization step is applied here.
estimator = KerasClassifier(build_fn=create_baseline, epochs=50, batch_size=10, verbose=0)
# Fix the shuffle seed so the fold assignment is identical on every run.
# NOTE(review): no seed is set for the network's weight initialisation in the
# visible cells, so scores can still vary slightly between runs.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)

# Mean and standard deviation of the per-fold scores, as percentages.
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

In [None]:
# Display the individual per-fold scores returned by cross_val_score.
results

In [None]:
# Load the four pre-split CSV files; each uses its first column as the row index.
x_train, y_train, x_test, y_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("x_train.csv", "y_train.csv", "x_test.csv", "y_test.csv")
)

In [None]:
# Replace each split's DataFrame with its underlying NumPy array.
x_train, x_test, y_train, y_test = (
    frame.values for frame in (x_train, x_test, y_train, y_test)
)

In [None]:
# Learn the label -> integer mapping from the training labels only.
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
# Encode the test labels with the same mapping. model.evaluate compares its
# predictions against y_test, so the test labels must use the same integer
# codes as the training labels; a label unseen in training raises here instead
# of being silently mis-scored.
y_test = encoder.transform(y_test)
y_train

In [None]:
# Single-hidden-layer network: 19 ReLU units feeding one sigmoid output unit.
model = Sequential([
    Dense(19, input_dim=x_train.shape[1], activation='relu'),
    Dense(1, activation='sigmoid'),
])
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Fit the model, holding out 10% of the training data for per-epoch validation
history = model.fit(
    x_train,
    y_train,
    validation_split=0.1,
    epochs=50,
    batch_size=10,
    verbose=0,
)
# evaluate the model on the separate test split
scores = model.evaluate(x_test, y_test, verbose=0)
metric_name = model.metrics_names[1]
print("%s: %.2f%%" % (metric_name, scores[1]*100))

In [None]:
from keras.utils.vis_utils import plot_model

# Write a diagram of the network to 'model_plot.png', annotated with layer
# names and tensor shapes.
plot_model(
    model,
    to_file='model_plot.png',
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
from statistics import mean

# Each history entry is a per-epoch list, so these figures are averages over
# all training epochs rather than final-epoch values.
print("Train accuracy: ",mean(history.history['accuracy']))
# 'val_accuracy' is measured on the validation_split hold-out taken from the
# training data in model.fit, not on x_test/y_test, so label it as validation.
print("Validation accuracy: ",mean(history.history['val_accuracy']))

In [None]:
import matplotlib.pyplot as plt

# One figure per metric, showing the per-epoch training and validation curves.
# The 'val_*' series come from the validation_split hold-out used in model.fit,
# not from the x_test/y_test split, so the legend says 'validation'.
for metric in ('accuracy', 'loss'):
    fig, ax = plt.subplots()
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('model %s - keras neural network' % metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'validation'], loc='upper left')
    plt.show()