# Predicting the Outcome of Cricket Matches

In [2]:
%matplotlib inline 
import numpy as np # imports a fast numerical programming library
import pandas as pd #lets us handle data as dataframes
# Configure how pandas renders tables in this notebook: a wide display width
# (500), up to 100 columns shown, and the HTML table representation enabled
# for notebook output.
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from patsy import dmatrices

In [3]:
# Load the per-match feature table; the first CSV column is used as the row index.
matches = pd.read_csv("../data/matcheswithfeatures.csv", index_col = 0)

In [4]:
# Build the response (y) and predictor (X) frames from a patsy formula:
# `team1Winning` is the response, the five listed columns are the predictors,
# and the leading `0 +` asks patsy not to add an intercept column to X.
# NOTE(review): the displayed X skips some index labels (e.g. 6, 8, 12) --
# presumably rows with missing values dropped by patsy's default NA handling;
# confirm against the raw CSV.
y, X = dmatrices('team1Winning ~ 0 + Avg_SR_Difference + Avg_WPR_Difference + Total_MVP_Difference + Prev_Enc_Team1_WinPerc + \
                  Total_RF_Difference', matches, return_type="dataframe")
# Flatten the response frame into a 1-D array.
y_arr = np.ravel(y)

In [5]:
# Preview the first rows only; rendering the whole design matrix bloats the notebook.
X.head(10)

Unnamed: 0,Avg_SR_Difference,Avg_WPR_Difference,Total_MVP_Difference,Prev_Enc_Team1_WinPerc,Total_RF_Difference
5,55.665975,1.414786,0.0,0.000000,0.000000
7,6.135734,-1.591368,1.0,0.000000,100.000000
9,4.666844,0.111379,0.0,0.000000,0.000000
10,25.388743,-0.021123,0.0,0.000000,0.000000
11,-28.438618,11.723738,0.0,0.000000,0.000000
13,41.221731,6.066625,0.0,0.000000,66.666667
14,37.233069,0.581470,2.0,0.000000,66.666667
15,-13.582248,1.010938,1.0,0.000000,50.000000
16,15.293648,2.058102,-1.0,0.000000,16.666667
18,40.069300,-2.720529,2.0,0.000000,33.333333


### Splitting into Training Set (2008-2013) and Test Set (2014-2015) by Season

In [6]:
# Time-based split on the row index: labels below 398 go to the training set,
# labels 398 and above go to the held-out test set.

# Predictors
X_timetrain = X[X.index < 398]
X_timetest = X[X.index >= 398]

# Responses, kept both as frames and as flat 1-D arrays for scikit-learn
Y_timetrain = y[y.index < 398]
Y_timetest = y[y.index >= 398]
Y_timetrain_arr = Y_timetrain.values.ravel()
Y_timetest_arr = Y_timetest.values.ravel()

In [7]:
# Fit k-nearest-neighbours on the time-based training split and score it on the
# held-out test split. k = 31 is the best value of k found for this split.
knn1 = KNeighborsClassifier(n_neighbors = 31)
knn1.fit(X_timetrain, Y_timetrain_arr)
y_pred = knn1.predict(X_timetest)
# A single pre-formatted string prints the same text as the old Python 2
# print statement ("Accuracy is  <value> %") and is also valid on Python 3.
print("Accuracy is  %s %%" % (metrics.accuracy_score(Y_timetest_arr, y_pred)*100))

Accuracy is  64.367816092 %


In [8]:
# Preview the first rows of the test-set features rather than the whole frame.
X_timetest.head(10)

Unnamed: 0,Avg_SR_Difference,Avg_WPR_Difference,Total_MVP_Difference,Prev_Enc_Team1_WinPerc,Total_RF_Difference
398,-9.646646,0.466526,6.0,16.666667,0.000000
399,4.963605,0.097800,12.0,50.000000,0.000000
400,7.079810,0.432566,11.0,70.000000,0.000000
402,21.485599,1.176414,17.0,53.846154,-100.000000
403,-4.503334,1.663169,15.0,54.545455,100.000000
404,-7.297630,-0.332117,-1.0,72.727273,-100.000000
405,12.183316,2.316918,5.0,66.666667,-50.000000
407,-5.341707,2.620287,12.0,61.538462,50.000000
408,5.093091,0.588349,20.0,54.545455,-50.000000
410,-13.668459,-2.328697,0.0,60.000000,-66.666667


In [21]:
def getPrediction(match_id):
    '''Predict the winner of a test-set match with the fitted ``knn1`` model.

    Args:
        match_id (int): Value of the ``id`` column in ``matches``. Only ids in
            the range 399-517 (inclusive) are accepted.

    Returns:
        dict or None: ``{'name': <predicted winner>, 'prob': <percentage>}``,
        where ``prob`` is the model's class probability for the predicted
        winner scaled to 0-100. ``None`` is returned when ``match_id`` is
        outside the accepted range, has no row in ``matches``, or has no
        feature row in ``X_timetest``.
    '''
    # Validate explicitly rather than with `assert`: assertions are stripped
    # under `python -O`, and a blanket `except AssertionError` around the whole
    # body would also hide unrelated assertion failures.
    if not (399 <= match_id <= 517):
        return None

    match_row = matches.loc[matches['id'] == match_id]
    # Feature rows are looked up by index label `match_id - 1`.
    toPredict = X_timetest.loc[X_timetest.index == match_id - 1].values

    # The feature index has gaps, so an in-range id may still have no data.
    # Return None instead of indexing into an empty result or handing the
    # model an empty array.
    if len(match_row) == 0 or len(toPredict) == 0:
        return None

    team1name = match_row.team1.unique()[0]
    team2name = match_row.team2.unique()[0]

    prediction_prob = knn1.predict_proba(toPredict)
    prediction = knn1.predict(toPredict)

    results = {}
    if prediction[0] > 0:
        # A positive label means team1 is predicted to win.
        # Assumes column 1 of predict_proba is the positive class -- TODO
        # confirm against knn1.classes_.
        results['name'] = str(team1name)
        results['prob'] = float(prediction_prob[0][1])*100
    else:
        results['name'] = str(team2name)
        results['prob'] = float(prediction_prob[0][0])*100
    return results

In [23]:
# 617 is outside the accepted id range, so this demonstrates the None result.
print(getPrediction(617))

None
