# Using Dataset: matches.csv

In [386]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
import keras
from keras import Sequential
from keras.layers import Dense
from keras.utils import plot_model, print_summary
from sklearn.preprocessing import Imputer, StandardScaler
from keras.preprocessing.text import text_to_word_sequence, Tokenizer

In [387]:
# Loading data
data = pd.read_csv("matches.csv")

In [388]:
data.head()

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,2017,Hyderabad,2017-04-05,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,2,2017,Pune,2017-04-06,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,
2,3,2017,Rajkot,2017-04-07,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,
3,4,2017,Indore,2017-04-08,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,
4,5,2017,Bangalore,2017-04-08,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,


In [389]:
data.toss_decision.unique()

array(['field', 'bat'], dtype=object)

In [390]:
# Encoding data
data.toss_decision = data.toss_decision.map({'bat':1, 'field':0})

In [391]:
data.result.unique()

array(['normal', 'tie', 'no result'], dtype=object)

In [336]:
data.result = data.result.map({'normal':1, 'tie':2, 'no result':0})

## field is 0 and bat is 1 <br>
## normal is 1, tie is 2, no result is 0

In [392]:
data.drop(columns=['venue', 'player_of_match', 'dl_applied','umpire1','umpire2','umpire3','date','city','season','id'], inplace=True)

In [393]:
data.head()

Unnamed: 0,team1,team2,toss_winner,toss_decision,result,winner,win_by_runs,win_by_wickets
0,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,0,normal,Sunrisers Hyderabad,35,0
1,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,0,normal,Rising Pune Supergiant,0,7
2,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,0,normal,Kolkata Knight Riders,0,10
3,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,0,normal,Kings XI Punjab,0,6
4,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,1,normal,Royal Challengers Bangalore,15,0


In [394]:
data.team1.unique()

array(['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',
       'Rising Pune Supergiant', 'Royal Challengers Bangalore',
       'Kolkata Knight Riders', 'Delhi Daredevils', 'Kings XI Punjab',
       'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Rising Pune Supergiants'], dtype=object)

In [395]:
r = len(data.team2.unique())
teams = data.team1.unique()
mapping = {}

In [396]:
for i in range(14):
    mapping[teams[i]] = i

In [397]:
mapping

{'Chennai Super Kings': 8,
 'Deccan Chargers': 10,
 'Delhi Daredevils': 6,
 'Gujarat Lions': 2,
 'Kings XI Punjab': 7,
 'Kochi Tuskers Kerala': 11,
 'Kolkata Knight Riders': 5,
 'Mumbai Indians': 1,
 'Pune Warriors': 12,
 'Rajasthan Royals': 9,
 'Rising Pune Supergiant': 3,
 'Rising Pune Supergiants': 13,
 'Royal Challengers Bangalore': 4,
 'Sunrisers Hyderabad': 0}

In [398]:
data.team1 = data.team1.map(mapping)
data.team2 = data.team2.map(mapping)

In [399]:
data.head()

Unnamed: 0,team1,team2,toss_winner,toss_decision,result,winner,win_by_runs,win_by_wickets
0,0,4,Royal Challengers Bangalore,0,normal,Sunrisers Hyderabad,35,0
1,1,3,Rising Pune Supergiant,0,normal,Rising Pune Supergiant,0,7
2,2,5,Kolkata Knight Riders,0,normal,Kolkata Knight Riders,0,10
3,3,7,Kings XI Punjab,0,normal,Kings XI Punjab,0,6
4,4,6,Royal Challengers Bangalore,1,normal,Royal Challengers Bangalore,15,0


In [400]:
data.toss_winner = data.toss_winner.map(mapping)

In [401]:
data.winner = data.winner.map(mapping)

In [402]:
data.head()

Unnamed: 0,team1,team2,toss_winner,toss_decision,result,winner,win_by_runs,win_by_wickets
0,0,4,4,0,normal,0.0,35,0
1,1,3,3,0,normal,3.0,0,7
2,2,5,5,0,normal,5.0,0,10
3,3,7,7,0,normal,7.0,0,6
4,4,6,4,1,normal,4.0,15,0


In [403]:
data.isna().sum()

team1             0
team2             0
toss_winner       0
toss_decision     0
result            0
winner            3
win_by_runs       0
win_by_wickets    0
dtype: int64

In [404]:
data.winner.fillna(0, axis=0, inplace=True)

In [405]:
data.isna().sum()

team1             0
team2             0
toss_winner       0
toss_decision     0
result            0
winner            0
win_by_runs       0
win_by_wickets    0
dtype: int64

In [406]:
data.winner = data.winner.astype(int)

In [407]:
data.head()

Unnamed: 0,team1,team2,toss_winner,toss_decision,result,winner,win_by_runs,win_by_wickets
0,0,4,4,0,normal,0,35,0
1,1,3,3,0,normal,3,0,7
2,2,5,5,0,normal,5,0,10
3,3,7,7,0,normal,7,0,6
4,4,6,4,1,normal,4,15,0


In [408]:
# Scaling win_by_runs and win_by_wickets
data.win_by_runs

0       35
1        0
2        0
3        0
4       15
5        0
6        0
7        0
8       97
9        0
10       0
11       0
12       0
13      17
14      51
15       0
16      27
17       0
18       5
19      21
20      15
21       0
22       0
23      14
24       0
25      26
26      82
27       3
28       0
29       0
      ... 
606      0
607      7
608      0
609      0
610      0
611      9
612     85
613      0
614      1
615      4
616      0
617      0
618      0
619    144
620      0
621      0
622     80
623      0
624     19
625     82
626      0
627      0
628      0
629      0
630     22
631      0
632      0
633     22
634      0
635      8
Name: win_by_runs, Length: 636, dtype: int64

In [409]:
for i in range(len(data['win_by_runs'].values)):
    if (data['win_by_runs'][i]) >= 20: # Strong team
        data['win_by_runs'][i] = 1
    else:
        data['win_by_runs'][i] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


In [355]:
data.win_by_runs

0      1
1      0
2      0
3      0
4      0
5      0
6      0
7      0
8      1
9      0
10     0
11     0
12     0
13     0
14     1
15     0
16     1
17     0
18     0
19     1
20     0
21     0
22     0
23     0
24     0
25     1
26     1
27     0
28     0
29     0
      ..
606    0
607    0
608    0
609    0
610    0
611    0
612    1
613    0
614    0
615    0
616    0
617    0
618    0
619    1
620    0
621    0
622    1
623    0
624    0
625    1
626    0
627    0
628    0
629    0
630    1
631    0
632    0
633    1
634    0
635    0
Name: win_by_runs, Length: 636, dtype: int64

In [356]:
data.win_by_wickets

0       0
1       7
2      10
3       6
4       0
5       9
6       4
7       8
8       0
9       4
10      8
11      4
12      7
13      0
14      0
15      6
16      0
17      4
18      0
19      0
20      0
21      8
22      4
23      0
24      6
25      0
26      0
27      0
28      7
29      7
       ..
606     8
607     0
608     7
609     5
610     7
611     0
612     0
613     5
614     0
615     0
616     6
617     7
618     7
619     0
620     8
621     7
622     0
623     9
624     0
625     0
626     6
627     6
628     4
629     6
630     0
631     6
632     4
633     0
634     4
635     0
Name: win_by_wickets, Length: 636, dtype: int64

In [410]:
for i in range(len(data['win_by_wickets'].values)):
    if data['win_by_wickets'][i] >= 7: # Strong team
        data['win_by_wickets'][i] = 1
    else:
        data['win_by_wickets'][i] = 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [412]:
data.head()

Unnamed: 0,team1,team2,toss_winner,toss_decision,result,winner,win_by_runs,win_by_wickets
0,0,4,4,0,normal,0,1,0
1,1,3,3,0,normal,3,0,1
2,2,5,5,0,normal,5,0,1
3,3,7,7,0,normal,7,0,0
4,4,6,4,1,normal,4,0,0


In [413]:
# Extracting features and labels
labels = data.winner.values
features = data.drop(columns=["winner"]).values

In [414]:
features.shape

(636, 7)

In [361]:
ndim = features.shape[1]

In [415]:
features_train, features_test, labels_train, labels_test = train_test_split(features, labels, random_state=3, shuffle=True)

In [323]:
len(features_train)

477

In [119]:
len(labels_test)

159

In [120]:
# As the out is multclass, we perform one hot encoding 
from keras.utils import to_categorical
labelsc = data.winner.values
featuresc =  data.drop(columns=["winner"]).values

In [122]:
labelsc = to_categorical(labelsc, num_classes=14)

In [124]:
labelsc[0]

array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [162]:
# Building model
model = Sequential()
model.add(Dense(100, input_dim = featuresc.shape[1], activation="relu"))
model.add(Dense(50, activation="relu"))
model.add(Dense(10, activation="relu"))
model.add(Dense(100, activation="relu"))
model.add(Dense(14, activation="softmax"))

In [133]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 100)               800       
_________________________________________________________________
dense_7 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_8 (Dense)              (None, 10)                510       
_________________________________________________________________
dense_9 (Dense)              (None, 100)               1100      
_________________________________________________________________
dense_10 (Dense)             (None, 14)                1414      
Total params: 8,874
Trainable params: 8,874
Non-trainable params: 0
_________________________________________________________________


In [163]:
model.compile(optimizer="adam", loss=keras.losses.categorical_crossentropy, metrics=["accuracy"])

In [164]:
featuresc_train, featuresc_test, labelsc_train, labelsc_test = train_test_split(featuresc, labelsc, random_state=3, shuffle=True)

In [130]:
len(featuresc_train)

477

In [174]:
model.fit(featuresc_train, labelsc_train, epochs=200, validation_data=(featuresc_test, labelsc_test), batch_size=100)

ValueError: Error when checking input: expected dense_16_input to have 2 dimensions, but got array with shape (477, 7, 14)

In [141]:
model.evaluate(featuresc_test, labelsc_test)



[1.8320924635953124, 0.69811320304870605]

In [144]:
pred = np.round(model.predict(featuresc_test))

In [146]:
pred[0]

array([ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

In [149]:
featuresc_train[0]

array([7, 1, 1, 0, 1, 0, 0])

In [151]:
mapping

{'Chennai Super Kings': 8,
 'Deccan Chargers': 10,
 'Delhi Daredevils': 6,
 'Gujarat Lions': 2,
 'Kings XI Punjab': 7,
 'Kochi Tuskers Kerala': 11,
 'Kolkata Knight Riders': 5,
 'Mumbai Indians': 1,
 'Pune Warriors': 12,
 'Rajasthan Royals': 9,
 'Rising Pune Supergiant': 3,
 'Rising Pune Supergiants': 13,
 'Royal Challengers Bangalore': 4,
 'Sunrisers Hyderabad': 0}

In [152]:
pred[0]

array([ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

In [153]:
pred[1]

array([ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

In [154]:
pred[2]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

In [155]:
pred[3]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

In [166]:
featuresc_train = to_categorical(featuresc_train, num_classes=14)

In [169]:
featuresc_train.shape[1]

7

In [181]:
pred[0]

array([ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

In [180]:
labelsc_test[0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.])

In [203]:
one = features_test[0]
one

array([7, 5, 7, 1, 1, 0, 0])

In [191]:
one = one.reshape(1,-1)

In [193]:
one.ndim

2

In [201]:
np.round(model.predict(one))

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.]], dtype=float32)

In [202]:
labelsc_test[0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.])

In [204]:
mapping

{'Chennai Super Kings': 8,
 'Deccan Chargers': 10,
 'Delhi Daredevils': 6,
 'Gujarat Lions': 2,
 'Kings XI Punjab': 7,
 'Kochi Tuskers Kerala': 11,
 'Kolkata Knight Riders': 5,
 'Mumbai Indians': 1,
 'Pune Warriors': 12,
 'Rajasthan Royals': 9,
 'Rising Pune Supergiant': 3,
 'Rising Pune Supergiants': 13,
 'Royal Challengers Bangalore': 4,
 'Sunrisers Hyderabad': 0}

In [205]:
features_train

array([[ 7,  1,  1, ...,  1,  0,  0],
       [ 4,  9,  9, ...,  1,  0,  0],
       [ 7,  1,  7, ...,  1,  0,  0],
       ..., 
       [ 7,  6,  6, ...,  1,  0,  1],
       [ 1,  8,  8, ...,  1,  0,  0],
       [12,  6,  6, ...,  1,  0,  0]])

In [206]:
labels_train

array([ 7,  9,  7,  6,  1,  1,  1,  7,  4,  5,  3,  9,  8,  8, 10,  5,  0,
        7,  7,  7,  7,  5,  7,  5,  1,  6,  8,  0,  8,  5,  7,  9,  8,  1,
        9,  8,  2,  0,  6,  5,  5,  8,  2,  4,  5,  7,  1,  8,  8,  3,  9,
        9,  8,  0,  0,  5,  5,  0, 11, 13,  1,  1,  8,  1,  0,  7,  5,  4,
        9,  6,  1,  4,  5,  8,  9,  8, 10,  8,  1,  1,  6,  1,  7,  6,  1,
        9,  8,  7,  1,  7,  4,  4,  0,  6,  1,  8,  1,  0,  8,  8,  2,  9,
        5,  8,  1, 10,  5,  8,  7,  4,  0,  4,  6,  7,  5,  9,  9,  8,  4,
        1,  5,  0,  9,  4,  1,  5,  9,  9,  4,  1,  1,  6,  2,  4,  6,  4,
        9,  8,  8,  4,  1,  7,  9,  0,  7,  6,  7,  4, 13,  1, 10,  4,  4,
        4,  9,  9,  0,  7,  1,  5,  1,  6,  8,  9,  9,  1,  5, 10,  5,  5,
        6,  7,  4,  1,  7,  7, 11,  4, 10,  0,  4,  1,  4,  6,  6,  6,  6,
        0,  6,  7,  7,  1,  8,  6,  7,  1,  7,  1,  0,  7,  6,  8,  1,  1,
        5,  0,  6,  8,  0, 10,  5,  5,  9,  7,  7,  1,  9,  5,  4,  4,  8,
       12,  9, 10,  0,  3

In [224]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
clf2 = RandomForestClassifier(n_estimators=120)
clf2.fit(features_train, labels_train)


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=120, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

0.61006289308176098

In [210]:
pred = clf.predict(features_test)

In [212]:
pred[0]

5

In [213]:
one

array([7, 5, 7, 1, 1, 0, 0])

In [215]:
clf.predict(one.reshape(1,-1))

array([5])

In [217]:
np.where(pred != labels_test)

(array([  0,   2,   3,   4,   7,   9,  10,  12,  13,  14,  17,  18,  21,
         22,  24,  25,  26,  28,  29,  30,  33,  34,  36,  37,  40,  42,
         43,  44,  45,  47,  49,  50,  51,  53,  56,  58,  59,  62,  64,
         65,  66,  67,  70,  71,  72,  73,  74,  75,  76,  78,  80,  81,
         85,  86,  89,  90,  92,  94,  96, 100, 102, 103, 106, 108, 113,
        114, 116, 118, 120, 121, 122, 124, 125, 126, 127, 128, 129, 130,
        131, 132, 134, 136, 137, 138, 139, 143, 144, 146, 147, 150, 154,
        156, 157]),)

In [230]:
clf2.score(features_test, labels_test)


0.61635220125786161

In [231]:
model.predict(one.reshape(1,-1))

array([[ 0.05812529,  0.04654225,  0.07470595,  0.13458365,  0.0699212 ,
         0.02470694,  0.11555759,  0.11031187,  0.0820372 ,  0.03335182,
         0.06107711,  0.03563847,  0.12495952,  0.02848106]], dtype=float32)

In [235]:
pred = np.round(model.predict(features_test))

In [236]:
pred

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]], dtype=float32)

In [237]:
np.where(pred!=labelsc_test)

(array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
         39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
         78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
        104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
        130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
        143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
        156, 157, 158]),
 array([ 7,  5,  6,  7,  9,  1,  8,  9,  1,  9,  9,  1, 13,  6, 10,  1,  1,
         4,  2,  1,  5,

In [239]:
model.evaluate(features_test, labelsc_test)



[2.8491017758471413, 0.025157232516966527]

In [240]:
features_train

array([[ 7,  1,  1, ...,  1,  0,  0],
       [ 4,  9,  9, ...,  1,  0,  0],
       [ 7,  1,  7, ...,  1,  0,  0],
       ..., 
       [ 7,  6,  6, ...,  1,  0,  1],
       [ 1,  8,  8, ...,  1,  0,  0],
       [12,  6,  6, ...,  1,  0,  0]])

In [241]:
labels_train

array([ 7,  9,  7,  6,  1,  1,  1,  7,  4,  5,  3,  9,  8,  8, 10,  5,  0,
        7,  7,  7,  7,  5,  7,  5,  1,  6,  8,  0,  8,  5,  7,  9,  8,  1,
        9,  8,  2,  0,  6,  5,  5,  8,  2,  4,  5,  7,  1,  8,  8,  3,  9,
        9,  8,  0,  0,  5,  5,  0, 11, 13,  1,  1,  8,  1,  0,  7,  5,  4,
        9,  6,  1,  4,  5,  8,  9,  8, 10,  8,  1,  1,  6,  1,  7,  6,  1,
        9,  8,  7,  1,  7,  4,  4,  0,  6,  1,  8,  1,  0,  8,  8,  2,  9,
        5,  8,  1, 10,  5,  8,  7,  4,  0,  4,  6,  7,  5,  9,  9,  8,  4,
        1,  5,  0,  9,  4,  1,  5,  9,  9,  4,  1,  1,  6,  2,  4,  6,  4,
        9,  8,  8,  4,  1,  7,  9,  0,  7,  6,  7,  4, 13,  1, 10,  4,  4,
        4,  9,  9,  0,  7,  1,  5,  1,  6,  8,  9,  9,  1,  5, 10,  5,  5,
        6,  7,  4,  1,  7,  7, 11,  4, 10,  0,  4,  1,  4,  6,  6,  6,  6,
        0,  6,  7,  7,  1,  8,  6,  7,  1,  7,  1,  0,  7,  6,  8,  1,  1,
        5,  0,  6,  8,  0, 10,  5,  5,  9,  7,  7,  1,  9,  5,  4,  4,  8,
       12,  9, 10,  0,  3

In [249]:
model.predict_classes(one.reshape(1,-1))

array([3])

In [252]:
features_train[0]

array([7, 1, 1, 0, 1, 0, 0])

In [253]:
mapping

{'Chennai Super Kings': 8,
 'Deccan Chargers': 10,
 'Delhi Daredevils': 6,
 'Gujarat Lions': 2,
 'Kings XI Punjab': 7,
 'Kochi Tuskers Kerala': 11,
 'Kolkata Knight Riders': 5,
 'Mumbai Indians': 1,
 'Pune Warriors': 12,
 'Rajasthan Royals': 9,
 'Rising Pune Supergiant': 3,
 'Rising Pune Supergiants': 13,
 'Royal Challengers Bangalore': 4,
 'Sunrisers Hyderabad': 0}

In [254]:
labelsc_train

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  1.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [258]:
from keras.preprocessing.text import one_hot

In [270]:
one_hot(data.team1.values[11],n=5)

[4, 4, 3]

In [271]:
from sklearn.preprocessing import LabelEncoder

In [277]:
encoder = LabelEncoder()
encoder.fit(["ada","sdad"])

LabelEncoder()

In [278]:
encoder.transform(["ada","sdad"])

array([0, 1])

In [279]:
teams

array(['Sunrisers Hyderabad', 'Mumbai Indians', 'Gujarat Lions',
       'Rising Pune Supergiant', 'Royal Challengers Bangalore',
       'Kolkata Knight Riders', 'Delhi Daredevils', 'Kings XI Punjab',
       'Chennai Super Kings', 'Rajasthan Royals', 'Deccan Chargers',
       'Kochi Tuskers Kerala', 'Pune Warriors', 'Rising Pune Supergiants'], dtype=object)

In [280]:
encoder.fit(teams)

LabelEncoder()

In [281]:
encoder.transform(teams)

array([13,  7,  3, 10, 12,  6,  2,  4,  0,  9,  1,  5,  8, 11])

In [283]:
y = to_categorical(encoder.transform(teams))

In [285]:
y[0]

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.])

In [286]:
y.shape

(14, 14)

In [289]:
features_train = StandardScaler().fit_transform(features_train)



In [290]:
features_train

array([[ 0.40455895, -1.44032478, -1.397596  , ..., -0.04583492,
        -0.57977104, -0.58299883],
       [-0.51436781,  1.08902605,  1.04323985, ..., -0.04583492,
        -0.57977104, -0.58299883],
       [ 0.40455895, -1.44032478,  0.43303089, ..., -0.04583492,
        -0.57977104, -0.58299883],
       ..., 
       [ 0.40455895,  0.14051949,  0.12792641, ..., -0.04583492,
        -0.57977104,  1.7152693 ],
       [-1.43329456,  0.7728572 ,  0.73813537, ..., -0.04583492,
        -0.57977104, -0.58299883],
       [ 1.93610354,  0.14051949,  0.12792641, ..., -0.04583492,
        -0.57977104, -0.58299883]])

In [366]:
from sklearn.linear_model import LinearRegression
lg = LinearRegression()
lg.fit(features_train, labels_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [367]:
lg.score(features_test, labels_test)

ValueError: shapes (159,7) and (36,) not aligned: 7 (dim 1) != 36 (dim 0)

In [363]:
from sklearn.preprocessing import PolynomialFeatures
p = PolynomialFeatures()

In [365]:
features_train = p.fit_transform(features_train)

In [374]:
to_categorical(data.team1.values).shape

(636, 14)

In [385]:
to_categorical(labels_train)[4]

array([ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [383]:
features_train[0]

array([  1.,   7.,   1.,   1.,   0.,   1.,   0.,   0.,  49.,   7.,   7.,
         0.,   7.,   0.,   0.,   1.,   1.,   0.,   1.,   0.,   0.,   1.,
         0.,   1.,   0.,   0.,   0.,   0.,   0.,   0.,   1.,   0.,   0.,
         0.,   0.,   0.])