In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from MiniBatch import MiniBatch

df = pd.read_csv('dataset.csv')
df

Unnamed: 0,outlook,temperature,humidity,windy,play
0,sunny,85,85,False,no
1,sunny,80,90,True,no
2,overcast,83,86,False,yes
3,rainy,70,96,False,yes
4,rainy,68,80,False,yes
5,rainy,65,70,True,no
6,overcast,64,65,True,yes
7,sunny,72,95,False,no
8,sunny,69,70,False,yes
9,rainy,75,80,False,yes


In [2]:
def rationalize(list_df) :
    max_df = list_df.max()
    result = list(map(lambda x: x/(max_df)*1, list_df))
    return result

df['temperature'] = rationalize(df['temperature'])
df['humidity'] = rationalize(df['humidity'])
df

Unnamed: 0,outlook,temperature,humidity,windy,play
0,sunny,1.0,0.885417,False,no
1,sunny,0.941176,0.9375,True,no
2,overcast,0.976471,0.895833,False,yes
3,rainy,0.823529,1.0,False,yes
4,rainy,0.8,0.833333,False,yes
5,rainy,0.764706,0.729167,True,no
6,overcast,0.752941,0.677083,True,yes
7,sunny,0.847059,0.989583,False,no
8,sunny,0.811765,0.729167,False,yes
9,rainy,0.882353,0.833333,False,yes


In [3]:
df['outlook'] = pd.Categorical(df['outlook'])
dfOutlook = pd.get_dummies(df['outlook'], prefix = 'outlook')
dfOutlook

Unnamed: 0,outlook_overcast,outlook_rainy,outlook_sunny
0,0,0,1
1,0,0,1
2,1,0,0
3,0,1,0
4,0,1,0
5,0,1,0
6,1,0,0
7,0,0,1
8,0,0,1
9,0,1,0


In [4]:
df['windy'] = pd.Categorical(df['windy'])
dfWindy = pd.get_dummies(df['windy'], prefix = 'windy')
dfWindy

Unnamed: 0,windy_False,windy_True
0,1,0
1,0,1
2,1,0
3,1,0
4,1,0
5,0,1
6,0,1
7,1,0
8,1,0
9,1,0


In [5]:
df['play'] = df['play'].astype('category')
df['play'] = df['play'].cat.codes

df = pd.concat([df, dfOutlook, dfWindy], axis=1)
df = df.drop(columns=['outlook', 'windy'], axis=1)
df

Unnamed: 0,temperature,humidity,play,outlook_overcast,outlook_rainy,outlook_sunny,windy_False,windy_True
0,1.0,0.885417,0,0,0,1,1,0
1,0.941176,0.9375,0,0,0,1,0,1
2,0.976471,0.895833,1,1,0,0,1,0
3,0.823529,1.0,1,0,1,0,1,0
4,0.8,0.833333,1,0,1,0,1,0
5,0.764706,0.729167,0,0,1,0,0,1
6,0.752941,0.677083,1,1,0,0,0,1
7,0.847059,0.989583,0,0,0,1,1,0
8,0.811765,0.729167,1,0,0,1,1,0
9,0.882353,0.833333,1,0,1,0,1,0


In [6]:
features = df.drop(columns=['play'], axis=1)
targets = df['play']

print (features.head)
print ()
print (targets.head)

<bound method NDFrame.head of     temperature  humidity  outlook_overcast  outlook_rainy  outlook_sunny  \
0      1.000000  0.885417                 0              0              1   
1      0.941176  0.937500                 0              0              1   
2      0.976471  0.895833                 1              0              0   
3      0.823529  1.000000                 0              1              0   
4      0.800000  0.833333                 0              1              0   
5      0.764706  0.729167                 0              1              0   
6      0.752941  0.677083                 1              0              0   
7      0.847059  0.989583                 0              0              1   
8      0.811765  0.729167                 0              0              1   
9      0.882353  0.833333                 0              1              0   
10     0.882353  0.729167                 0              0              1   
11     0.847059  0.937500                 1   

## Training and Testing

In [7]:
features_train, features_test, targets_train, targets_test = train_test_split(features, targets, test_size=0.5, stratify=targets, random_state=None)
print (features_train)
print ()
print (targets_train)
print ()

print (features_test)
print ()
print (targets_test)
print ()

    temperature  humidity  outlook_overcast  outlook_rainy  outlook_sunny  \
13     0.835294  0.947917                 0              1              0   
2      0.976471  0.895833                 1              0              0   
5      0.764706  0.729167                 0              1              0   
3      0.823529  1.000000                 0              1              0   
7      0.847059  0.989583                 0              0              1   
11     0.847059  0.937500                 1              0              0   
8      0.811765  0.729167                 0              0              1   

    windy_False  windy_True  
13            0           1  
2             1           0  
5             0           1  
3             1           0  
7             1           0  
11            0           1  
8             1           0  

13    0
2     1
5     0
3     1
7     0
11    1
8     1
Name: play, dtype: int8

    temperature  humidity  outlook_overcast  outlook_rainy  o

In [8]:
model = MiniBatch(2, 3, 2, 0.1, 0.5, 4)
model.fit(features_train, targets_train)
print ('Predict')
predict = model.predict(features_test)

print (len(predict))
print (len(targets_test))

print ('Accuracy\t', accuracy_score(targets_test, predict))

2.66609674335459
2.9280582416919607
2.6349923598854215
1.2687531903159353
1.9823428416868751
1.7025831260347843
1.3365911061645772
2.157948823685731
3.2561201971036877
2.723083134845734
1.2804935840513112
2.099736149763219
1.7956928823892666
1.4059147680372075
[[[0.934996200484438, 0.9492161545120064], [0.9330799559113717, 0.7805292399476111], [0.8789306888931844, 0.8458718049791426], [0.7919287911351179]], [[0.8964092316957808, 0.9628924116517024], [0.938375064264405, 0.7825337836352216], [0.8908775313133809, 0.8576238232940065], [0.8031207956082722]]]
2.6598265525489
3.3688688643463007
2.698049261987844
1.413825565793419
2.0397075444374617
1.7545928442702206
1.3130721060339468
2.7697203067356733
3.0472493143333406
2.7335672647816915
1.4509096548397302
2.0359419658421953
1.7560196532479158
1.3155506955333125
[[[0.9346140678935264, 0.9667173161380274], [0.936911437175194, 0.8043686346784468], [0.8849034849107937, 0.8525311579489938], [0.7880267763117642]], [[0.9410174644679936, 0.95466