In [97]:
import pandas as pd
import xgboost as xgb

from keras.layers import Dense, Dropout, Input
from keras.models import Model
from keras.utils.np_utils import to_categorical

from sklearn.metrics import accuracy_score

In [6]:
train_data = pd.read_csv('data/train.csv', low_memory=False)
test_data = pd.read_csv('data/test.csv', low_memory=False)

combined_set = pd.concat([train_data, test_data])
combined_set['combined_var'] = (combined_set.hair_length * .40) + (combined_set.has_soul * .40)


In [7]:
# Replace categorical variables with numbers
def label_encoding(df, col):
    label_map = { key: float(n) for n, key in enumerate(df[col].unique()) }
    label_reverse_map = { label_map[key]: key for key in label_map }
    df[col] = df[col].apply(lambda x: label_map[x])
    return df, label_map, label_reverse_map

combined_set, _, _ = label_encoding(combined_set, 'color')


In [8]:
train_set = combined_set[:len(train_data.index)]
test_set = combined_set[len(train_data.index):]

train_set, type_label_map, type_label_reverse_map = label_encoding(train_set, 'type')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [9]:
train_cols = ['combined_var', 'rotting_flesh', 'bone_length', 'has_soul']
target_var = 'type'
selected_cols = train_cols + [target_var]

In [10]:
dtrain = xgb.DMatrix(train_set[train_cols], feature_names=train_cols, label=train_set[target_var])
dtest = xgb.DMatrix(test_set[train_cols], feature_names=train_cols)


In [13]:
params = {'max_depth':5, 'eta':0.0001, 'silent':1, 'objective':'multi:softprob'}
params['num_class'] = 3
params['seed'] = 7
params['subsample'] = 0.6
params['gamma'] = 4
params['min_child_weight'] = 0
params['tree_method'] = 'exact'
params['sketch_eps'] = 0.4

In [14]:
model = xgb.train(params, dtrain, num_boost_round=50)

In [26]:
predictions = model.predict(dtest)
train_predictions = model.predict(dtrain)

  preds = preds.reshape(nrow, preds.size / nrow)


In [32]:
nn_input = pd.DataFrame(train_predictions)
nn_input.columns = [type_label_reverse_map[x] for x in nn_input.columns]
nn_input['type'] = dtrain.get_label()
print(nn_input.head())

nn_test = pd.DataFrame(predictions)
nn_test.columns = [type_label_reverse_map[x] for x in nn_test.columns]

      Ghoul    Goblin     Ghost  type
0  0.334635  0.333100  0.332265   0.0
1  0.332946  0.334293  0.332761   1.0
2  0.334852  0.332871  0.332277   0.0
3  0.335122  0.332664  0.332213   0.0
4  0.333950  0.333359  0.332691   2.0


In [109]:
nn_train = nn_input[:int(len(nn_input)*.90)]
nn_validate = nn_input[int(len(nn_input)*.90):]

In [117]:
def make_model(inps=3):
    inputs    = Input(shape=(inps, ))
    dense_1   = Dense(24, activation='softmax')(inputs)
    dropout_1 = Dropout(0.1)(dense_1)
    dense_2   = Dense(6, activation='softmax')(inputs)
    dropout_2 = Dropout(1)(dense_2)
    output    = Dense(3, activation='softmax')(dropout_2)
    
    nnet = Model(input=inputs, output=output)
    nnet.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return nnet

In [118]:
nnet = make_model(3)

In [119]:
nn_cols = ['Ghoul', 'Goblin', 'Ghost']
nn_target = ['type']

In [120]:
nnet.fit(nn_train[nn_cols].values, pd.get_dummies(nn_train[target], columns=target).values, batch_size=6, nb_epoch=40, shuffle=False,
         validation_data=(nn_validate[nn_cols].values, pd.get_dummies(nn_validate[target], columns=target).values))

Train on 333 samples, validate on 38 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7fc2feb17ef0>

In [121]:
accuracy_score(nn_validate[target].values,
    pd.np.argmax(nnet.predict(nn_validate[nn_cols].values), axis=1))

0.36842105263157893

This Failed Misserably the data is too less maybe...