In [8]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
%matplotlib inline

In [3]:
train_data = pd.read_csv('data/train.csv')
test_data = pd.read_csv('data/test.csv')

In [12]:
combined_set = pd.concat([train_data, test_data])
combined_set['combined_var'] = (combined_set.hair_length * .40) + (combined_set.has_soul * .40)

# Replace categorical variables with numbers
def label_encoding(df, col):
    label_map = { key: float(n) for n, key in enumerate(df[col].unique()) }
    label_reverse_map = { label_map[key]: key for key in label_map }
    df[col] = df[col].apply(lambda x: label_map[x])
    return df, label_map, label_reverse_map

combined_set = pd.get_dummies(combined_set, columns=['color'])
combined_set

train_set = combined_set[:len(train_data.index)]
test_set = combined_set[len(train_data.index):]

In [5]:
monster_types = pd.get_dummies(train_set.type)

In [6]:
train_cols = ['combined_var', 'rotting_flesh', 'bone_length', 'has_soul', 'hair_length']

In [18]:
models = {}
predictions = {}

for monster_type in monster_types.columns:
    train = train_set[train_cols]
    preds = monster_types[monster_type]
    combined_train_set = pd.concat([train[train_cols], preds], axis=1)
    p_train, val = train_test_split(combined_train_set, test_size=.30)
    model = MLPClassifier()
    model.fit(p_train[train_cols], p_train[monster_type])
    print(monster_type)
    print(classification_report(model.predict(val[train_cols]), val[monster_type]))
    preds = model.predict(test_set[train_cols])
    predictions[monster_type] = preds
    



Ghost
             precision    recall  f1-score   support

          0       0.96      0.88      0.92        84
          1       0.71      0.89      0.79        28

avg / total       0.90      0.88      0.89       112

Ghoul
             precision    recall  f1-score   support

          0       0.91      0.80      0.85        76
          1       0.67      0.83      0.74        36

avg / total       0.83      0.81      0.82       112

Goblin
             precision    recall  f1-score   support

          0       0.95      0.71      0.81       104
          1       0.12      0.50      0.19         8

avg / total       0.89      0.70      0.77       112



In [19]:
final_predictions = pd.np.argmax(pd.DataFrame(predictions).values, axis=1)
label_map = {index: col for index, col in enumerate(pd.DataFrame(predictions).columns)}
sub = pd.DataFrame({'id': test_set.id, 'type': final_predictions})
sub['type'] = sub.type.apply(lambda x: label_map[x])
sub.to_csv('nn_ens_solution1.csv', index=False)

Score = .54 on public leader board
----------------------------------