In [14]:
import numpy as np
import pandas as pd 
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.preprocessing import PolynomialFeatures


In [11]:
# Load the training set; row 0 of the CSV supplies the column names.
df = pd.read_csv(
    '../input/train.csv',
    header=0,
)
# Show the first five rows as this cell's output.
df.head(5)

Unnamed: 0,Id,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,...,Soil_Type32,Soil_Type33,Soil_Type34,Soil_Type35,Soil_Type36,Soil_Type37,Soil_Type38,Soil_Type39,Soil_Type40,Cover_Type
0,1,2596,51,3,258,0,510,221,232,148,...,0,0,0,0,0,0,0,0,0,5
1,2,2590,56,2,212,-6,390,220,235,151,...,0,0,0,0,0,0,0,0,0,5
2,3,2804,139,9,268,65,3180,234,238,135,...,0,0,0,0,0,0,0,0,0,2
3,4,2785,155,18,242,118,3090,238,238,122,...,0,0,0,0,0,0,0,0,0,2
4,5,2595,45,2,153,-1,391,220,234,150,...,0,0,0,0,0,0,0,0,0,5


In [15]:
# Display names passed to classification_report, one per Cover_Type class.
target_names = [
    '1 - Spruce/Fir',
    '2 - Lodgepole Pine',
    '3 - Ponderosa Pine',
    '4 - Cottonwood/Willow',
    '5 - Aspen',
    '6 - Douglas-fir',
    '7 - Krummholz',
]

# The label is taken positionally: it is the last column of the frame.
label_col = df.columns[-1]
labels = df[label_col]
# Features are every column except the row identifier and the label.
data = df.drop(columns=['Id', label_col])

# Hold out 33% of the rows for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.33,
    random_state=42,
)

# Fit the scaler on the training split only, then apply the same fitted
# scaler to the held-out split.
std_scale = StandardScaler()
X_train_std = std_scale.fit_transform(X_train)
X_test_std = std_scale.transform(X_test)

In [16]:
# Competition files are read from ../input.
train = pd.read_csv("../input/train.csv")
test = pd.read_csv("../input/test.csv")

# Test features: everything except the row identifier.
test_data = test.drop(columns='Id')

# Training labels and features: the label column is split off, and the
# feature frame carries neither the identifier nor the label.
train_labels = train['Cover_Type']
train_data = train.drop(columns=['Id', 'Cover_Type'])

In [24]:
pca = PCA(n_components=54)
pca.fit(X_train_std)

model = make_pipeline(PCA(), MLPClassifier(solver='lbfgs'))
alphas = [.000001, .00001, .0001, 0.001, 0.01, 0.1]
tols = [.0001, .001]
warm_starts = [True, False]
layer_sizes = [(5, 2), (10, 2), (20, 5)]
param_grid = {
    'pca__n_components': range(21, 23),
    'mlpclassifier__alpha': alphas,
    'mlpclassifier__tol': tols,
    'mlpclassifier__warm_start': warm_starts,
    'mlpclassifier__hidden_layer_sizes': layer_sizes
}
grid = GridSearchCV(model, param_grid=param_grid, cv=3, n_jobs=6)
%time grid.fit(X_train_std, y_train)
print('Best params for Neural Network: {}'. \
      format(grid.best_params_))

#Best params for Neural Network:
#{'mlpclassifier__hidden_layer_sizes': (20, 5),
#'pca__n_components': 21, 'mlpclassifier__tol': 0.0001,
#'mlpclassifier__alpha': 1e-06, 'mlpclassifier__warm_start': True}
#clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(100, 40), random_state=1)
#clf.fit(train_data, train_labels)
#test_data['Cover_Type'] = pd.Series(clf.predict(test_data))

#submission = pd.concat([test['Id'].astype(int), test_data['Cover_Type']], axis=1)
#submission = submission.set_index('Id')
#submission.to_csv('neural_submission.csv')


print(submission.shape)

Wall time: 7min 25s
Best params for Neural Network: {'mlpclassifier__hidden_layer_sizes': (20, 5), 'pca__n_components': 21, 'mlpclassifier__tol': 0.0001, 'mlpclassifier__alpha': 1e-06, 'mlpclassifier__warm_start': True}
(565892, 1)


In [27]:

# Evaluate the best pipeline found by the grid search on the held-out split.
model = grid.best_estimator_
y_pred = model.predict(X_test_std)
print(classification_report(y_test, y_pred, target_names=target_names))

# The pipeline was fitted on standardized features (X_train_std), so the
# competition test features must go through the same fitted scaler before
# prediction. Selecting X_train's columns keeps the column order identical to
# what the scaler was fitted on and keeps this cell re-runnable even if
# test_data has gained extra columns.
test_features_std = std_scale.transform(test_data[X_train.columns])
test_pred = model.predict(test_features_std)

# Build the submission without mutating test_data: one Cover_Type column
# indexed by the integer Id.
submission = pd.DataFrame({
    'Id': test['Id'].astype(int),
    'Cover_Type': test_pred,
}).set_index('Id')
submission.to_csv('neural_submission.csv')
print(submission.shape)

                       precision    recall  f1-score   support

       1 - Spruce/Fir       0.61      0.64      0.62       679
   2 - Lodgepole Pine       0.61      0.49      0.54       722
   3 - Ponderosa Pine       0.63      0.62      0.62       704
4 - Cottonwood/Willow       0.82      0.93      0.87       739
            5 - Aspen       0.75      0.80      0.77       721
      6 - Douglas-fir       0.69      0.63      0.66       722
        7 - Krummholz       0.88      0.91      0.90       703

          avg / total       0.71      0.72      0.71      4990

(565892, 1)
