In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

try:
    # GridSearchCV lives in sklearn.model_selection since scikit-learn 0.18;
    # the old sklearn.grid_search module was removed in 0.20.
    from sklearn.model_selection import GridSearchCV
except ImportError:
    # Fallback for very old scikit-learn installs.
    from sklearn.grid_search import GridSearchCV

In [2]:
# Columns fed to the model. Defined once so the train and test matrices are
# always built from the same columns in the same order.
FEATURE_COLUMNS = ['season', 'holiday', 'workingday', 'weather', 'temp', 'atemp',
                   'humidity', 'windspeed', 'month', 'week', 'hour']


def add_time_features(frame):
    """Return a copy of ``frame`` with ``month``, ``week`` and ``hour`` columns added.

    All three columns are derived from the ``datetime`` column, which is parsed
    only once. ``week`` is the ISO week number; it is computed with the standard
    library's ``isocalendar()`` because ``DatetimeIndex.week`` is deprecated
    since pandas 1.1 and removed in pandas 2.0.
    """
    timestamps = pd.to_datetime(frame['datetime'])
    enriched = frame.copy()  # never write into the caller's frame
    enriched['month'] = timestamps.dt.month
    enriched['week'] = timestamps.map(lambda ts: ts.isocalendar()[1])
    enriched['hour'] = timestamps.dt.hour
    return enriched


# Rows with any missing value are discarded before the features are derived.
train = add_time_features(pd.read_csv('train.csv').dropna())

# NOTE(review): any test row dropped here gets no prediction in the submission
# built from `test` later on -- confirm test.csv has no missing values.
test = add_time_features(pd.read_csv('test.csv').dropna())

# Model inputs as float32 matrices; the target is the `count` column as integers.
trainX = train[FEATURE_COLUMNS].values.astype("float32")
trainY = train['count'].values.astype("int")

testX = test[FEATURE_COLUMNS].values.astype("float32")

In [5]:
# Seed for the random forest so the search result and the predictions are
# reproducible from one run to the next.
SEED = 42

# Hyper-parameter grid explored by the cross-validated search.
parameters = {'min_samples_split' : [20], 'n_estimators' : [100, 200]}

# NOTE(review): `trainY` is the numeric `count` column, yet it is modelled here
# as class labels by a classifier -- consider a regressor; confirm against the
# metric this submission is scored with.
clf = GridSearchCV(RandomForestClassifier(random_state=SEED), parameters, n_jobs=1, verbose=1)

clf.fit(trainX, trainY)

print("Best score: %0.3f" % clf.best_score_)
print("Best parameters set:")
best_parameters = clf.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print("\t%s: %r" % (param_name, best_parameters[param_name]))

# GridSearchCV refits the winning configuration on the full training set by
# default (refit=True), so best_estimator_ is already trained. Fitting it a
# second time would only repeat that work.
best_model = clf.best_estimator_
preds = best_model.predict(testX)

# Write one prediction per remaining test row, keyed by its datetime.
submission = pd.DataFrame({"datetime": test['datetime'], "count": preds})
submission.to_csv("submission.csv", index=False)

[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    5.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   51.6s finished


Fitting 3 folds for each of 2 candidates, totalling 6 fits
Best score: 0.015
Best parameters set:
	min_samples_split: 20
	n_estimators: 100


In [6]:
# LB (presumably leaderboard) score recorded for this submission: 0.72923
# Show the submission frame as the cell output for a quick visual check.
submission

Unnamed: 0,count,datetime
0,5,2011-01-20 00:00:00
1,5,2011-01-20 01:00:00
2,2,2011-01-20 02:00:00
3,2,2011-01-20 03:00:00
4,2,2011-01-20 04:00:00
5,2,2011-01-20 05:00:00
6,88,2011-01-20 06:00:00
7,99,2011-01-20 07:00:00
8,124,2011-01-20 08:00:00
9,97,2011-01-20 09:00:00
