In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import math

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

dtypes = {
    'season': np.uint8,
    'holiday': np.uint8,
    'workingday': np.uint8,
    'temp': np.float16,
    'atemp': np.float16,
    'humidity': np.float16,
    'windspeed': np.float16,
    'casual': np.uint16,
    'registered': np.uint16,
    'count': np.uint16
}

df = pd.read_csv('bike_train.csv', dtype=dtypes, parse_dates=['datetime'])
df = df.sample(len(df))
df.head(20)

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
8938,2012-08-14 19:00:00,3,0,1,1,31.15625,36.375,66.0,13.0,94,533,627
4763,2011-11-11 13:00:00,4,1,0,1,15.578125,19.6875,37.0,37.0,57,179,236
2520,2011-06-12 22:00:00,2,0,0,1,26.234375,28.796875,83.0,9.0,31,97,128
5471,2012-01-03 02:00:00,1,0,1,1,7.378906,8.335938,51.0,17.0,0,3,3
10175,2012-11-09 09:00:00,4,0,1,1,13.9375,15.148438,53.0,28.0,23,304,327
10253,2012-11-12 15:00:00,4,1,0,1,22.953125,26.515625,64.0,17.0,102,280,382
2169,2011-05-17 07:00:00,2,0,1,2,21.3125,25.0,94.0,17.0,13,138,151
3537,2011-08-17 07:00:00,3,0,1,1,27.0625,31.0625,65.0,6.003906,12,296,308
268,2011-01-12 15:00:00,1,0,1,1,8.203125,8.335938,47.0,28.0,1,48,49
6332,2012-03-01 02:00:00,1,0,1,2,18.859375,22.71875,94.0,19.0,0,6,6


In [2]:
X = df.drop(['casual', 'registered', 'count'], axis=1)
y = df[['casual', 'registered']].values

In [3]:
from sklearn.preprocessing import StandardScaler, maxabs_scale, minmax_scale

def preprocess(X):
    # column 추가
    X['year'] = X['datetime'].map(lambda x: np.uint8(x.year))
    X['hour'] = X['datetime'].map(lambda x: np.uint8(x.hour))
    X['hour3'] = X['datetime'].map(lambda x: np.uint8(x.hour / 3))
    X['hour6'] = X['datetime'].map(lambda x: np.uint8(x.hour / 6))
    X['month'] = X['datetime'].map(lambda x: np.uint8(x.month))
    X['month3'] = X['datetime'].map(lambda x: np.uint8(x.month / 3))
    X['weekday'] = X['datetime'].map(lambda x: np.uint8(x.weekday()))
    X['weekend'] = X['weekday'].map(lambda x: 1 if x in [5, 6] else 0)
    
    # one_hot 인코딩
    X = pd.get_dummies(X, columns=['year', 'season', 'hour', 'hour3', 'hour6', 'weather', 'weekday', 'month', 'month3'])
    
    # 필요없는 column 제거
    del X['datetime']
    
    # scaling
    X['temp'] = minmax_scale(X['temp'])
    X['atemp'] = minmax_scale(X['atemp'])
    X['humidity'] = minmax_scale(X['humidity'])
    X['windspeed'] = minmax_scale(X['windspeed'])
    
    return X


processedX = preprocess(X)
processedX.head(10)

Unnamed: 0,holiday,workingday,temp,atemp,humidity,windspeed,weekend,year_219,year_220,season_1,...,month_8,month_9,month_10,month_11,month_12,month3_0,month3_1,month3_2,month3_3,month3_4
8938,0,1,0.754883,0.796387,0.660156,0.228149,0,0,1,0,...,1,0,0,0,0,0,0,1,0,0
4763,1,0,0.367188,0.42334,0.370117,0.649414,0,1,0,0,...,0,0,0,1,0,0,0,0,1,0
2520,0,0,0.632324,0.626953,0.830078,0.157959,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
5471,0,1,0.163208,0.169556,0.510254,0.29834,0,0,1,1,...,0,0,0,0,0,1,0,0,0,0
10175,0,1,0.326416,0.321777,0.530273,0.491211,0,0,1,0,...,0,0,0,1,0,0,0,0,1,0
10253,1,0,0.550781,0.576172,0.640137,0.29834,0,0,1,0,...,0,0,0,1,0,0,0,0,1,0
2169,0,1,0.509766,0.541992,0.94043,0.29834,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
3537,0,1,0.652832,0.677734,0.649902,0.105347,0,1,0,0,...,1,0,0,0,0,0,0,1,0,0
268,0,1,0.183716,0.169556,0.470215,0.491211,0,1,0,1,...,0,0,0,0,0,1,0,0,0,0
6332,0,1,0.44873,0.491211,0.94043,0.333496,0,0,1,1,...,0,0,0,0,0,0,1,0,0,0


In [12]:
from keras.models import Sequential
from keras.layers import Input
from keras.layers.core import Dense, Dropout
from keras.layers.noise import GaussianDropout, AlphaDropout
from keras.optimizers import Adam

model = Sequential()
model.add(Dense(256, input_shape=(77,), activation='relu', bias_initializer='lecun_normal'))
model.add(GaussianDropout(0.5))
model.add(Dense(256, activation='relu', bias_initializer='lecun_normal'))
model.add(GaussianDropout(0.5))
model.add(Dense(256, activation='relu', bias_initializer='lecun_normal'))
model.add(GaussianDropout(0.5))
model.add(Dense(256, activation='relu', bias_initializer='lecun_normal'))
model.add(GaussianDropout(0.5))
model.add(Dense(256, activation='relu', bias_initializer='lecun_normal'))
model.add(GaussianDropout(0.5))
model.add(Dense(2))
model.compile(optimizer=Adam(0.0001), loss='mean_squared_logarithmic_error')

In [14]:
model.fit(processedX.values, y, validation_split=0.1, epochs=100)

Train on 9797 samples, validate on 1089 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x2a7f1890908>

In [15]:
df_test = pd.read_csv('bike_test.csv', dtype=dtypes, parse_dates=['datetime'])
X_test = preprocess(df_test)

In [16]:
df_test['count'] = np.sum(model.predict(X_test.values), axis=1)

In [17]:
df_test[['datetime', 'count']].to_csv('output.csv', index=False)