Imports

In [300]:
import pandas as pd
import numpy as np
from datetime import datetime
import copy
from sklearn import linear_model

# Turn off pandas' chained-assignment check for the whole notebook, so no
# SettingWithCopyWarning is emitted when values are set on a filtered frame.
pd.options.mode.chained_assignment = None

Load data

In [301]:
# Read the train and test tables (in that order) from CSV files in the working directory.
train, test = (
    pd.read_csv(csv_name)
    for csv_name in ('bicikelj_train.csv', 'bicikelj_test.csv')
)

Feature creation

In [302]:
def _as_naive_timestamp(raw):
    """Parse one raw value into a ``pd.Timestamp`` and strip its timezone, if any."""
    return pd.to_datetime(raw).tz_localize(None)


# Replace the raw timestamp column of both tables with parsed, timezone-naive timestamps.
train["timestamp"] = list(map(_as_naive_timestamp, train["timestamp"].values))
test["timestamp"] = list(map(_as_naive_timestamp, test["timestamp"].values))

# Train timestamps as a NumPy array; this is the array get_closest_time searches.
times = train["timestamp"].values

# Every train column after the first one, captured before any feature column is added.
classes = train.columns[1:]

In [303]:
def get_closest_time(t, reference_times=None):
    """Return the position of the reference timestamp nearest to ``t``.

    Parameters
    ----------
    t : timestamp-like
        The moment to look up; anything ``pd.Timestamp`` accepts.
    reference_times : array-like of datetime64, optional
        Timestamps to search. When omitted, the notebook-level ``times``
        array is used, which keeps existing single-argument calls working.

    Returns
    -------
    int
        Zero-based position (not the timestamp itself) of the entry with the
        smallest absolute time difference to ``t``. On ties the first such
        position is returned, as ``np.argmin`` does.

    Raises
    ------
    ValueError
        If the searched array is empty (raised by ``np.argmin``).
    """
    if reference_times is None:
        reference_times = times
    reference_times = np.asarray(reference_times, dtype="datetime64[ns]")

    # Subtracting a scalar broadcasts over the array, so there is no need to
    # materialise a full-size array filled with ``t`` on every call.
    target = pd.Timestamp(t).to_datetime64()
    return int(np.argmin(np.abs(reference_times - target)))

In [304]:

# Look-back distances, in minutes, for the lag lookup columns.
LAG_MINUTES = (60, 90, 120, 180)


def add_time_features(frame):
    """Add calendar and lag-lookup columns to ``frame`` in place.

    Columns written, in this order: ``hour``, ``day``, ``month``, ``weekday``
    (calendar parts of ``frame['timestamp']``), then one
    ``bikes_before_<m>`` column per entry of ``LAG_MINUTES``. Each
    ``bikes_before_<m>`` value is the row position returned by
    ``get_closest_time`` for the timestamp ``m`` minutes earlier; it is a
    position, not a bike count. The training cell resolves it into counts.

    New columns are appended at the end of the frame. The training cell
    selects the last eight columns by position, so these eight must stay last.

    Parameters
    ----------
    frame : pd.DataFrame
        Table with a datetime ``timestamp`` column.

    Returns
    -------
    pd.DataFrame
        The same ``frame`` object, for convenience.
    """
    stamps = frame['timestamp']

    # Whole-column assignment replaces the per-row "filter, edit copy, write
    # back" round trip, which scanned the entire frame twice for every row.
    frame['hour'] = stamps.dt.hour
    frame['day'] = stamps.dt.day
    frame['month'] = stamps.dt.month
    frame['weekday'] = stamps.dt.weekday

    for minutes in LAG_MINUTES:
        offset = pd.Timedelta(minutes=minutes)
        frame[f'bikes_before_{minutes}'] = [get_closest_time(t - offset) for t in stamps]

    return frame


print("Creating features for train set...")
add_time_features(train)

print("Creating features for test set...")
add_time_features(test)

print("Done!")

Creating features for train set...
Creating features for test set...
Done!40


Training and predicting

In [305]:
# Lag lookup columns created in the feature cell. They are selected by name so
# the loop keeps working even if a column is ever appended to train or test.
LAG_COLUMNS = ['bikes_before_60', 'bikes_before_90', 'bikes_before_120', 'bikes_before_180']

# Width the progress line is padded to, so a long name is fully overwritten by
# a shorter one after the carriage return.
PROGRESS_WIDTH = 126

# The lag columns hold row positions into train and do not depend on the
# target column, so they are converted to index arrays once, outside the loop.
train_lag_rows = {col: train[col].to_numpy(dtype=np.intp) for col in LAG_COLUMNS}
test_lag_rows = {col: test[col].to_numpy(dtype=np.intp) for col in LAG_COLUMNS}

# Fit one linear regression per target column and write its predictions into test.
for cnt, c in enumerate(classes, start=1):
    print(f"{cnt} / {len(classes)}   {c}".ljust(PROGRESS_WIDTH), end='\r')

    # Values of the current target column, addressed by row position.
    station_counts = train[c].to_numpy()

    # Fancy indexing resolves every lag position in one step; this is the
    # positional equivalent of train.iloc[position][c] applied row by row.
    X = pd.DataFrame(
        {col: station_counts[rows] for col, rows in train_lag_rows.items()},
        index=train.index,
    )
    y = train[c]

    # Test features are looked up in train as well: the lag positions of test
    # rows also point into the train table.
    X_test = pd.DataFrame(
        {col: station_counts[rows] for col, rows in test_lag_rows.items()},
        index=test.index,
    )

    model = linear_model.LinearRegression()
    model.fit(X, y)

    y_pred = model.predict(X_test)

    test[c] = y_pred

83 / 83   SUPERNOVA LJUBLJANA - RUDNIK                                                                                        

Remove extra columns

In [309]:
# Drop the eight engineered feature columns from test.
feature_columns = [
    'hour', 'day', 'month', 'weekday',
    'bikes_before_60', 'bikes_before_90', 'bikes_before_120', 'bikes_before_180',
]
test = test.drop(columns=feature_columns)

Save result

In [307]:
# Write test to a CSV whose name carries the current day_month-hour-minute-second.
now = datetime.now()
out_name = f"bicikelj_out_{now:%d_%m-%H-%M-%S}.csv"

test.to_csv(out_name, index=False)