In [1]:
import pandas as pd
from sklearn.cross_validation import train_test_split
import xgboost as xgb
import numpy as np

import glob

In [2]:
def predict(clf, places, check_in):
    """Return the three most probable places for a single check-in.

    Parameters
    ----------
    clf : object
        Fitted classifier exposing ``predict_proba``; it is called with a
        1 x 5 array and the first row of its result is used.
    places : sequence
        Place ids, positionally aligned with the probability columns
        returned by ``clf.predict_proba``.
    check_in : mapping
        Record providing the keys ``x``, ``y``, ``accuracy``,
        ``hour_of_day`` and ``day_of_week``.

    Returns
    -------
    str
        Up to three place ids separated by single spaces, ordered from the
        highest to the lowest predicted probability.
    """
    feature_names = ("x", "y", "accuracy", "hour_of_day", "day_of_week")
    features = np.array([[check_in[name] for name in feature_names]])
    probabilities = clf.predict_proba(features)[0]

    # Sorting on the negated probability keeps Python's stable ordering, so
    # equally probable places stay in their original `places` order.
    ranked = sorted(zip(places, probabilities), key=lambda pair: -pair[1])
    best_three = [place for place, _ in ranked[:3]]
    return ' '.join(best_three)

In [3]:
def make_predictions(suffix):
    """Train a classifier for one grid cell and write its top-3 predictions.

    Reads ``data/grid_data/train/train_<suffix>`` and
    ``data/grid_data/test/test_<suffix>``, fits an XGBoost multi-class
    classifier on the training check-ins, and writes
    ``data/grid_data/submission/submission_<suffix>`` as a headerless CSV with
    one ``row_id,place_id`` line per test row. ``place_id`` holds up to three
    space-separated place ids, ordered from most to least probable.

    Parameters
    ----------
    suffix : str
        File-name suffix shared by the train, test and submission CSVs
        (for example ``"9800_52x55.csv"``).
    """
    feature_columns = ['x', 'y', 'accuracy', 'hour_of_day', 'day_of_week']

    print("Reading training data")
    train_df = pd.read_csv('data/grid_data/train/train_' + suffix, dtype={"place_id": str})

    # Encode each place id as its order of first appearance, so that column j
    # of the predicted probabilities corresponds to places[j].
    places = train_df['place_id'].unique()
    places_index = {place: label for label, place in enumerate(places)}

    X_train = train_df[feature_columns].values
    # Series.map does the dictionary lookup without a Python-level pass over
    # whole rows (the previous DataFrame.apply(axis=1) built a Series per row).
    y_train = train_df['place_id'].map(places_index).values

    print("Training classifier")
    clf = xgb.XGBClassifier(max_depth=10, n_estimators=15, learning_rate=0.1, objective='multi:softprob',
                        nthread=4, subsample=1.0, colsample_bytree=0.85, silent=1, seed=7171)
    clf.fit(X_train, y_train)

    print("Making submission")
    test_df = pd.read_csv('data/grid_data/test/test_' + suffix)
    if len(test_df):
        # Score every test row in a single call instead of invoking the model
        # once per row.
        proba = np.asarray(clf.predict_proba(test_df[feature_columns].values))
        # A stable sort on the negated probabilities ranks classes from most to
        # least likely and leaves ties in label order.
        top3 = np.argsort(-proba, axis=1, kind="stable")[:, :3]
        place_labels = np.asarray(places, dtype=object)
        test_df["place_id"] = [' '.join(row) for row in place_labels[top3]]
    else:
        # Nothing to score; still emit an (empty) submission file.
        test_df["place_id"] = []
    test_df[["row_id", "place_id"]].to_csv("data/grid_data/submission/submission_" + suffix, index=False, header=False)

In [5]:
# Run the per-cell pipeline for grid cells 9800-9999. Each training file is
# located by globbing on the cell index; the suffix handed to make_predictions
# is whatever follows the shared path prefix.
TRAIN_PREFIX = "data/grid_data/train/train_"
for i in range(9800, 10000):
    matches = glob.glob(TRAIN_PREFIX + str(i) + "_*")
    if not matches:
        raise FileNotFoundError("No training file matches " + TRAIN_PREFIX + str(i) + "_*")
    # Strip the prefix by its actual length rather than a hard-coded offset.
    suffix = matches[0][len(TRAIN_PREFIX):]
    print('Making predictions for suffix ' + suffix)
    make_predictions(suffix)

Making predictions for suffix 9800_52x55.csv
Reading training data
Training classifier
Making submission
Making predictions for suffix 9801_64x90.csv
Reading training data
Training classifier
Making submission
Making predictions for suffix 9802_1x32.csv
Reading training data
Training classifier
Making submission
Making predictions for suffix 9803_53x53.csv
Reading training data
Training classifier
Making submission
Making predictions for suffix 9804_35x98.csv
Reading training data
Training classifier
Making submission
Making predictions for suffix 9805_55x33.csv
Reading training data
Training classifier
Making submission
Making predictions for suffix 9806_97x83.csv
Reading training data
Training classifier
Making submission
Making predictions for suffix 9807_90x13.csv
Reading training data
Training classifier
Making submission
Making predictions for suffix 9808_41x35.csv
Reading training data
Training classifier
Making submission
Making predictions for suffix 9809_76x98.csv
Reading tra