In [34]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, ensemble, kernel_ridge, svm
from sklearn.linear_model import LinearRegression

## Load data

In [35]:
# Ignore ORIGIN_CALL and other categorical columns
train_arr = np.loadtxt("../data/no_coord_train.csv", dtype=np.float32, delimiter=",", skiprows=1)[:, 4:]
train_arr = train_arr[train_arr[:, -1] < 15000]
train_arr = train_arr[train_arr[:, -1] > 50]
# Split into training/validation sets
np.random.shuffle(train_arr)
valid_arr = train_arr[int(0.9 * train_arr.shape[0]):]
train_arr = train_arr[:int(0.9 * train_arr.shape[0])]

train_label = train_arr[:, -1]
train = train_arr[:, :-1]

valid_label = valid_arr[:, -1]
valid = valid_arr[:, :-1]

In [38]:
max(train_label)

4155.0

In [36]:
def score(model):
    pred = model.predict(valid)
    print(f"RMSE: {np.sqrt(np.sum((pred - valid_label)**2) / len(valid))}")

## Models

In [37]:
lin_reg = LinearRegression()
lin_reg.fit(train, train_label)
score(lin_reg)

RMSE: 441.217188522904


### KNN Regressor

In [21]:
KNN_Regressor = neighbors.KNeighborsRegressor(n_neighbors=10)
KNN_Regressor.fit(train, train_label)

In [22]:
score(KNN_Regressor)

RMSE: 453.0193377589577


### Gradient Boosting

In [7]:
grad_boost = ensemble.GradientBoostingRegressor(learning_rate=0.001, n_estimators=200)
grad_boost.fit(train, train_label, sample_weight=train_label)

In [8]:
score(grad_boost)

RMSE: 518.5153579783661


### Random Forest Regression

In [9]:
random_forest = ensemble.RandomForestRegressor(n_estimators=200, n_jobs=2)
random_forest.fit(train, train_label, sample_weight=train_label)

In [10]:
score(random_forest)

RMSE: 432.5875090878751


## Create prediction csv

In [None]:
import pandas as pd
# Ignore ORIGIN_CALL and other categorical columns
test = np.loadtxt("../data/no_coord_testvf.csv", delimiter=",", skiprows=1)[:, 3:]
df_pred = pd.read_csv("../data/sampleSubmission.csv")
KNN_Regressor.predict(test)

In [None]:
df_pred["TRAVEL_TIME"] = preds
df_pred.to_csv("./predictions.csv", index=False)