In [1]:
import pandas as pd
import numpy as np
import pickle
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Test for Submission

In [2]:
# Number of time steps fed to the model per coordinate axis.
num_feature = 10

# NOTE(review): pickle can execute arbitrary code on load; only load a scaler
# file that was produced by this project.
# The context manager closes the file handle deterministically.
with open('misc/scaler.pickle', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

print('INFO: Begin preprocessing...')
selected_col = ['hash', 'x_entry', 'y_entry', 'x_exit', 'y_exit']
test = pd.read_csv('data_test.csv').drop('Unnamed: 0', axis=1)
test = test[selected_col]

# One entry per distinct hash, in order of first appearance; the per-hash
# windows appended to test_ped_data follow this same order.
test_hash = test.hash.unique()
test_ped_data = []

hash_count = test_hash.shape[0]
counter = 1
print('INFO: Separate data by Hash. Total hash: {}'.format(hash_count))

# Build one fixed-size window per hash: (num_feature + 1) points are selected
# (left-padded or truncated), the last point is dropped, and the remaining
# num_feature points are transposed to shape (2, num_feature) and scaled.
for hsh in test_hash:
    filtered_data = test.loc[test.hash == hsh].drop('hash', axis=1).values

    # Each row contributes two consecutive points: its entry, then its exit.
    tmp_arr = []
    for row in filtered_data:
        tmp_arr.append(list(row[:2]))
        tmp_arr.append([row[2], row[3]])

    # arrT has one point per row: shape (n_points, 2).
    arrT = np.array(tmp_arr)
    window = num_feature + 1
    n_points = arrT.shape[0]

    if n_points < window:
        # Too short: left-pad by repeating the first point.
        padding = np.repeat(arrT[:1], window - n_points, axis=0)
        arrT = np.concatenate((padding, arrT))
    elif n_points > window:
        # Too long: keep only the most recent points.
        arrT = arrT[-window:]
    # n_points == window needs no adjustment. Previously this case fell
    # through both branches and reused arrT from the previous hash (or raised
    # NameError on the first iteration).

    # Drop the final point and switch back to (2, num_feature) layout.
    tmp_arr = arrT[:-1].T
    test_ped_data.append(scaler.transform(tmp_arr))

    print("INFO: Progress: {}".format(counter), end="\r")
    counter += 1

test_ped_data = np.array(test_ped_data)

INFO: Begin preprocessing...
INFO: Separate data by Hash. Total hash: 33515
INFO: Progress: 33515

In [None]:
# Inspect the dimensions of the stacked per-hash test windows.
test_ped_data.shape

In [3]:
# Cache the hashes together with their preprocessed windows so the slow
# preprocessing cell does not have to be re-run.
test_ready_data = (test_hash, test_ped_data)
# The context manager guarantees the pickle is flushed and the handle closed.
with open("misc/test_ready_data.pickle", "wb") as out_file:
    pickle.dump(test_ready_data, out_file)

In [None]:
# Load Test Ready data
# NOTE(review): pickle can execute arbitrary code on load; only load files
# written by this notebook.
with open("misc/test_ready_data.pickle", "rb") as in_file:
    test_hash, test_ped_data = pickle.load(in_file)
# test_ped_data

In [4]:
# Load Model
num_feature = 10
rnn_size = 512

# Rebuild the network the weights file expects: one LSTM layer over inputs of
# shape (2, num_feature), followed by a 2-unit dense output layer.
model = Sequential([
    LSTM(rnn_size, input_shape=(2, num_feature)),
    Dense(2),
])
model.load_weights('misc/keras_lstm.h5')

In [5]:
# Predict on the scaled windows, then undo the scaling on the predictions.
scaled_pred = model.predict(test_ped_data)
pred_Y = scaler.inverse_transform(scaled_pred)
pred_Y[:10]

array([[  3755459.2, -19161368. ],
       [  3748923. , -19342528. ],
       [  3751347. , -19234874. ],
       [  3747429.2, -19330778. ],
       [  3754520.8, -19211542. ],
       [  3756649.2, -19195170. ],
       [  3752851.8, -19248304. ],
       [  3755731.2, -19163004. ],
       [  3752868.2, -19208434. ],
       [  3754711.2, -19238338. ]], dtype=float32)

In [None]:
# Inspect the dimensions of the prediction array.
pred_Y.shape

In [15]:
# Read trajectory id
selected_col_traj = ['hash', 'trajectory_id', 'x_entry', 'y_entry', 'x_exit', 'y_exit']
test_traj = pd.read_csv('data_test.csv').drop('Unnamed: 0', axis=1)
test_traj = test_traj[selected_col].loc[test_traj.x_exit.isna()]

hash_traj = {}
for row in test_traj.values:
    hsh = row[0]
    traj = row[1]
    hash_traj[hsh] = traj

In [17]:
# Bounding box used for the in_city flag (inclusive on both ends).
x_min, x_max = 3750901.5068, 3770901.5068
y_min, y_max = -19268905.6133, -19208905.6133

# Write one "id,target" line per prediction. The context manager ensures the
# file is flushed and closed; previously the handle was never closed.
with open('misc/rnn_result.csv', 'w') as f:
    f.write('id,target\n')

    for i in range(len(pred_Y)):
        # pred_Y[i] belongs to test_hash[i]; hash_traj supplies the id to emit.
        hsh = test_hash[i]
        traj = hash_traj[hsh]
        x_cor = pred_Y[i][0]
        y_cor = pred_Y[i][1]

        if (x_min <= x_cor <= x_max) and (y_min <= y_cor <= y_max):
            in_city = 1
        else:
            in_city = 0

        f.write("{},{}\n".format(traj, in_city))
        print(i, end='\r')

33514

# Test from Training

In [18]:
# NOTE(review): pickle can execute arbitrary code on load; only load files
# produced by this project.
# Context managers close both file handles deterministically.
with open('misc/trained_ready_data.pickle', 'rb') as data_file:
    trained_ready_data = pickle.load(data_file)
training_X, training_Y, dev_X, dev_Y, testing_X, testing_Y = trained_ready_data

with open('misc/scaler.pickle', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [27]:
# Inspect the dimensions of the training inputs.
training_X.shape

(90584, 2, 10)

In [23]:
# Dimensions of training_X and dev_X stacked along the first axis.
np.concatenate((training_X, dev_X)).shape

(120779, 2, 10)

In [29]:
# Dimensions of training_Y and dev_Y stacked along the first axis.
np.concatenate((training_Y, dev_Y)).shape

(120779, 2)

In [None]:
# Size of the second axis of training_X.
np.array(training_X).shape[1]

In [None]:
# Load model
num_feature = 10
rnn_size = 512

# Rebuild the network the weights file expects: one LSTM layer over inputs of
# shape (2, num_feature), followed by a 2-unit dense output layer.
model = Sequential([
    LSTM(rnn_size, input_shape=(2, num_feature)),
    Dense(2),
])
model.load_weights('misc/keras_lstm.h5')

In [None]:
# Convert the held-out inputs and targets to NumPy arrays.
testing_X = np.array(testing_X)
testing_Y = np.array(testing_Y)

In [None]:
# Predict on the first ten held-out samples only.
predict_Y = model.predict(testing_X[0:10])

In [None]:
# Undo the scaling on the first ten reference targets, for comparison with the predictions.
scaler.inverse_transform(testing_Y[0:10])

In [None]:
# Undo the scaling on the predictions made for those ten samples.
scaler.inverse_transform(predict_Y)

## Explore

In [None]:
import numpy as np

In [None]:
# Experiment: left-pad a (2, n) array by repeating its first column three times.
arr = np.array([[1, 2, 3], [6, 7, 8]])
arrT = arr.T
first_elm = arrT[0]

# Three copies of the first point, stacked in front of the transposed data.
padding = np.repeat(first_elm[np.newaxis, :], 3, axis=0)
arrT = np.concatenate((padding, arrT))

arr = arrT.T

In [None]:
# Prepend the row [1, 6] to the transpose of arr.
np.concatenate(([[1,6]], arr.T))

In [None]:
# Display the transpose of arr.
arr.T

In [None]:
# Experiment: overwrite every value of a dict while iterating over it.
dct = {'a': 1, 'b': 2, 'c': 3}
print(dct)

# Only values of existing keys are replaced, so the key set never changes
# during iteration.
for k in dct:
    dct[k] = 5

print(dct)

In [None]:
# Toy fixtures: ax holds the integers 1..30 laid out as 3 blocks of 2 rows x 5
# columns; ay holds the three pairs (1, 2), (2, 3), (3, 4).
ax = np.arange(1, 31).reshape(3, 2, 5)
ay = np.array([[start, start + 1] for start in (1, 2, 3)])

In [None]:
# View the 30 values of ax as 15 rows of 2.
ax.reshape(15,2,)

In [None]:
# Fit a [-1, 1] min-max scaler on the single-column values 0..30.
# NOTE(review): this rebinds the global `scaler` that was loaded from
# misc/scaler.pickle earlier in the notebook.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(np.arange(31).reshape(-1, 1))

In [None]:
# Scale ay with the fitted scaler and display the result.
ay_scaled = scaler.transform(ay)
ay_scaled

In [None]:
# Apply the same scaler to the transpose of ay.
scaler.transform(ay.T)

In [None]:
# Undo the scaling on ay_scaled.
scaler.inverse_transform(ay_scaled)

In [None]:
from concurrent.futures import ProcessPoolExecutor, as_completed
import time
import asyncio

In [None]:
def times_2(x):
    """Return ``x`` multiplied by two."""
    doubled = x * 2
    return doubled



In [None]:
# Time a plain sequential loop that calls times_2 on 0..99.
arr = range(100)
start_time = time.time()

for x in arr:
    # end='\r' keeps the progress on a single, overwritten line.
    print('{} {}'.format(x, times_2(x)), end='\r')

elapsed_time = time.time() - start_time
print(elapsed_time)

In [None]:
# Evaluate 10 to the power 12.
10**12

In [None]:
from concurrent.futures import ProcessPoolExecutor, as_completed
import math


def factorize_naive(n):
    """Factorize ``n`` by trial division.

    Returns the prime factors of ``n`` in non-decreasing order, with
    repetition. Any ``n`` below 2 yields an empty list.
    """
    found = []
    if n < 2:
        return found

    divisor = 2
    while n != 1:
        if n % divisor == 0:
            # divisor divides n: record it and retry the same divisor.
            found.append(divisor)
            n = n // divisor
            continue
        if divisor * divisor >= n:
            # No divisor up to sqrt(n) remains, so what is left is prime.
            found.append(n)
            break
        # Candidates after 2 are the odd numbers: 2 -> 3 -> 5 -> 7 ...
        divisor += 2 if divisor > 2 else 1
    return found


def chunked_worker(nums):
    """Factorize every number in ``nums``.

    Returns a dict mapping each number to the list produced by
    ``factorize_naive`` for it.
    """
    factored = {}
    for number in nums:
        factored[number] = factorize_naive(number)
    return factored


def pool_factorizer_chunked(nums, nprocs):
    """Factorize ``nums`` in a process pool, one task per chunk.

    ``nums`` is cut into at most ``nprocs`` contiguous chunks of equal length
    (the last one may be shorter); each chunk is handed to ``chunked_worker``
    in a worker process and the per-chunk dicts are merged.

    Args:
        nums: sliceable sequence of integers to factorize.
        nprocs: number of chunks, and upper bound on worker processes.

    Returns:
        dict mapping each number to its list of factors. Empty when there is
        nothing to do (``nums`` is empty or ``nprocs`` is negative).

    Raises:
        ZeroDivisionError: if ``nprocs`` is 0.
    """
    chunksize = int(math.ceil(len(nums) / float(nprocs)))

    # Slice up front and drop empty chunks: when len(nums) is not a multiple
    # of chunksize the trailing slices are empty and would only cost a
    # pointless round trip to a worker.
    chunks = [nums[chunksize * i: chunksize * (i + 1)] for i in range(nprocs)]
    chunks = [chunk for chunk in chunks if len(chunk) > 0]
    if not chunks:
        return {}

    resultdict = {}
    # Size the pool from the request (len(chunks) <= nprocs) instead of
    # silently falling back to the executor's default worker count.
    with ProcessPoolExecutor(max_workers=len(chunks)) as executor:
        futures = [executor.submit(chunked_worker, chunk) for chunk in chunks]
        for f in as_completed(futures):
            resultdict.update(f.result())
    return resultdict

In [None]:
# Time the process-pool factorizer on a small sample, split into 4 chunks.
nums = [25, 36, 42, 88, 99]
start_time = time.time()
print(pool_factorizer_chunked(nums, 4))
elapsed_time = time.time() - start_time
print(elapsed_time)

In [None]:
# Time the same factorization done sequentially, for comparison with the pool run.
start_time = time.time()
for n in nums:
    print(factorize_naive(n))

elapsed_time = time.time() - start_time
print(elapsed_time)