In [190]:
import pandas as pd
import numpy as np
from numpy.linalg import inv as inv

In [191]:
df_train = pd.read_csv("./data/train.csv",date_parser=['date'])
df_test = pd.read_csv("./data/test.csv",date_parser=['date'])
df = pd.read_csv("data for svt.csv")

In [192]:
def ten2mat(tensor, mode):
    return np.reshape(np.moveaxis(tensor, mode, 0), (tensor.shape[mode], -1), order = 'F')

In [193]:
def mat2ten(mat, tensor_size, mode):
    index = list()
    index.append(mode)
    for i in range(tensor_size.shape[0]):
        if i != mode:
            index.append(i)
    return np.moveaxis(np.reshape(mat, list(tensor_size[index]), order = 'F'), 0, mode)

In [194]:
def compute_mape(var, var_hat):
    return np.sum(np.abs(var - var_hat) / var) / var.shape[0]

def compute_rmse(var, var_hat):
    return  np.sqrt(np.sum((var - var_hat) ** 2) / var.shape[0])

In [195]:
def svt_tnn(mat, alpha, rho, theta):
    """This is a Numpy dependent singular value thresholding (SVT) process."""
    u, s, v = np.linalg.svd(mat, full_matrices = 0)
    vec = s.copy()
    vec[theta :] = s[theta :] - alpha / rho
    vec[vec < 0] = 0
    return np.matmul(np.matmul(u, np.diag(vec)), v)

In [196]:
def LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter):
    """Low-Rank Tenor Completion with Truncated Nuclear Norm, LRTC-TNN."""
    
    dim = np.array(sparse_tensor.shape)
    pos_missing = np.where(sparse_tensor == 0)
    pos_test = np.where((dense_tensor != 0) & (sparse_tensor == 0))
    
    X = np.zeros(np.insert(dim, 0, len(dim))) # \boldsymbol{\mathcal{X}}
    T = np.zeros(np.insert(dim, 0, len(dim))) # \boldsymbol{\mathcal{T}}
    Z = sparse_tensor.copy()
    last_tensor = sparse_tensor.copy()
    snorm = np.sqrt(np.sum(sparse_tensor ** 2))
    it = 0
    while True:
        rho = min(rho * 1.05, 1e5)
        for k in range(len(dim)):
            X[k] = mat2ten(svt_tnn(ten2mat(Z - T[k] / rho, k), alpha[k], rho, np.int(np.ceil(theta * dim[k]))), dim, k)
        Z[pos_missing] = np.mean(X + T / rho, axis = 0)[pos_missing]
        T = T + rho * (X - np.broadcast_to(Z, np.insert(dim, 0, len(dim))))
        tensor_hat = np.einsum('k, kmnt -> mnt', alpha, X)
        tol = np.sqrt(np.sum((tensor_hat - last_tensor) ** 2)) / snorm
        last_tensor = tensor_hat.copy()
        it += 1
        if (it + 1) % 50 == 0:
            print('Iter: {}'.format(it + 1))
            print('RMSE: {:.6}'.format(compute_rmse(dense_tensor[pos_test], tensor_hat[pos_test])))
            print()
        if (tol < epsilon) or (it >= maxiter):
            break

    print('Imputation MAPE: {:.6}'.format(compute_mape(dense_tensor[pos_test], tensor_hat[pos_test])))
    print('Imputation RMSE: {:.6}'.format(compute_rmse(dense_tensor[pos_test], tensor_hat[pos_test])))
    print()
    
    return tensor_hat


In [197]:
df = df[['speed']] 

In [198]:
np_data = np.array(df).T

In [199]:
dense_tensor = np_data.reshape(1,-1,24)
np.random.seed(102)
random_tensor = np.random.random(size=[1,730,24])

missing_rate = 0.02

### Random missing (RM) scenario:
binary_tensor = np.round(random_tensor + 0.5 - missing_rate)
sparse_tensor = np.multiply(dense_tensor, binary_tensor)

In [200]:
import time
start = time.time()
alpha = np.ones(3) / 3
rho = 1e-5
theta = 0.25
epsilon = 1e-5
maxiter = 120
tensor_hat=LRTC(dense_tensor, sparse_tensor, alpha, rho, theta, epsilon, maxiter)
end = time.time()
print('Running time: %d seconds'%(end - start))

Iter: 50
RMSE: 3.03952

Iter: 100
RMSE: 3.07426

Imputation MAPE: 0.0885956
Imputation RMSE: 2.86736

Running time: 0 seconds


In [201]:
tensor_hat=tensor_hat.reshape(1,1,-1).squeeze()

In [202]:
array_df = np.array(df) 
test_pos,_ = np.where(array_df==0)

In [203]:
test_pos[10:]

array([ 8762,  8765,  8767, ..., 17517, 17518, 17519])

In [204]:
def samplesubmission():
    speed = []
    for i in test_pos[10:]:
        speed.append(tensor_hat[i])
    return speed

In [205]:
speed=samplesubmission()

In [206]:
speed

[48.37568388442594,
 47.97728455003376,
 35.847792713402924,
 30.294842939063546,
 39.75754479346644,
 24.83710184292985,
 21.287468032457753,
 49.661406892238595,
 49.0486954996884,
 31.19975633279485,
 26.65105900888297,
 23.69518902995597,
 25.048362572557604,
 28.34286804495531,
 26.694313566107457,
 41.73092222857515,
 42.11835776109946,
 46.03248994722438,
 49.04236639042592,
 48.89675973383589,
 47.71901254196612,
 45.31959929584513,
 20.120094712360846,
 26.37352196631344,
 25.19015194145324,
 19.168345643344736,
 20.429920265510333,
 19.61550364930966,
 15.49936657334436,
 13.24393023725133,
 23.92785931863739,
 45.28056579496999,
 49.25968304369039,
 30.40379354505993,
 25.079034921695335,
 22.136052664200086,
 17.954851697645932,
 18.263280114993876,
 43.97909376432859,
 37.26235587542534,
 40.38798033843611,
 47.76472683088827,
 47.96156607700952,
 44.72917754104245,
 18.7835571787161,
 15.857142132714463,
 17.143551551538877,
 10.044541505669526,
 44.80511414876625,
 47.86

In [207]:
submission = pd.read_csv("./data/sampleSubmission.csv",index_col=0)
submission

Unnamed: 0_level_0,speed
id,Unnamed: 1_level_1
0,0
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0
9,0


In [208]:
submission.speed = speed
submission

Unnamed: 0_level_0,speed
id,Unnamed: 1_level_1
0,48.375684
1,47.977285
2,35.847793
3,30.294843
4,39.757545
5,24.837102
6,21.287468
7,49.661407
8,49.048695
9,31.199756


In [209]:
submission.to_csv("sampleSubmission.csv")