In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from gensim.models import Word2Vec
import torch
import datetime


from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Directory that the Word2Vec model files below are saved to and loaded from.
DATA_PATH="../data/sz_taxi_202006/"
# Embedding dimensionality; passed to Word2Vec as vector_size and used in the model file name.
vec_size=512

# Show the current working directory (the relative paths above are resolved against it).
%pwd

'/home/cseadmin/dz/TrafficFlowModel/model'

In [2]:
# Load the pre-extracted trajectories. The path is built from DATA_PATH so this cell
# stays consistent with the other cells that read/write files in the same directory.
# Each trajectory is a list of points shaped like [road_id, timestamp, value]
# (third field is presumably a speed reading -- TODO confirm against the extraction script).
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects and can execute
# code embedded in the file; only load .npy files from a trusted source.
traj_list=np.load(os.path.join(DATA_PATH, "sz_taxi_202006_traj_list.npy"), allow_pickle=True)

len(traj_list)
traj_list[0]

1751602

[[198, Timestamp('2020-06-01 00:43:29'), 56.0],
 [199, Timestamp('2020-06-01 00:43:49'), 55.5],
 [448, Timestamp('2020-06-01 00:44:09'), 55.0]]

In [10]:
# Reduce every trajectory to its sequence of road ids (the first field of each point);
# these sequences are the "sentences" fed to Word2Vec below.
all_tracks=[[point[0] for point in traj] for traj in traj_list]

len(all_tracks)
all_tracks[0]

1751602

[198, 199, 448]

In [13]:
# Train road-id embeddings on the trajectory "sentences":
# skip-gram (sg=1) with hierarchical softmax (hs=1); min_count=1 keeps every road id.
w2v_model_path=os.path.join(DATA_PATH, f"w2v_all_traj_{vec_size}.model")
w2v_all_traj=Word2Vec(
    sentences=all_tracks,
    sg=1,
    hs=1,
    vector_size=vec_size,
    window=10,
    min_count=1,
    workers=4,
)

# Persist the trained model so later cells can reload it without retraining.
w2v_all_traj.save(w2v_model_path)

model

---

In [2]:
# Reload the saved Word2Vec model and keep a handle on its keyed vectors.
w2v_model_path=os.path.join(DATA_PATH, f"w2v_all_traj_{vec_size}.model")
w2v_all_traj=Word2Vec.load(w2v_model_path)
wv=w2v_all_traj.wv

# Sanity check: distance between the embeddings of road ids 0 and 3.
wv.distance(0, 3)

0.7991076111793518

In [3]:
# Permutation that sorts the vocabulary keys (road ids) ascending, so that after
# reordering, row i of the matrix belongs to the i-th smallest road id.
index=np.argsort(wv.index_to_key)
# Take the normalised vectors and reorder their rows in one step.
embed_vectors=wv.get_normed_vectors()[index]

embed_vectors[0][:10]

array([-0.04961416,  0.03711881,  0.00701622, -0.02471929,  0.04195437,
        0.05551275, -0.0024164 ,  0.03509161, -0.01095492,  0.05997162],
      dtype=float32)

In [7]:
# Pairwise dot products between all embedding rows; the result is square,
# with one row/column per embedding.
correlation_matrix = np.matmul(embed_vectors, embed_vectors.T)

correlation_matrix.shape
correlation_matrix[0][:10]

(492, 492)

array([1.0000002 , 0.13911077, 0.18122032, 0.20089237, 0.2514562 ,
       0.16970405, 0.21216552, 0.12217825, 0.12732378, 0.13965628],
      dtype=float32)

In [41]:
class DontKnowWhat2EatNN(torch.nn.Module):
    """Self-attention over a fixed set of node embeddings, returning an N x N matrix.

    The embeddings are passed through one multi-head self-attention layer and the
    attended vectors are multiplied with their own transpose.

    Args:
        graph_embed_vectors: 2-D array-like of shape (N, embed_dim), one row per node.
        num_heads: number of attention heads; embed_dim must be divisible by it
            (enforced by torch.nn.MultiheadAttention).
        drop_rate: dropout probability handed to the attention layer.

    Raises:
        ValueError: if graph_embed_vectors is not 2-D.
    """
    def __init__(self, graph_embed_vectors, num_heads=8, drop_rate=0):
        super().__init__()

        # Copy into a float32 tensor so the module never aliases the caller's array.
        embed_tensor=torch.as_tensor(graph_embed_vectors, dtype=torch.float32).detach().clone()
        if embed_tensor.dim()!=2:
            raise ValueError(
                "graph_embed_vectors must be 2-D (N, embed_dim), got shape %s" % (tuple(embed_tensor.shape),)
            )

        # Registered as a buffer (not a plain attribute) so it follows the module through
        # .to()/.cuda()/.double(); otherwise the attention weights and the embeddings end
        # up on different devices/dtypes. persistent=False keeps state_dict keys unchanged.
        # Layout is (N, 1, embed_dim): sequence length N, batch of 1, as expected by
        # MultiheadAttention with its default batch_first=False.
        self.register_buffer("graph_embed_vectors", embed_tensor.unsqueeze(1), persistent=False)
        self.embed_dim=embed_tensor.shape[-1]
        self.num_heads=num_heads
        self.drop_rate=drop_rate

        self.mh_attention=torch.nn.MultiheadAttention(self.embed_dim, self.num_heads, dropout=self.drop_rate)

    def forward(self, x):
        """
        x: (batch_size, N, in_step, num_features)

        NOTE: x is accepted but not used yet; the output depends only on the stored
        embeddings and the attention weights.

        Returns:
            (N, N) tensor: attended embeddings multiplied by their transpose.
        """
        # Call the submodule itself (not .forward) so registered hooks are honoured.
        attn_output, _=self.mh_attention(self.graph_embed_vectors, self.graph_embed_vectors, self.graph_embed_vectors)
        # Drop the singleton batch axis: (N, 1, embed_dim) -> (N, embed_dim).
        attn_output=attn_output.squeeze(1)
        correlation_matrix=attn_output @ attn_output.T

        return correlation_matrix
    

# Smoke test: build the module from the Word2Vec embeddings and run one forward pass.
# The argument is a placeholder -- forward() does not read x yet.
model=DontKnowWhat2EatNN(embed_vectors)
# Invoke the module itself rather than .forward() so registered hooks are honoured.
model(1)

tensor([[0.0183, 0.0183, 0.0183,  ..., 0.0183, 0.0183, 0.0183],
        [0.0183, 0.0183, 0.0183,  ..., 0.0183, 0.0183, 0.0183],
        [0.0183, 0.0183, 0.0183,  ..., 0.0183, 0.0183, 0.0183],
        ...,
        [0.0183, 0.0183, 0.0183,  ..., 0.0183, 0.0183, 0.0183],
        [0.0183, 0.0183, 0.0183,  ..., 0.0183, 0.0183, 0.0183],
        [0.0183, 0.0183, 0.0183,  ..., 0.0183, 0.0183, 0.0183]],
       grad_fn=<MmBackward>)

train

---

In [None]:
def train_one_epoch(model, trainset_loader, optimizer, criterion, gpu=True):
    """Run one optimisation pass over trainset_loader and return the mean batch loss.

    Args:
        model: torch.nn.Module to train (put into train mode here).
        trainset_loader: iterable yielding (x_batch, y_batch) tensor pairs.
        optimizer: optimizer stepping the model's parameters.
        criterion: callable loss, invoked as criterion(output, target).
        gpu: when True and CUDA is available, each batch is moved to the GPU.
            The model itself is NOT moved here; the caller must place it on the
            matching device.

    Returns:
        float: unweighted mean of the per-batch loss values.

    Raises:
        ValueError: if trainset_loader yields no batches.
    """
    model.train()
    # Device decision is loop-invariant, so evaluate it once.
    use_cuda=gpu and torch.cuda.is_available()
    batch_loss_list=[]
    for x_batch, y_batch in trainset_loader:
        if use_cuda:
            x_batch = x_batch.cuda()
            y_batch = y_batch.cuda()

        # Call the modules directly (not .forward) so registered hooks are honoured.
        out_batch = model(x_batch)
        loss = criterion(out_batch, y_batch)
        batch_loss_list.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if not batch_loss_list:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("trainset_loader yielded no batches")

    return sum(batch_loss_list)/len(batch_loss_list)

@torch.no_grad()
def eval_model(model, valset_loader, criterion, gpu=True):
    """Evaluate model on valset_loader without gradients and return the mean batch loss.

    Args:
        model: torch.nn.Module to evaluate (put into eval mode here).
        valset_loader: iterable yielding (x_batch, y_batch) tensor pairs.
        criterion: callable loss, invoked as criterion(output, target).
        gpu: when True and CUDA is available, each batch is moved to the GPU.
            The model itself is NOT moved here; the caller must place it on the
            matching device.

    Returns:
        float: unweighted mean of the per-batch loss values.

    Raises:
        ValueError: if valset_loader yields no batches.
    """
    model.eval()
    # Device decision is loop-invariant, so evaluate it once.
    use_cuda=gpu and torch.cuda.is_available()
    batch_loss_list=[]
    for x_batch, y_batch in valset_loader:
        if use_cuda:
            x_batch = x_batch.cuda()
            y_batch = y_batch.cuda()

        # Call the modules directly (not .forward) so registered hooks are honoured.
        out_batch = model(x_batch)
        loss = criterion(out_batch, y_batch)
        batch_loss_list.append(loss.item())

    if not batch_loss_list:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("valset_loader yielded no batches")

    return sum(batch_loss_list)/len(batch_loss_list)

def train(model, trainset_loader, valset_loader, optimizer, criterion, max_epochs=100, early_stop=10, verbose=1, gpu=True, plot=True):
    """Train with per-epoch validation and early stopping, optionally plotting the loss curves.

    Args:
        model: torch.nn.Module to train.
        trainset_loader: iterable of (x_batch, y_batch) pairs used for training.
        valset_loader: iterable of (x_batch, y_batch) pairs used for validation.
        optimizer: optimizer stepping the model's parameters.
        criterion: loss callable.
        max_epochs: upper bound on the number of epochs.
        early_stop: stop after this many consecutive epochs without a new best
            validation loss.
        verbose: print losses every `verbose` epochs; 0 (or None) disables printing.
        gpu: forwarded to both train_one_epoch and eval_model.
        plot: when True, draw the train/validation loss curves with matplotlib.
    """
    wait=0
    min_val_loss=np.inf
    
    train_loss_list=[]
    val_loss_list=[]
    
    for epoch in range(max_epochs):
        train_loss=train_one_epoch(model, trainset_loader, optimizer, criterion, gpu)
        train_loss_list.append(train_loss)
        
        # Forward `gpu` so validation batches are placed on the same device as the
        # training batches (previously eval always fell back to its default gpu=True).
        val_loss=eval_model(model, valset_loader, criterion, gpu)
        val_loss_list.append(val_loss)
        
        # Guard against verbose=0, which would otherwise raise ZeroDivisionError.
        if verbose and (epoch+1)%verbose==0:
            print(datetime.datetime.now(), "Epoch", epoch,
                  "\tTrain Loss = %.5f"%train_loss,
                  "\tVal Loss = %.5f"%val_loss)
        
        if val_loss<min_val_loss:
            # New best validation loss: reset the patience counter.
            wait=0
            min_val_loss=val_loss
            best_epoch=epoch
        else:
            wait+=1
            if wait >= early_stop:
                print("Early stopping at epoch: %d" % epoch)
                break
        
    if plot:
        # Derive the x-axis from the recorded losses rather than the loop variable,
        # so this also works when no epoch was run (max_epochs=0).
        epochs_run=range(len(train_loss_list))
        plt.plot(epochs_run, train_loss_list, "-", label="Train Loss")
        plt.plot(epochs_run, val_loss_list, "-", label="Val Loss")
        plt.title("Epoch-Loss")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")
        plt.legend()
        plt.show()
