In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

ModuleNotFoundError: No module named 'google.colab'

In [0]:
%matplotlib inline

import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import scipy
import xml.etree.ElementTree as ET
import  csv

plt.rcParams['figure.figsize'] = (10.0, 10.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'


## Function for trajectory dictionary  and discretization


In [0]:
class TrajDitionary(object):
    
    def __init__(self, xmlfile):
        self.node_num = 0
        self.edge_num = 0
        self.node_dict = {}
        self.edge_dict = {}
        
        # create element tree object 
        tree = ET.parse(xmlfile) 
        root = tree.getroot() 
        self.root = root
        
    def _modify_dict_(self, key, edge_id):
        if key in self.node_dict.keys():
            self.node_dict[key].append(edge_id)
    
    def gen_node_dict(self):
        ''' create dict for node {node_id: lat, lon, edge_id} '''
        for node in self.root.findall("node"):
            id = node.get('id')
            lat = float(node.get('lat'))
            lon = float(node.get('lon'))
            self.node_dict[id] = [lat, lon]
        
        return self.node_dict
            
    def gen_edge_dict(self):
        ''' create dict for edge {edge_id: pnt_id_1, pnt_id_2....} '''
      
        for edge in self.root.findall("way"):
            id = edge.get('id')
            node_seq = []
            
            for subedge in edge.findall('nd'):
                nd = subedge.get('ref')
                node_seq.append(nd)
                self._modify_dict_(nd, id)
            self.edge_dict[id] = node_seq
        
        return self.edge_dict
    
    def node_graph_num(self):
        self.node_num = len(self.node_dict)
        out = self.node_num
        return out
    
    def edge_graph_num(self):
        self.edge_num = len(self.edge_dict)
        out = self.edge_num
        return  out

# class TrajDiscrete(object):
    
#     def __init__(self):
#         self.

In [0]:
def gen_node_file(dicts, outfile, verbose=False):
    """ if verbose=False
        output :=>  [point ID: lat, lon, edge IDs which it belongs to]
                     
        if verbose=True
        output :=>  [point ID: lat, lon]
    """    
    with open(outfile, 'w') as f:
        
        ''' all keys '''
        for key, value in dicts.items():
            f.write(str(key))
            
            ''' each key and its values '''         
            if verbose == False:
                for i in range(len(value)): 
                    f.write('\t' + str(value[i]))
            else:
                f.write('\t' + str(value[0]))
                f.write('\t' + str(value[1]))
            f.write('\n')
   
def gen_edge_file(edge_dict, node_dict, outfile, verbose=False):
    """ if verbose=True
        output :=>  [edge ID: start_point, end_point,
                     num_point, all coordinates
                     
        if verbose=False
        output :=>  [edge ID: points ID on this edge]
    """                           
    with open(outfile, 'w') as f:
        ''' all edges '''
        for key, value in edge_dict.items():
            f.write(str(key))
            value_length = len(value)
            
            ''' each edge '''
            if verbose == False:
                for i in range(value_length):
                    f.write('\t' + str(value[i]))
            else:
                f.write('\t'+value[0]+'\t'+value[-1]+'\t'+str(value_length))
                for i in range(value_length):
                    # get coordinate by point id
                    coords = node_dict.get(value[i])
#                     print(coords)
                    f.write('\t' + str(coords[0]) + '\t' + str(coords[1]))
            f.write('\n')
        
def gen_traj_file(node_dict, infile, outfile, mode='node_by_node'):
    ''' structure: 1,2,4,6,8,12,7,23, '''
    trajs = pd.read_csv(infile, delimiter='\t')
    num_trajs = max(trajs['traj_id']) - 10
    
    DIST_DELTA = 0.02
    DIST_MIN = 9999
    
    ''' dictionary to list '''
    nodeArray = []
    for key, value in node_dict.items():
        temp = [key, value[0], value[1]]
        nodeArray.append(temp)
    nodeArray = np.array(nodeArray)
    
    ''' discretization '''
    if mode == 'node_by_node':
        nodes = []
        with open(outfile, 'w') as f:
            ''' loop all trajs '''
            for id in range(num_trajs):
                ''' for each traj '''
                idx = trajs.index[trajs['traj_id'] == id+1].tolist()
                lat, lon = trajs['lat_start'][idx], trajs['lon_start'][idx]

                geo_pnts = np.array([lat, lon]).T
                dic_pnts = nodeArray[:, 1:].astype(np.float)
                result = scipy.spatial.distance.cdist(geo_pnts, dic_pnts)
                idx = result.argmin(1)
                node_id = nodeArray[idx, 0]
#                 nodes.append(node_id)
            
                for i in range(len(node_id)):
                    f.write(str(node_id[i]) + '\t')
                f.write('\n')
                
    if mode == 'node_by_edge':
        edges = []
        with open(outfile, 'w') as f:
            ''' loop all trajectory '''
            for id in range(num_trajs):
                ''' for each  trajectory'''
                idx = trajs.index[trajs['traj_id'] == id+1].tolist()
                for i in idx:
                    lat, lon = trajs['lat_start'][i], trajs['lon_start'][i]

                    ''' distance computing AND get edge id '''
                    pnt = np.array([lat, lon])
                    edge_id = get_edge_id(pnt, node_dict, nodeArray)
                    edges.append(edge_id)

                print('edges:', len(edges))
                edge_id = getContinousEdge(edges, len(edges))
                edges = []

                for i in range(len(edge_id)):
                    f.write(str(edge_id[i]) + '\t')
                f.write('\n')
            
def getContinousEdge(arr, n):
    vals = []
    prev = 0
    for i in range(0, n):
        if (arr[i] != prev):
            vals.append(arr[i])
            prev = arr[i]
    
    return vals
 
def printDistinct(arr, n):  
    # Pick all elements one by one 
    vals = []
    for i in range(0, n): 
  
        # Check if the picked element  
            # is already printed 
        d = 0
        for j in range(0, i): 
            if (arr[i] == arr[j]): 
                d = 1
                break
  
        # If not printed earlier, 
        # then print it 
        if (d == 0): 
            vals.append(arr[i]) 
            print(arr[i])
    return vals
    
def get_edge_id(pnt, nodeDict, nodeArray):
    lats = np.asarray(nodeArray[:, 1], dtype=float)
    lons = np.asarray(nodeArray[:, 2], dtype=float)
    idx = np.argmax(np.square(pnt[0]-lats) + 
                 np.square(pnt[1]-lons))
    
    nodeID = str(nodeArray[idx][0])
    value = nodeDict.get(nodeID, 'no_matching_point_id')
    
    ''' decide the edge id '''
    if len(value) == 3:
#         print(value)
#         print('here', value[2])
        return value[2]
    
    
    

                

## Dictionary Generation

In [46]:
xmlfile = '/content/gdrive/My Drive/00-colab-code/trajectory/interpreter'
nodefile = '/content/gdrive/My Drive/00-colab-code/trajectory/data/node_dict.txt'
edgefile = '/content/gdrive/My Drive/00-colab-code/trajectory/data/edge_dict.txt'
trajfile = '/content/gdrive/My Drive/00-colab-code/trajectory/data/traj_dict.txt'
gisfile = '/content/gdrive/My Drive/00-colab-code/trajectory/gis-trajs-tims.csv'
  
""" create 'graph' node and edges dictionary """
trajDict = TrajDitionary(xmlfile)
node_dict = trajDict.gen_node_dict()
edge_dict = trajDict.gen_edge_dict()

''' output dict files '''
# gen_node_file(node_dict, nodefile, verbose=True)
# gen_edge_file(edge_dict, node_dict, edgefile, verbose=True)

''' 
    mode = 'node_by_node', node is denoted by nearest graph node
    mode = 'node_by_edge', node is denoted by nearest graph edge
'''
gen_traj_file(node_dict, gisfile, trajfile, mode='node_by_node')


print('Number of nodes in the map', trajDict.node_graph_num())
print('Number of edges in the map', trajDict.edge_graph_num())

here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
here
Number of nodes in the map 49095
Number of edges in the map 6602
