In [1]:
import json
import zipfile
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import math
import csv
import operator
import Geohash
from build import TreeBuilding
from decision import haversine
from sklearn.cross_validation import train_test_split
import pickle
    
%matplotlib inline

In [2]:
# Load the trajectory CSV, keeping only the two JSON-encoded columns and
# decoding every cell into a Python object while the file is read.
air_trajs = pd.read_csv(
    '1_100_data.csv',
    usecols=['POLYLINE', 'Destination'],
    converters={'POLYLINE': json.loads, 'Destination': json.loads}
)

In [3]:
# Pull the two decoded columns out of the DataFrame as plain Python lists
# so they can be split and indexed independently of pandas.
trajectories = [polyline for polyline in air_trajs['POLYLINE']]
targets = [destination for destination in air_trajs['Destination']]

In [4]:
# Use 70% of the trajectories for training and the rest for evaluation;
# random_state=0 fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(trajectories, targets, train_size=0.70, random_state = 0)

In [None]:
# Fit the trajectory tree on the training split.
# NOTE(review): the positional arguments 1 and 10 are not explained anywhere in
# this notebook - confirm their meaning against TreeBuilding.fit in build.py.
tb = TreeBuilding()
tb = tb.fit(X_train, y_train, 1, 10)

In [None]:
def prediction_error(y_val, y_hat):
    """Return the mean squared haversine distance between targets and predictions.

    Parameters
    ----------
    y_val : sequence of points
        True destinations.
    y_hat : sequence of points
        Predicted destinations; ``y_hat[i]`` is compared with ``y_val[i]``
        for every index of ``y_val``.

    Returns
    -------
    float
        Mean of the squared ``haversine(y_val[i], y_hat[i])`` values, or NaN
        when ``y_val`` is empty.
    """
    n_points = len(y_val)
    if n_points == 0:
        # np.mean of an empty list is NaN plus a RuntimeWarning; return NaN quietly.
        return np.nan
    # range() rather than the Python-2-only xrange() so this runs on either interpreter.
    distances = [haversine(y_val[i], y_hat[i]) for i in range(n_points)]
    return np.mean(np.power(distances, 2))

In [210]:
def predict_batch(traj_tree, trajectories):
    """Run ``predict_one`` on every trajectory and return the results in input order."""
    predictions = []
    for trajectory in trajectories:
        predictions.append(predict_one(traj_tree, trajectory))
    return predictions
        
def predict_one(traj_tree, trajectory):
    """Predict a destination for one trajectory by walking the trajectory tree.

    Starting at ``traj_tree.root``, every trajectory point except the last
    moves the walk one level down:

    * a node with a single child always descends into that child;
    * a node with two children goes left when the point lies closer than
      ``node.radius`` to ``node.decision_point`` (per ``haversine``), and
      right otherwise;
    * a node without children stops the walk.

    Parameters
    ----------
    traj_tree : object with a ``root`` node
        Nodes expose ``left``, ``right``, ``prediction``, ``decision_point``
        and ``radius``.
    trajectory : sequence of points
        The (possibly partial) trajectory to classify.

    Returns
    -------
    The ``prediction`` stored on the node where the walk ends. An empty
    trajectory yields the root's prediction (previously this raised
    UnboundLocalError).
    """
    current_node = traj_tree.root

    # The last point never triggers a descent, so only the first len - 1
    # points steer the walk; an empty trajectory leaves us at the root.
    for point in trajectory[:-1]:
        left, right = current_node.left, current_node.right

        if left is None and right is None:
            # Leaf reached before the trajectory ran out.
            break

        if left is None:
            current_node = right
        elif right is None:
            current_node = left
        elif haversine(point, current_node.decision_point) < current_node.radius:
            current_node = left
        else:
            current_node = right

    return current_node.prediction

In [222]:
# Predict a destination for every held-out trajectory with the fitted tree.
y_hat = predict_batch(tb, X_test)

In [223]:
# Mean squared haversine distance between the true and predicted destinations.
prediction_error(y_test, y_hat)

29.878948260618863

In [None]:
def mean_points_alpha(points):
    """Return the coordinate-wise mean of ``points``.

    The result is ``[mean of every point[0], mean of every point[1]]``.
    """
    first_coords = []
    second_coords = []
    for pt in points:
        first_coords.append(pt[0])
        second_coords.append(pt[1])
    return [np.mean(first_coords), np.mean(second_coords)]

def sqare_error_alpha(points):
    """Return the mean squared haversine distance from each point to the mean point.

    The mean point is the coordinate-wise mean computed by ``mean_points_alpha``.
    """
    centre = mean_points_alpha(points)
    squared_distances = np.power([haversine(pt, centre) for pt in points], 2)
    return np.mean(squared_distances)

In [224]:
# Baseline for comparison with prediction_error: the mean squared distance of
# the test destinations from their own coordinate-wise mean.
sqare_error_alpha(y_test)

26.904650463265202

In [145]:
# Show the decision point stored on the tree's root node.
# The parenthesised form prints the same text on Python 2 and Python 3.
print(tb.root.decision_point)

[ -8.597682  41.187717]


In [None]:
def preorder(tree, points):
    """Append the decision points of the subtree rooted at ``tree`` to ``points``.

    Despite the name, a node is appended *after* its children (left subtree,
    right subtree, then the node itself), so reversing the result yields a
    node-first ordering. A node whose ``decision_point`` is None contributes
    nothing and its children are not visited.

    Parameters
    ----------
    tree : node or None
        Node exposing ``decision_point``, ``left`` and ``right``.
    points : list
        Accumulator, mutated in place; each decision point is appended as a list.

    Returns
    -------
    list
        The same ``points`` object. It is returned for every input, including
        a None tree or a node without a decision point (previously those
        cases returned None, which broke callers that used the return value).
    """
    if tree is None or tree.decision_point is None:
        return points

    preorder(tree.left, points)
    preorder(tree.right, points)
    points.append(list(tree.decision_point))
    return points
        
def order_left(tree, points):
    """Append the decision points of ``tree``'s left subtree, then ``tree``'s own.

    The right subtree of ``tree`` is deliberately skipped. The left subtree is
    traversed with ``preorder``.
    NOTE(review): ``preorder`` walks *both* children of every node below
    ``tree.left``, not only the left spine - confirm that is the intent.

    Parameters
    ----------
    tree : node or None
        Node exposing ``decision_point`` and ``left``.
    points : list
        Accumulator, mutated in place.

    Returns
    -------
    list
        The same ``points`` object. It is returned for every input, including
        a None tree or a node without a decision point (previously those
        cases returned None).
    """
    if tree is None or tree.decision_point is None:
        return points

    preorder(tree.left, points)
    points.append(list(tree.decision_point))
    return points
        
def get_lats_lons(points):
    """Split ``points`` into two parallel lists ``(lats, lons)``.

    Index 1 of each point is taken as the latitude and index 0 as the
    longitude, i.e. points are read as ``[lon, lat]`` pairs.
    """
    lats = []
    lons = []
    for pt in points:
        lats.append(pt[1])
        lons.append(pt[0])
    return lats, lons

In [159]:
# Collect the decision points of the root's left subtree followed by the root's own.
# NOTE(review): the next cell assigns decision_points as well, so the later
# plotting cells see whichever of the two cells was run last.
decision_points = []
decision_points = order_left(tb.root, decision_points)

In [147]:
# Collect the decision points of the whole tree.
decision_points = []
decision_points = preorder(tb.root, decision_points)
# preorder appends a node after its children, so reversing puts the root's point first.
decision_points = list(reversed(decision_points))

In [160]:
# Split the collected decision points into the latitude / longitude lists used for plotting.
lats, lons = get_lats_lons(decision_points)

In [161]:
# Number of decision points produced by the most recently run traversal cell.
len(decision_points)

1

In [151]:
# Draw the collected decision points as one connected line on a Google map
# and write the result to an HTML file.
# NOTE(review): this import sits mid-notebook; every other import is in the first cell.
import gmplot

# Arguments: map centre latitude, centre longitude, zoom level.
gmap = gmplot.GoogleMapPlotter(41.237137, -8.671403, 12)

gmap.plot(lats, lons, 'cornflowerblue', edge_width=5)
#gmap.scatter(heat_lats, heat_lons, '#3B0B39', size=10, marker=False)
#gmap.scatter(lats, lons, '#000000', size=20, marker=False)
#gmap.scatter(end_lats, end_lons, '#8B0000', size=10, marker=False)

#gmap.heatmap(heat_lats, heat_lons)

# NOTE(review): two later cells write to this same file name and will overwrite it.
gmap.draw("plot10_new_left.html")

In [None]:
def preorder_size(tree, points, p_size):
    """Collect decision points and trajectory counts for the subtree rooted at ``tree``.

    Visits the left subtree, then the right subtree, then the node itself
    (so a node is appended after its children). A node whose
    ``decision_point`` is None contributes nothing and its children are not
    visited.

    Parameters
    ----------
    tree : node or None
        Node exposing ``decision_point``, ``num_trajs``, ``left`` and ``right``.
    points : list
        Accumulator for decision points (each appended as a list); mutated in place.
    p_size : list
        Accumulator for the matching ``num_trajs`` values; mutated in place.

    Returns
    -------
    tuple
        ``(points, p_size)``, the same two list objects. The tuple is returned
        for every input, including a None tree or a node without a decision
        point (previously those cases returned None, which made the caller's
        tuple unpacking fail).
    """
    if tree is None or tree.decision_point is None:
        return points, p_size

    preorder_size(tree.left, points, p_size)
    preorder_size(tree.right, points, p_size)
    points.append(list(tree.decision_point))
    p_size.append(tree.num_trajs)
    return points, p_size

In [162]:
# Fresh accumulators: preorder_size appends into the lists it is given.
p_coord = []
p_sizes = []

p_coord, p_sizes = preorder_size(tb.root, p_coord, p_sizes)
# preorder_size appends a node after its children, so reversing both lists
# puts the root's entry at index 0.
p_coord = list(reversed(p_coord))
p_sizes = list(reversed(p_sizes))


In [164]:
# Scatter every decision point, sizing each marker by its node's trajectory count.
gmap = gmplot.GoogleMapPlotter(41.1496100, -8.6109900 , 12)
# Index 0 is the root's entry once the lists have been reversed by the previous
# cell; its count is the reference for scaling.
num_points = p_sizes[0]

# Root marker: fixed size 40, drawn in black.
gmap.scatter([p_coord[0][1]], [p_coord[0][0]], '#000000', size=40, marker=False)

for i in range(1,len(p_sizes)):
    # Linear scale: a count of 0 maps to size 1, a count equal to the root's maps to 40.
    p_size = 39. * p_sizes[i] / num_points + 1
    #print p_size
    gmap.scatter([p_coord[i][1]], [p_coord[i][0]], '#3B0B39', size = p_size, marker=False)
    
gmap.draw("plot_test_all.html")

In [64]:
# Index 1 of the first p_coord entry - the value passed as latitude to gmap.scatter above.
p_coord[0][1]

41.237369999999999

In [65]:
# Index 0 of the first p_coord entry - the value passed as longitude to gmap.scatter above.
p_coord[0][0]

-8.6698350000000008

In [134]:
# Re-split so that X_train holds only 1% of the trajectories; the next cell plots them.
# NOTE(review): this overwrites X_train / X_test / y_train / y_test from the
# 70/30 split made earlier, so every later cell that reads X_test or y_test
# sees this split rather than the one `tb` was fitted and scored on.
X_train, X_test, y_train, y_test = train_test_split(trajectories, targets, train_size=0.01, random_state = 0)

In [136]:
#visualization of trajectories

# One entry per named matplotlib colour. Iterating the dict directly replaces
# the Python-2-only .iteritems(), avoids leaking a variable called `hex` that
# shadowed the builtin, and str() yields a native string on Python 2 and 3.
colors = [str(name) for name in mpl.colors.cnames]

gmap = gmplot.GoogleMapPlotter(41.1496100, -8.6109900 , 12)

for i in range(len(X_train)):
    lats, lons = get_lats_lons(X_train[i])
    # Wrap around the palette so having more trajectories than colour names
    # reuses colours instead of raising IndexError.
    gmap.plot(lats, lons, colors[i % len(colors)], edge_width=5)

# NOTE(review): other cells write to this same file name; each run overwrites the previous map.
gmap.draw("plot10_new_left.html")
    

In [200]:
def dec_points(traj_tree, trajectory, verbose=True):
    """Trace the path one trajectory takes through the trajectory tree.

    The walk follows the same rules as ``predict_one``: every trajectory
    point except the last moves one level down (the only child if there is
    just one; otherwise left when the point is closer than ``node.radius``
    to ``node.decision_point`` per ``haversine``, else right), and a node
    without children stops the walk.

    Parameters
    ----------
    traj_tree : object with a ``root`` node
        Nodes expose ``left``, ``right``, ``prediction``, ``decision_point``
        and ``radius``.
    trajectory : sequence of points
        The trajectory to trace.
    verbose : bool, optional
        When True (the default, matching the previous behaviour) print
        ``"<point index> left"`` or ``"<point index> right"`` for every descent.

    Returns
    -------
    list
        The ``decision_point`` of every node entered during the walk (entries
        may be None), followed by the final node's ``prediction``. An empty
        trajectory returns an empty list.
    """
    current_node = traj_tree.root
    visited = []

    # The last point never triggers a descent, so only the first len - 1 points steer the walk.
    for idx, point in enumerate(trajectory[:-1]):
        left, right = current_node.left, current_node.right

        if left is None and right is None:
            # Leaf reached before the trajectory ran out.
            break

        if left is None:
            direction, current_node = 'right', right
        elif right is None:
            direction, current_node = 'left', left
        elif haversine(point, current_node.decision_point) < current_node.radius:
            direction, current_node = 'left', left
        else:
            direction, current_node = 'right', right

        if verbose:
            # Single pre-formatted string: prints identically on Python 2 and 3.
            print("%d %s" % (idx, direction))
        visited.append(current_node.decision_point)

    if len(trajectory) > 0:
        visited.append(current_node.prediction)
    return visited

In [225]:
# Persist the fitted tree. The with-block guarantees the file is flushed and
# closed even if pickling raises; the original left the handle open.
with open("tree_250_data.p", "wb") as tree_file:
    pickle.dump(tb, tree_file)

In [230]:
# Plot one test trajectory together with the path it takes through the tree.
# NOTE(review): X_test is whatever the most recently run train_test_split cell produced.
test_traj = X_test[10]
# dec_points can contain None entries (nodes without a decision point); drop them before plotting.
predicted_traj = [list(p) for p in dec_points(tb, test_traj) if p is not None]

gmap = gmplot.GoogleMapPlotter(41.1496100, -8.6109900 , 12)

# The actual trajectory.
lats, lons = get_lats_lons(test_traj)
gmap.plot(lats, lons, colors[0], edge_width=5)

# The sequence of decision points visited, ending with the predicted destination.
lats, lons = get_lats_lons(predicted_traj)
gmap.plot(lats, lons, colors[2], edge_width=5)

# Black marker: last entry of the traced path (the prediction). Red marker: last point of the real trajectory.
gmap.scatter([predicted_traj[-1][1]], [predicted_traj[-1][0]], 'black', size = 20, marker=False)
gmap.scatter([test_traj[-1][1]], [test_traj[-1][0]], 'red', size = 20, marker=False)

# NOTE(review): same output file name as earlier plotting cells; this overwrites their map.
gmap.draw("plot10_new_left.html")

0 right
1 right
2 right
3 right
4 right
5 right
6 right
7 right
8 right
9 right
10 right
11 right
12 right
13 right
14 left
15 right
16 right
17 left
18 right
19 right
20 right
21 right


In [180]:
# Show the predicted end point as [lat] [lon]. Formatting into one string first
# gives the same output on Python 2 and Python 3.
print("%s %s" % ([predicted_traj[-1][1]], [predicted_traj[-1][0]]))


[41.151036600000005] [-8.5843403999999985]
