In [1]:
import json
import zipfile
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import math
import csv
import operator
import Geohash
from build import TreeBuilding
from decision import haversine
from sklearn.cross_validation import train_test_split
import pickle
    
%matplotlib inline

In [2]:
# Read only the two JSON-encoded columns and decode each cell with json.loads
# while the CSV is being parsed.
air_trajs = pd.read_csv(
    'air_trajs.csv',
    usecols=['POLYLINE', 'Destination'],
    converters={
        'POLYLINE': json.loads,
        'Destination': json.loads,
    },
)

In [3]:
# Pull the two decoded columns out of the frame as plain Python lists;
# they are split and fed to the tree builder in the following cells.
trajectories = list(air_trajs['POLYLINE'])
targets = list(air_trajs['Destination'])

In [4]:
# Only 17% of the trajectories are used for training; the remainder is held out
# for evaluation. random_state is pinned so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(trajectories, targets, train_size=0.170, random_state = 0)

In [5]:
# Build the trajectory tree from the training split; `tb` is rebound to whatever
# fit() returns and is used as the tree object by the cells below.
# NOTE(review): the meaning of the positional arguments 1 and 10 is defined by
# TreeBuilding.fit in build.py and is not visible here -- confirm before tuning.
# NOTE(review): the left/right/count trace recorded under this cell is presumably
# printed by fit() itself -- confirm.
tb = TreeBuilding()
tb = tb.fit(X_train, y_train, 1, 10)

735
left
5
right
730
left
4
right
726
left
170
left
5
right
165
left
16
left
1
right
15
left
1
right
14
left
1
right
13
left
2
right
11
left
1
right
10
right
148
left
6
right
142
left
2
right
140
left
2
right
138
left
44
left
4
right
40
left
6
right
34
left
8
right
26
left
1
right
25
left
1
right
24
left
2
right
22
left
1
right
21
left
1
right
20
left
1
right
19
left
2
right
17
left
10
right
7
right
94
left
7
right
87
left
3
right
84
left
18
left
1
right
17
left
2
right
14
left
12
left
1
right
11
left
1
right
10
right
1
right
63
left
23
left
2
right
21
left
2
right
19
left
1
right
18
left
1
right
17
left
12
left
1
right
11
left
1
right
10
right
5
right
38
left
4
right
34
left
5
right
29
left
2
right
27
left
4
right
23
left
6
right
17
left
1
right
16
left
15
left
9
right
6
right
1
right
530
left
2
right
527
left
213
left
12
left
1
right
11
left
2
right
9
right
200
left
1
right
199
left
6
right
193
left
187
left
1
right
186
left
1
right
185
left
1
right
184
left
1
right
183
left
7
right


In [6]:
def prediction_error(y_val, y_hat):
    """Return the mean of the squared haversine distances between paired points.

    y_val and y_hat are indexed in parallel; one distance is computed for
    every index of y_val.
    """
    distances = []
    for position in range(len(y_val)):
        distances.append(haversine(y_val[position], y_hat[position]))
    squared_distances = np.power(distances, 2)
    return np.mean(squared_distances)

In [7]:
def predict_batch(traj_tree, trajectories):
    """Run predict_one on every trajectory and return the results as a list,
    in the same order as the input."""
    predictions = []
    for single_trajectory in trajectories:
        predictions.append(predict_one(traj_tree, single_trajectory))
    return predictions
        
def predict_one(traj_tree, trajectory):
    """Descend the tree along a trajectory and return the reached node's prediction.

    Every point except the last one drives one step of the descent:
      * at a leaf (no children) the descent stops;
      * at a node with a single child, that child is taken unconditionally;
      * at a node with two children, the left child is taken when the
        haversine distance between the point and the node's decision point
        is smaller than the node's radius, otherwise the right child.

    Args:
        traj_tree: object exposing the tree's top node as ``.root``. Nodes are
            read through ``left``, ``right``, ``prediction``,
            ``decision_point`` and ``radius``.
        trajectory: sliceable sequence of points.

    Returns:
        The ``prediction`` attribute of the node where the descent ended. For
        an empty or single-point trajectory this is the root's prediction
        (an empty trajectory previously raised UnboundLocalError).
    """
    current_node = traj_tree.root

    # The final point never triggers a move; it only marked "stop and read the
    # prediction" in the original loop, so it is excluded from the descent.
    for point in trajectory[:-1]:
        left, right = current_node.left, current_node.right

        if left is None and right is None:
            # Leaf reached: later points cannot change the outcome.
            break

        if left is None:
            current_node = right
        elif right is None:
            current_node = left
        elif haversine(point, current_node.decision_point) < current_node.radius:
            current_node = left
        else:
            current_node = right

    return current_node.prediction

In [8]:
# Predict a destination for every held-out trajectory with the freshly built tree.
y_hat = predict_batch(tb, X_test)

In [9]:
# Mean squared haversine distance between the true and predicted test destinations.
prediction_error(y_test, y_hat)

1663.7445599427349

In [8]:
def mean_points_alpha(points):
    """Return the component-wise mean of a collection of two-component points.

    The result is a two-element list: the mean of every point's component 0
    followed by the mean of every point's component 1.
    """
    def component_mean(index):
        # One pass over `points` per component, as in a plain list comprehension.
        return np.mean([pt[index] for pt in points])

    return [component_mean(0), component_mean(1)]

def sqare_error_alpha(points):
    """Return the mean squared haversine distance from each point to the
    component-wise mean of all the points."""
    centre = mean_points_alpha(points)
    distances = []
    for pt in points:
        distances.append(haversine(pt, centre))
    return np.mean(np.power(distances, 2))

In [11]:
# Reference value: mean squared haversine distance of the test destinations to
# their own mean point, i.e. the error of always predicting that mean point.
sqare_error_alpha(y_test)

496.70387895201532

In [None]:
# Show the decision point stored on the root node.
print(tb.root.decision_point)

In [23]:
# Persist the fitted tree. The context manager guarantees the file is flushed
# and closed even if pickling fails part-way through.
with open("tree_10_new.p", "wb") as tree_file:
    pickle.dump(tb, tree_file)

In [24]:
# Reload the persisted tree, closing the file handle once it has been read.
# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# were produced by a trusted run of this notebook.
with open("tree_10_new.p", "rb") as tree_file:
    load_tree = pickle.load(tree_file)

In [18]:
# Re-score the test split with the tree restored from disk (overwrites y_hat).
# NOTE(review): the recorded execution counts show this cell (In [18]) ran before
# the dump/load cells (In [23]/In [24]), so the stored output may come from a
# different pickled tree than the one built above -- re-run in order to confirm.
y_hat = predict_batch(load_tree, X_test)
prediction_error(y_test, y_hat)

633.96125240566403

In [9]:
def preorder(tree, points):
    """Append the decision points of a subtree to ``points`` and return it.

    Despite the name, children are visited before their parent (left subtree,
    right subtree, then the node itself), so reversing the result yields a
    root-first ordering. A node whose ``decision_point`` is None is skipped
    together with everything below it.

    Args:
        tree: subtree root (read through ``decision_point``, ``left`` and
            ``right``) or None.
        points: list that is extended in place.

    Returns:
        The same ``points`` list. It is now returned on every path; previously
        None was returned for an empty subtree or a node without a decision
        point.
    """
    if tree is None or tree.decision_point is None:
        return points

    preorder(tree.left, points)
    preorder(tree.right, points)
    points.append(list(tree.decision_point))
    return points
        
def order_left(tree, points):
    """Append the decision points of a node's left subtree, then the node's own.

    The left subtree is collected with ``preorder`` (children before parent);
    the right subtree of ``tree`` is deliberately not visited.

    Args:
        tree: node (read through ``decision_point`` and ``left``) or None.
        points: list that is extended in place.

    Returns:
        The same ``points`` list. It is now returned on every path; previously
        None was returned when ``tree`` was None or had no decision point.
    """
    if tree is None or tree.decision_point is None:
        return points

    preorder(tree.left, points)
    points.append(list(tree.decision_point))
    return points
        
def get_lats_lons(points):
    """Split a collection of points into two parallel lists.

    Returns a ``(lats, lons)`` tuple where ``lats`` holds component 1 of every
    point and ``lons`` holds component 0.
    """
    def component(index):
        return [pt[index] for pt in points]

    return component(1), component(0)

In [67]:
# Collect decision points starting from the root's right child: the points of
# that node's left subtree first, then the node's own decision point.
decision_points = []
decision_points = order_left(tb.root.right, decision_points)

In [40]:
# Collect the decision points of the whole tree. preorder() appends children
# before their parent, so reversing the list puts the root's point first.
decision_points = []
decision_points = preorder(tb.root, decision_points)
decision_points = list(reversed(decision_points))

In [41]:
# Split the points into the two parallel coordinate lists passed to gmap.scatter.
lats, lons = get_lats_lons(decision_points)

In [42]:
# Number of decision points collected from the tree.
len(decision_points)

199

In [46]:
import gmplot

# Create the map object centred on (41.14961, -8.61099).
# NOTE(review): the third argument is presumably the initial zoom level -- confirm
# against the gmplot version in use.
gmap = gmplot.GoogleMapPlotter(41.1496100, -8.6109900 , 12)

# Disabled alternatives kept from earlier experiments (heat_* / end_* are not
# defined anywhere in this notebook):
#gmap.plot(lats, lons, 'cornflowerblue', edge_width=5)
#gmap.scatter(heat_lats, heat_lons, '#3B0B39', size=10, marker=False)
# Draw every decision point in black.
gmap.scatter(lats, lons, '#000000', size=20, marker=False)
#gmap.scatter(end_lats, end_lons, '#8B0000', size=10, marker=False)

#gmap.heatmap(heat_lats, heat_lons)

# Write the map to an HTML file in the working directory.
gmap.draw("plot10_new_left.html")

In [35]:
def preorder_size(tree, points, p_size):
    """Collect decision points and trajectory counts of a subtree in parallel.

    Children are visited before their parent (left subtree, right subtree,
    then the node itself). For every visited node its ``decision_point`` is
    appended to ``points`` and its ``num_trajs`` to ``p_size``, so the two
    lists stay index-aligned. A node whose ``decision_point`` is None is
    skipped together with everything below it.

    Args:
        tree: subtree root (read through ``decision_point``, ``num_trajs``,
            ``left`` and ``right``) or None.
        points: list extended in place with decision points.
        p_size: list extended in place with trajectory counts.

    Returns:
        The ``(points, p_size)`` tuple. It is now returned on every path;
        previously None was returned for an empty subtree or a node without a
        decision point, which broke tuple unpacking at the call site.
    """
    if tree is None or tree.decision_point is None:
        return points, p_size

    preorder_size(tree.left, points, p_size)
    preorder_size(tree.right, points, p_size)
    points.append(list(tree.decision_point))
    p_size.append(tree.num_trajs)
    return points, p_size

In [38]:
# Collect each node's decision point together with its trajectory count.
p_coord = []
p_sizes = []

# preorder_size() appends children before their parent, so both lists are
# reversed to put the root's entry at index 0 while keeping them aligned.
p_coord, p_sizes = preorder_size(tb.root, p_coord, p_sizes)
p_coord = list(reversed(p_coord))
p_sizes = list(reversed(p_sizes))


In [51]:
num_points = len(p_sizes)

# Index 0 is the root's decision point (the lists were reversed after the
# children-first traversal); draw it in its own colour at a fixed size.
# Note: gmap still carries the markers added by the earlier plotting cell.
gmap.scatter([p_coord[0][1]], [p_coord[0][0]], '#8B0000', size=20, marker=False)

for i in range(1, num_points):
    # Marker size proportional to the node's trajectory count.
    p_size = 20.0 * p_sizes[i] / num_points
    # Pass the computed value through `size`, the keyword used by the other
    # scatter calls; the previous `p_size=20` keyword left the scaling unused.
    gmap.scatter([p_coord[i][1]], [p_coord[i][0]], '#3B0B39', size=p_size, marker=False)

gmap.draw("plot10_new_left.html")