In [1]:
import pandas as pd
import numpy as np
import pickle as pk
from scipy.optimize import curve_fit

In [76]:
df = pd.read_csv('1_min_labeled.csv')

In [77]:
df = df[['Vehicle_ID','Global_Time','Local_X','Local_Y','v_Width','v_Vel','v_Acc','Preceding','lateral_vel','lateral_acc','mean_vel','behavior']]

In [78]:
df[:5]

Unnamed: 0,Vehicle_ID,Global_Time,Local_X,Local_Y,v_Width,v_Vel,v_Acc,Preceding,lateral_vel,lateral_acc,mean_vel,behavior
0,36,1113433135300,27.126,75.567,8.5,11.08,0.0,0,0.0,0.0,11.08,2
1,36,1113433135400,27.126,76.568,8.5,11.08,0.0,0,0.0,0.0,11.08,2
2,36,1113433135500,27.126,77.567,8.5,11.08,0.0,0,0.0,0.0,11.08,2
3,36,1113433135600,27.127,78.067,8.5,11.08,0.0,0,0.01,0.1,11.08,2
4,36,1113433135700,27.625,81.067,8.5,11.08,0.0,0,4.98,49.7,11.08,2


### Behaviors:
Sorted by priority. E.g. Lane changing may contrain braking action. But we treat it as lane changing when within the intention to action dealy period
* Lane changing to the left;
* Lane changing to the right;
* Acceleration;
* Braking;
* Lane following;

In [79]:
def calc_features(vehicles, ego_id, v_mean):
    
    ego = vehicles[ego_id]
    if ego.Preceding in vehicles.keys():
        front = vehicles[ego.Preceding]
    else:
        front = None
    
    if len(vehicles) > 1:
        v_mean = (v_mean * len(vehicles) - ego.v_Vel) / (len(vehicles)-1)
    
    lane_width = 12
    
    f = np.zeros(11)
    
    f[1] = ego.v_Acc # because the angle is small, we use a to approximate longitudinal acceleration
    f[2] = 0 # No deceleration light info in data
    f[3] = ego.lateral_acc
    f[4] = ego.v_Vel - v_mean
    f[5] = ego.v_Vel - front.v_Vel if front is not None else 0
    f[6] = -1 if min(ego.Local_X % lane_width, lane_width - (ego.Local_X % lane_width)) - (ego.v_Width/2) < 0 else 0
    f[7] = 0
    f[8] = ego.lateral_vel # calculated by Local_X derivative
    f[9] = (ego.Local_X % lane_width) - (lane_width / 2)
    f[10] = lane_width
    
    return f



In [80]:
dT = 0.1
x = list()
y = list()
vehicles = dict()
show_up = set()
vel_sum = 0
    

for i in range(len(df)):
    r = df.iloc[i]
    v_id = r.Vehicle_ID

    show_up.add(v_id)
    vehicles[v_id] = r.copy()

    f = calc_features(vehicles, v_id, r.mean_vel)
    x.append(f)
    y.append(r.behavior)

    if i == len(df)-1 or r.Global_Time != df.iloc[i+1].Global_Time:
        for v_id in vehicles.keys():
            if v_id not in show_up:
                vehicles.remove(v_id)

x = np.array(x)
y = np.array(y).astype(np.int)
print(np.shape(x))
print(np.shape(y))

(101475, 11)
(101475,)


In [81]:
np.where(np.isnan(x))

(array([], dtype=int64), array([], dtype=int64))

In [83]:
def gen_train_data_label(x, y, filt_func):
    
    mask = filt_func(y)
    z = np.where(mask)[0]
    p = np.where(~mask)[0]
    if len(z) > len(p):
        z,p = p,z # make sure p is the longer one

    np.random.shuffle(p)
    p = p[:len(z)] # make sure two classes have same number of data in expectation
    p = np.concatenate([p,z])
        
    np.random.shuffle(p)
    data = np.array(x)[p,:]
    label = np.array(y)[p]

    mask = np.zeros_like(label,dtype=bool)
    mask[filt_func(label)] = True
    label[mask] = 1
    label[~mask] = 0
    return (data, label)

In [84]:
from sklearn.metrics import confusion_matrix

In [85]:
def sgn(x):
    return (x > 0) * 2 - 1     
def test(func, data, label, coef):
    pred = func(data, *coef) > 0.5
    print('accuracy:', 1 - np.sum(abs(pred - label)) / len(label))
    tn, fp, fn, tp = confusion_matrix(label, pred).ravel()
    print(tn, fn)
    print(fp, tp)

In [191]:

def changing_lane(x, c1, c2, c3):
    pred = 1 / (1 + np.exp(c1 * abs(x[:,8]) + c2 * abs(x[:,9]) + c3))
    return pred

changing_data, changing_label = gen_train_data_label(x, y, lambda y: y < 2)
changing_lane_coef, pcov = curve_fit(changing_lane, changing_data, changing_label)
print(changing_lane_coef)
test(changing_lane, changing_data, changing_label, changing_lane_coef)

rfc = RandomForestClassifier(n_estimators=100, max_depth=5)
rfc.fit(changing_data, changing_label)
print(rfc.score(changing_data, changing_label))



[-1.39253014 -0.73561408  2.64879195]
accuracy: 0.799846625767
2184 620
424 1988
0.840107361963


In [192]:
f = open('random_forest_changing.txt', 'wb')
pk.dump(rfc, f) 

In [168]:
def accelerating(x, c1, c2, c3):
    pred1 = changing_lane(x, *changing_lane_coef)
#     pred2 = 1 - np.exp(- c1 * x[:,1]**2 - c2 * x[:,5]**2)
    pred2 = 1 / (1 + np.exp(c1 * abs(x[:,1]) + c2 * abs(x[:,5]) + c3))
    pred = np.minimum(1 - pred1, pred2)
    return pred

accelerating_data, accelerating_label = gen_train_data_label(x, y, lambda y: (y == 2) | (y == 3))
accelerating_coef, pcov = curve_fit(accelerating, accelerating_data, accelerating_label)
test(accelerating, accelerating_data, accelerating_label, accelerating_coef)

rfa = RandomForestClassifier(n_estimators=100, max_depth=5)
rfa.fit(accelerating_data, accelerating_label)
print(rfa.score(accelerating_data, accelerating_label))

accuracy: 0.7097799511
4999 2425
1136 3710
0.767970660147


In [173]:
pred_c = rfc.predict_proba(x)
pred_a = rfa.predict_proba(x)
pred_f = 1 - np.min(pred_c[:,1], )
pred_a[:,1]

array([ 0.40079827,  0.40079827,  0.40079827, ...,  0.68091269,
        0.54258512,  0.46234822])

In [92]:
split = int(len(x) * 0.8)
train_x = x[:split,:]
train_y = y[:split]
test_x = x[split:,:]
test_y = y[split:]

In [114]:
from sklearn.svm import SVC
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

In [93]:
clf = SVC(gamma='auto')
clf.fit(train_x, train_y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [95]:
clf.score(train_x, train_y)

0.84254742547425476

In [104]:
clf.score(test_x, test_y)

0.76077851687607789

In [98]:
dtc = tree.DecisionTreeClassifier()
dtc.fit(train_x, train_y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [162]:
min_k = 1e10
for c in range(5):
    k = np.sum(y == c)
    min_k = min(k, min_k)

bidx = np.array([]).astype(int)
for c in range(5):
    bidx = np.concatenate([bidx, np.where(y == c)[0][:min_k]])


bx = x[bidx,:]
by = y[bidx]
print(len(bx))
print(len(by))

3830
3830


In [119]:
print(dtc.score(train_x, train_y))
print(dtc.score(test_x, test_y))

1.0
0.743532889874


In [163]:
rfc = RandomForestClassifier(n_estimators=100, max_depth=5)
rfc.fit(bx, by)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=5, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [164]:
print(rfc.score(train_x, train_y))
print(rfc.score(test_x, test_y))

0.476595220498
0.442128603104


In [147]:
rfc.predict_proba(x[0:1,:])

array([[ 0.0091362 ,  0.0136539 ,  0.50924947,  0.39904193,  0.0689185 ]])

In [148]:
f = open('random_forest.txt', 'wb')
pk.dump(rfc, f) 

In [112]:
xx = x[0,:]
dtc.predict(xx[np.newaxis,:])[0]

2

In [105]:
f = open('decision_tree.txt', 'wb')
pk.dump(dtc, f) 

In [None]:
f = open('changing_coef.txt','wb')
pk.dump(changing_lane_coef, f)
f = open('accelerating_coef.txt','wb')
pk.dump(accelerating_coef, f)

In [None]:
def following(x, c1, c2, c3, c4, c5):
    pred1 = changing_lane(x, c1, c2, c3)
    pred2 = accelerating(x, c4, c5)
    pred = 1 - np.maximum(pred1, pred2)
    print(pred1)
    print(pred2)
    print(pred)
    
    return pred

following_data, following_label = gen_train_data_label(x, y, lambda y: y == 4)
print(len(following_data))
print(sum(following_label))
test(following, following_data, following_label, np.concatenate([changing_lane_coef, accelerating_coef], axis=0))


In [189]:

def get_intention(df):
    dT = 0.1
    x = list()
    y = list()
    vehicles = dict()
    show_up = set()
    vel_sum = 0
    correct_cnt = 0
    for i in range(len(df)):
        r = df.iloc[i]
        v_id = r.Vehicle_ID

        show_up.add(v_id)

        if v_id not in vehicles.keys():
            df.at[i,'lateral_acc'] = 0
            df.at[i,'lateral_vel'] = 0
            vehicles[v_id] = r.copy()
        else:
            lateral_V = (r.Local_X - vehicles[v_id].Local_X) / dT
            vel_sum -= vehicles[v_id].v_Vel
            df.at[i,'lateral_acc'] = (lateral_V - vehicles[v_id]['lateral_vel']) /dT
            df.at[i,'lateral_vel'] = lateral_V
            vehicles[v_id] = r.copy()
            vel_sum += vehicles[v_id].v_Vel

        v_mean = vel_sum / len(vehicles)

        f = calc_features(vehicles, v_id, v_mean)
        f = f[np.newaxis,:]
#         pred_chg = changing_lane(f, *changing_lane_coef)
#         pred_acc = accelerating(f, *accelerating_coef)
        pred_chg = rfc.predict_proba(f)[:,1]
        pred_acc = rfa.predict_proba(f)[:,1]

        pred_flw = 1 - np.maximum(pred_chg, pred_acc)
        print(pred_chg)
        print(pred_acc)
        print(pred_flw)

        if pred_chg > 0.5:
            pred_beh = int(f[0,9] > 0)
        elif pred_acc > 0.5:
            pred_beh = int(f[0,1] > 0) + 2
        else:
            pred_beh = 4
        print(pred_beh)
        print(r.behavior)
        break
        correct_cnt += r.behavior == pred_beh
        if i > 0 and i % 100 == 0:
            print(correct_cnt / i)

        #remove exited car after every moment
        if i == len(df)-1 or r.Global_Time != df.iloc[i+1].Global_Time:
            for v_id in vehicles.keys():
                if v_id not in show_up:
                    vel_sum -= vehicles[v_id].v_Vel
                    vehicles.remove(v_id)

    print('acc: ', correct_cnt / len(df))

get_intention(df)

[ 0.45453129]
[ 0.43055007]
[ 0.54546871]
4
2.0
acc:  0.0


In [None]:

dd = df.loc[df['Vehicle_ID'] == 7]
dd = dd[['Vehicle_ID', 'v_Acc', 'Local_Y', 'Local_X', 'lateral_vel', 'lateral_acc', 'Lane_ID', 'behavior']]
pd.options.display.float_format = "{:.2f}".format
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(dd)
    

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
%matplotlib notebook

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')


is_0 = np.nonzero(label)
is_1 = np.delete(np.arange(len(label)), is_0)

ax.scatter(data[is_0,0]**2, data[is_0,1]**2, label[is_0], c='r', marker='o')
ax.scatter(data[is_1,0]**2, data[is_1,1]**2, label[is_1], c='b', marker='^')

ax.set_xlabel('f8')
ax.set_ylabel('f9')
ax.set_zlabel('label')
ax.set_xlim(0,200)
plt.show()
