# Experiment on Generated Data

Run the notebook `ssvm.ipynb`.

In [None]:
# Execute ssvm.ipynb first: the cells below use names that are not defined in this
# notebook (e.g. os, pickle, data_dir) and are expected to be provided by it.
%run 'ssvm.ipynb'

Load the trained parameters and prediction results.

In [None]:
# Path of the pickle file to load in the next cell (located under data_dir).
fname = os.path.join(data_dir, 'ssvm-listViterbi-Glas.pkl')

In [None]:
# ssvm_lv is a dict: query -> {'PRED': trajectory, 'C': ssvm-c, 'W': model_params}
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
with open(fname, 'rb') as fin:  # the context manager closes the file handle after loading
    ssvm_lv = pickle.load(fin)

In [None]:
def experiment_on_toy_data(query, C, W):
    """Leave-one-out evaluation of an SSVM on trajectories generated by a trained model.

    Steps:
      1. Build the POI set from all trajectories except those in TRAJ_GROUP_DICT[query].
      2. Split the flat parameter vector W into unary and pairwise parameters and use
         list-Viterbi inference to generate one "fake" trajectory for every
         (start POI, length) combination.
      3. For every generated trajectory, train a OneSlackSSVM on all the other generated
         trajectories, predict the held-out one and record the prediction.
      4. Print mean and standard error of F1, pairs-F1 and Kendall's tau.

    Parameters
    ----------
    query : key of TRAJ_GROUP_DICT
        Identifies the group of trajectories to exclude when building features.
    C : regularisation constant handed to OneSlackSSVM.
    W : array supporting slicing and ``.reshape``
        Flat parameter vector: the first n_states * n_node_features entries are the unary
        parameters, the remaining n_states * n_states * n_edge_features the pairwise ones.

    Returns
    -------
    None. Results are printed; training failures are reported on stderr and the
    corresponding fold is skipped.
    """
    # trajectories available for this query: everything except the query's own group
    trajid_set = set(trajid_set_all) - TRAJ_GROUP_DICT[query]
    poi_set = set()
    for tid in trajid_set:
        poi_set.update(traj_dict[tid])
    poi_list = sorted(poi_set)  # fix: the original sorted the not-yet-defined poi_list
    n_states = len(poi_list)
    n_edge_features = 5  # pairwise feature count assumed by the layout of W
    n_node_features = (len(W) - n_states * n_states * n_edge_features) // n_states
    unary_params = W[:n_states * n_node_features].reshape(n_states, n_node_features)
    pw_params = W[n_states * n_node_features:].reshape((n_states, n_states, n_edge_features))
    lengths = [3, 4, 5, 6, 7]  # lengths of the trajectories to generate

    # POI ID <-> state index mappings
    poi_id_dict, poi_id_rdict = dict(), dict()
    for idx, poi in enumerate(poi_list):
        poi_id_dict[poi] = idx
        poi_id_rdict[idx] = poi

    # compute features
    poi_info = calc_poi_info(list(trajid_set), traj_all, poi_all)
    traj_list = [traj_dict[k] for k in trajid_set if len(traj_dict[k]) >= 2]
    node_features_list = Parallel(n_jobs=N_JOBS)\
                         (delayed(calc_node_features)\
                          (tr[0], len(tr), poi_list, poi_info, poi_clusters=POI_CLUSTERS, \
                           cats=POI_CAT_LIST, clusters=POI_CLUSTER_LIST) for tr in traj_list)
    edge_features = calc_edge_features(list(trajid_set), poi_list, traj_dict, poi_info)

    # feature scaling: the scaler needs a 2-D matrix, so scale the stacked rows first and
    # only then restore the (trajectory, state, feature) layout
    X_node_all = np.vstack(node_features_list)
    scaler = MaxAbsScaler(copy=False)
    X_node_all = scaler.fit_transform(X_node_all)
    X_node_all = X_node_all.reshape(-1, n_states, n_node_features)
    assert(len(traj_list) == X_node_all.shape[0])

    # generate trajectories
    fake_labels = []
    for poi in poi_list:
        for L in lengths:
            X_node_test = calc_node_features(poi, L, poi_list, poi_info, poi_clusters=POI_CLUSTERS, \
                                             cats=POI_CAT_LIST, clusters=POI_CLUSTER_LIST)
            X_node_test = scaler.transform(X_node_test)  # feature scaling
            unary_features = X_node_test
            pw_features = edge_features.copy()
            # fix: inference works in state-index space (its output is decoded with
            # poi_id_rdict below), so the start state must be an index, not a POI ID
            y_pred = do_inference_listViterbi(poi_id_dict[poi], L, n_states, unary_params, pw_params,
                                              unary_features, pw_features)
            fake_labels.append([poi_id_rdict[p] for p in y_pred])

    # Node features of a generated trajectory depend only on its start POI and length,
    # not on the fold, so compute them once instead of once per fold.
    fake_node_features = Parallel(n_jobs=N_JOBS)\
                         (delayed(calc_node_features)\
                          (tr[0], len(tr), poi_list, poi_info, poi_clusters=POI_CLUSTERS, \
                           cats=POI_CAT_LIST, clusters=POI_CLUSTER_LIST) for tr in fake_labels)

    # do leave-one-out cross validation on generated trajectories
    predictions = dict()
    for i in range(len(fake_labels)):
        train_labels = fake_labels[:i] + fake_labels[i+1:]
        # np.vstack allocates a new array, so the in-place scaler below never touches
        # the cached per-trajectory features
        X_node_train = np.vstack(fake_node_features[:i] + fake_node_features[i+1:])
        scaler_train = MaxAbsScaler(copy=False)
        X_node_train = scaler_train.fit_transform(X_node_train)
        X_node_train = X_node_train.reshape(-1, n_states, n_node_features)
        # fix: the training fold holds one trajectory fewer than fake_labels
        assert(len(train_labels) == X_node_train.shape[0])
        X_train = [(X_node_train[k, :, :], edge_features.copy(),
                    (poi_id_dict[train_labels[k][0]], len(train_labels[k]))) for k in range(len(train_labels))]
        y_train = [np.array([poi_id_dict[k] for k in tr]) for tr in train_labels]
        assert(len(X_train) == len(y_train))
        sm = MyModel(inference_fun=do_inference_listViterbi)
        verbose = 0
        osssvm = OneSlackSSVM(model=sm, C=C, n_jobs=N_JOBS, verbose=verbose)
        try:
            osssvm.fit(X_train, y_train, initialize=True)
            print('SSVM training finished.')
        except Exception as err:
            # report the cause and skip the fold: predicting with an unfitted model
            # would only fail later or yield meaningless results
            sys.stderr.write('SSVM training FAILED.\n')
            sys.stderr.write('  fold %d skipped: %r\n' % (i, err))
            continue

        # predict the held-out trajectory using the trained model
        startPOI, nPOI = fake_labels[i][0], len(fake_labels[i])
        X_node_test = calc_node_features(startPOI, nPOI, poi_list, poi_info, poi_clusters=POI_CLUSTERS,
                                         cats=POI_CAT_LIST, clusters=POI_CLUSTER_LIST)
        X_node_test = scaler_train.transform(X_node_test)
        # fix: the test instance is the held-out generated trajectory, not the original query
        X_test = [(X_node_test, edge_features, (poi_id_dict[startPOI], nPOI))]
        y_hat = osssvm.predict(X_test)
        predictions[(startPOI, nPOI)] = {'PRED': np.array([poi_id_rdict[p] for p in y_hat[0]]), 'REAL':fake_labels[i]}

    # evaluation
    if not predictions:
        sys.stderr.write('No predictions were produced; nothing to evaluate.\n')
        return
    F1_ssvm = []; pF1_ssvm = []; tau_ssvm = []
    for key in sorted(predictions.keys()):
        F1 = calc_F1(predictions[key]['REAL'], predictions[key]['PRED'])
        pF1 = calc_pairsF1(predictions[key]['REAL'], predictions[key]['PRED'])
        tau = calc_kendalltau(predictions[key]['REAL'], predictions[key]['PRED'])
        F1_ssvm.append(F1); pF1_ssvm.append(pF1); tau_ssvm.append(tau)
    # mean and standard error of each metric
    print('SSVM: F1 (%.3f, %.3f), pairsF1 (%.3f, %.3f), Tau (%.3f, %.3f)' % \
          (np.mean(F1_ssvm), np.std(F1_ssvm)/np.sqrt(len(F1_ssvm)), \
           np.mean(pF1_ssvm), np.std(pF1_ssvm)/np.sqrt(len(pF1_ssvm)), \
           np.mean(tau_ssvm), np.std(tau_ssvm)/np.sqrt(len(tau_ssvm))))

In [None]:
# Run the experiment once per query, in sorted query order, using the
# regularisation constant and parameter vector stored for that query.
for query in sorted(ssvm_lv):
    trained = ssvm_lv[query]
    experiment_on_toy_data(query, trained['C'], trained['W'])