In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas
import math
from hmmlearn import hmm
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from recurrent_nueral_net import *

from create_train_test_val_maps import *

In [2]:
def start_train(selected_codes, train_map_revived, num_iter, num_states, random_state=None):
    """Fit one Gaussian HMM for every (ATA6 code, time window) pair.

    Sequences belonging to the same code and time window are pooled across
    all vehicles before fitting.

    Parameters
    ----------
    selected_codes : sequence
        Codes to train on; any code not contained in it is skipped.
    train_map_revived : dict
        Nested mapping ``vehicleID -> ATA6code -> time_window -> list of
        sequences``. Each sequence is handed to ``pandas.concat`` and its
        ``shape[0]`` is used as the sequence length.
    num_iter : int
        Forwarded to ``GaussianHMM`` as ``n_iter``.
    num_states : int
        Forwarded to ``GaussianHMM`` as ``n_components``.
    random_state : optional
        Forwarded to ``GaussianHMM``. The default ``None`` keeps the previous
        (unseeded) behaviour; pass an int for reproducible fits.

    Returns
    -------
    dict
        ``(ATA6code, time_window) -> fitted model``.
    """
    # Pool sequences per code and window, keeping first-seen ordering.
    code_window_map = {}
    for code_map in train_map_revived.values():
        for ATA6code, window_map in code_map.items():
            if ATA6code not in selected_codes:
                continue
            pooled_windows = code_window_map.setdefault(ATA6code, {})
            for time_window, sequences in window_map.items():
                pooled_windows.setdefault(time_window, []).extend(sequences)

    models = {}
    for ATA6code, pooled_windows in code_window_map.items():
        for time_window, listofsequences in pooled_windows.items():
            # DataFrame.as_matrix() no longer exists in pandas >= 1.0;
            # .values yields the same ndarray on old and new versions.
            X = pandas.concat(listofsequences).values.astype(float)
            # The row count of each sequence tells fit() where to split X.
            lengths = [sequence.shape[0] for sequence in listofsequences]
            model = hmm.GaussianHMM(n_components=num_states, n_iter=num_iter,
                                    random_state=random_state)
            models[(ATA6code, time_window)] = model.fit(X, lengths)
    return models

In [61]:
def get_results(selected_codes, models, val_map_revived, num_time_windows):
    """Classify every validation sequence with the per-(code, window) HMMs.

    For each sequence, every model's log-likelihood is written into a
    ``(num_codes, num_time_windows)`` grid, the grid is passed through
    ``softmax`` and two predictions are derived: the joint (code, window)
    arg-max and the code whose probabilities summed over windows are largest.

    ``softmax`` and ``ind2sub`` are not defined in this cell; they must be
    available in the notebook namespace (presumably via the star imports at
    the top of the notebook -- confirm).

    Parameters
    ----------
    selected_codes : sequence
        Codes under evaluation; a code's position in this sequence is its
        class index.
    models : dict
        ``(ATA6code, time_window) -> model`` exposing ``score(X)``.
    val_map_revived : dict
        Nested mapping ``vehicleID -> ATA6code -> time_window -> list of
        sequences`` (each sequence must expose ``.values``).
    num_time_windows : int
        Size of the window axis of the score grid.

    Returns
    -------
    tuple
        ``(joint_predictions, labels, code_only_predictions,
        code_only_labels)``; the joint entries are ``[code_index, window]``
        pairs and the code-only entries are code indices.
    """
    num_codes = len(selected_codes)

    # Count validation sequences up front so the score grids can be
    # allocated once.
    num_val = 0
    for code_map in val_map_revived.values():
        for ATA6code, window_map in code_map.items():
            if ATA6code not in selected_codes:
                continue
            for sequences in window_map.values():
                num_val += len(sequences)

    # Resolve each model's grid position once instead of once per sequence.
    model_slots = [(selected_codes.index(pair[0]), pair[1], model)
                   for pair, model in models.items()]

    labels = []
    code_only_labels = []
    joint_predictions = []
    code_only_predictions = []

    # NOTE(review): cells without a trained model keep the 0.0 written by
    # np.zeros and still take part in the softmax below -- confirm that this
    # is intended.
    log_likelihoods = np.zeros((num_val, num_codes, num_time_windows))
    probabilities = np.zeros((num_val, num_codes, num_time_windows))

    sample = 0
    for code_map in val_map_revived.values():
        for ATA6code, window_map in code_map.items():
            if ATA6code not in selected_codes:
                continue
            label_code = selected_codes.index(ATA6code)
            for time_window, sequences in window_map.items():
                for sequence_of_snapshots in sequences:
                    labels.append([label_code, time_window])
                    code_only_labels.append(label_code)
                    # .values replaces DataFrame.as_matrix(), which was
                    # removed in pandas 1.0.
                    x = sequence_of_snapshots.values.astype(float)
                    for c, w, model in model_slots:
                        score = model.score(x)
                        # `c == 0 & w == 0` parsed as `c == (0 & w) == 0`,
                        # i.e. plain `c == 0`; the window was never tested.
                        if c == 0 and w == 0:
                            # The model for the first code at window 0 is
                            # scored against every window.
                            log_likelihoods[sample, c, :] = score
                        else:
                            log_likelihoods[sample, c, w] = score

                    # Compute Softmax Prob Matrix
                    probabilities_temp = softmax(
                        log_likelihoods[sample].reshape(1, num_codes * num_time_windows))
                    probabilities[sample] = probabilities_temp.reshape(num_codes, num_time_windows)
                    predicted_code_only = np.argmax(np.sum(probabilities[sample], axis=1))
                    predicted_code, predicted_window = ind2sub(
                        probabilities[sample].shape, np.argmax(probabilities[sample]))
                    joint_predictions.append([predicted_code, predicted_window])
                    code_only_predictions.append(predicted_code_only)
                    sample = sample + 1
    return joint_predictions, labels, code_only_predictions, code_only_labels

In [49]:
def get_report(predictions, labels, num_labels):
    """Print per-class accuracy and return its mean and standard deviation.

    For each class ``k`` in ``range(num_labels)`` the reported value is the
    fraction of samples labelled ``k`` whose prediction equals the label.
    Classes that never occur in ``labels`` are reported as 0 and are included
    in the mean and standard deviation.

    Parameters
    ----------
    predictions, labels : array-like
        Equal-length 1-D collections of class ids. Float-valued ids (for
        example arrays created with ``np.zeros``) are accepted and truncated
        to int before being used as indices.
    num_labels : int
        Number of classes.

    Returns
    -------
    tuple
        ``(mean, std)`` of the per-class accuracies.
    """
    predicted = np.asarray(predictions)
    actual = np.asarray(labels)
    hits = predicted == actual
    # NumPy refuses float indices, so cast the class ids before indexing.
    actual_ids = actual.astype(int)

    actual_counts = np.zeros(num_labels)
    correctly_predicted_counts = np.zeros(num_labels)
    # np.add.at accumulates repeated indices (plain fancy `+=` would not).
    np.add.at(actual_counts, actual_ids, 1)
    np.add.at(correctly_predicted_counts, actual_ids[hits], 1)

    # Classes with no samples stay at 0 rather than producing 0/0 = nan.
    statistics_report = np.divide(correctly_predicted_counts, actual_counts,
                                  out=np.zeros(num_labels),
                                  where=actual_counts > 0)
    print(statistics_report)
    mu = np.mean(statistics_report)
    sd = np.std(statistics_report)
    return mu, sd

In [67]:
# Experiment settings for the joint (code, time-window) HMM classifier.
num_iter = 100
num_states = 2
#alpha = 0.5
selected_codes = [0,45021, 44004, 43004, 45008, 45002, 45007]
# Each pair is (num_windows, window_size) and is passed to load_maps.
window_pairs = [(1,500),(2,250),(5,100),(10,50)]

#window_pairs = [(1,500)]
for num_windows, window_size in window_pairs:
    print('(num_windows, window_size): (',num_windows, ', ', window_size, ')')
    print('---------------------------------------------------')
    # train
    train_combined, val_combined, test_combined = load_maps(num_windows, window_size)
    models = start_train(selected_codes, train_combined, num_iter, num_states)
    # get predictions
    predictions, labels, code_predictions, code_labels = get_results(selected_codes, models, val_combined, num_windows)
    predictions = np.array(predictions)
    labels = np.array(labels)
    code_predictions = np.array(code_predictions)
    code_labels = np.array(code_labels)


    # Get joint Report
    # Flatten every (code index, window) pair into a single class id. The ids
    # are used as array indices inside get_report, so they are stored as
    # integers (a float array here makes NumPy reject the indexing).
    joint_shape = (len(selected_codes), num_windows)
    mapped_predictions = np.zeros(predictions.shape[0], dtype=int)
    mapped_labels = np.zeros(labels.shape[0], dtype=int)
    for i in range(predictions.shape[0]):
        mapped_predictions[i] = sub2ind(joint_shape, predictions[i,0], predictions[i,1])
        mapped_labels[i] = sub2ind(joint_shape, labels[i,0], labels[i,1])
    joint_mu, joint_std = get_report(mapped_predictions, mapped_labels, len(selected_codes)*num_windows)
    print('Joint mean: ', joint_mu)
    print('Joint std: ', joint_std)

    # Get Code Report
    code_mu, code_std = get_report(code_predictions, code_labels, len(selected_codes))
    print('Code Only mean: ', code_mu)
    print('Code Only std: ', code_std)
    print('\n')

(num_windows, window_size): ( 1 ,  500 )
---------------------------------------------------
[ 0.          0.88888889  0.          0.          0.          0.          0.        ]
Joint mean:  0.126984126984
Joint std:  0.311046316544
[ 0.          0.88888889  0.          0.          0.          0.          0.        ]
Code Only mean:  0.126984126984
Code Only std:  0.311046316544


(num_windows, window_size): ( 2 ,  250 )
---------------------------------------------------
[ 0.          0.          0.38461538  0.          0.6         0.          0.
  0.          0.          0.          0.          0.          0.          0.        ]
Joint mean:  0.0703296703297
Joint std:  0.177015196822
[ 0.          0.36363636  0.44444444  0.          0.          0.          0.        ]
Code Only mean:  0.11544011544
Code Only std:  0.18380009614


(num_windows, window_size): ( 5 ,  100 )
---------------------------------------------------
[ 0.          0.          0.          0.          0.         

In [70]:
def start_train_only_codes(selected_codes, train_map_revived, num_iter, num_states, random_state=None):
    """Fit one Gaussian HMM per ATA6 code, pooling all of its time windows.

    Parameters
    ----------
    selected_codes : sequence
        Codes to train on; any code not contained in it is skipped.
    train_map_revived : dict
        Nested mapping ``vehicleID -> ATA6code -> time_window -> list of
        sequences``. Each sequence is handed to ``pandas.concat`` and its
        ``shape[0]`` is used as the sequence length.
    num_iter : int
        Forwarded to ``GaussianHMM`` as ``n_iter``.
    num_states : int
        Forwarded to ``GaussianHMM`` as ``n_components``.
    random_state : optional
        Forwarded to ``GaussianHMM``. The default ``None`` keeps the previous
        (unseeded) behaviour; pass an int for reproducible fits.

    Returns
    -------
    dict
        ``ATA6code -> fitted model``.
    """
    # Pool sequences per code and window, keeping first-seen ordering.
    code_window_map = {}
    for code_map in train_map_revived.values():
        for ATA6code, window_map in code_map.items():
            if ATA6code not in selected_codes:
                continue
            pooled_windows = code_window_map.setdefault(ATA6code, {})
            for time_window, sequences in window_map.items():
                pooled_windows.setdefault(time_window, []).extend(sequences)

    models = {}
    for ATA6code, pooled_windows in code_window_map.items():
        window_blocks = []
        lengths_code = []
        for listofsequences in pooled_windows.values():
            # DataFrame.as_matrix() no longer exists in pandas >= 1.0;
            # .values yields the same ndarray on old and new versions.
            window_blocks.append(pandas.concat(listofsequences).values)
            lengths_code.extend(sequence.shape[0] for sequence in listofsequences)
        # Stack all windows once; row order matches the order of lengths_code.
        Xcode = np.concatenate(window_blocks).astype(float)
        model = hmm.GaussianHMM(n_components=num_states, n_iter=num_iter,
                                random_state=random_state)
        models[ATA6code] = model.fit(Xcode, lengths_code)
    return models

In [85]:
def get_results_only_codes(selected_codes, models, val_map_revived, num_time_windows):
    """Classify every validation sequence with the per-code HMMs.

    Each sequence is scored by every model, the scores are passed through
    ``softmax`` and the code with the largest probability is predicted.

    ``softmax`` is not defined in this cell; it must be available in the
    notebook namespace (presumably via the star imports at the top of the
    notebook -- confirm).

    Parameters
    ----------
    selected_codes : sequence
        Codes under evaluation; a code's position in this sequence is its
        class index.
    models : dict
        ``ATA6code -> model`` exposing ``score(X)``.
    val_map_revived : dict
        Nested mapping ``vehicleID -> ATA6code -> time_window -> list of
        sequences`` (each sequence must expose ``.values``).
    num_time_windows : int
        Not used by this function; kept so the signature parallels
        ``get_results``.

    Returns
    -------
    tuple
        ``(code_only_predictions, code_only_labels)``, both lists of code
        indices in iteration order.
    """
    num_codes = len(selected_codes)

    # Count validation sequences up front so the score tables can be
    # allocated once.
    num_val = 0
    for code_map in val_map_revived.values():
        for ATA6code, window_map in code_map.items():
            if ATA6code not in selected_codes:
                continue
            for sequences in window_map.values():
                num_val += len(sequences)

    # Resolve each model's column once instead of once per sequence.
    model_slots = [(selected_codes.index(code), model)
                   for code, model in models.items()]

    code_only_labels = []
    code_only_predictions = []

    # NOTE(review): codes without a trained model keep the 0.0 written by
    # np.zeros and still take part in the softmax below -- confirm that this
    # is intended.
    log_likelihoods = np.zeros((num_val, num_codes))
    probabilities = np.zeros((num_val, num_codes))

    sample = 0
    for code_map in val_map_revived.values():
        for ATA6code, window_map in code_map.items():
            if ATA6code not in selected_codes:
                continue
            label_code = selected_codes.index(ATA6code)
            for sequences in window_map.values():
                for sequence_of_snapshots in sequences:
                    code_only_labels.append(label_code)
                    # .values replaces DataFrame.as_matrix(), which was
                    # removed in pandas 1.0.
                    x = sequence_of_snapshots.values.astype(float)
                    for c, model in model_slots:
                        log_likelihoods[sample, c] = model.score(x)

                    # Compute Softmax Prob Matrix
                    probabilities_temp = softmax(log_likelihoods[sample].reshape(1, num_codes))
                    probabilities[sample] = probabilities_temp.reshape(num_codes)
                    predicted_code_only = np.argmax(probabilities[sample])
                    code_only_predictions.append(predicted_code_only)
                    sample = sample + 1
    return code_only_predictions, code_only_labels

In [87]:
# Experiment settings for the code-only HMM classifier (one model per code).
num_iter = 100
num_states = 2
selected_codes = [0, 45021, 44004, 43004, 45008, 45002, 45007]
# Each pair is (num_windows, window_size) and is passed to load_maps.
window_pairs = [(1, 500), (2, 250), (5, 100), (10, 50)]

for num_windows, window_size in window_pairs:
    print('(num_windows, window_size): (',num_windows, ', ', window_size, ')')
    print('---------------------------------------------------')

    # Load the maps for this windowing and fit one model per code.
    train_combined, val_combined, test_combined = load_maps(num_windows, window_size)
    models = start_train_only_codes(selected_codes, train_combined, num_iter, num_states)

    # Score the validation sequences; keep both results as arrays.
    code_predictions, code_labels = (
        np.array(result)
        for result in get_results_only_codes(selected_codes, models, val_combined, num_windows)
    )

    # Per-code accuracy summary.
    code_mu, code_std = get_report(code_predictions, code_labels, len(selected_codes))
    print('Code Only mean: ', code_mu)
    print('Code Only std: ', code_std)
    print('\n')

(num_windows, window_size): ( 1 ,  500 )
---------------------------------------------------
[ 0.          0.88888889  0.          0.          0.          0.          0.        ]
Code Only mean:  0.126984126984
Code Only std:  0.311046316544


(num_windows, window_size): ( 2 ,  250 )
---------------------------------------------------
[ 0.          0.40909091  0.44444444  0.          0.          0.          0.        ]
Code Only mean:  0.121933621934
Code Only std:  0.193025379158


(num_windows, window_size): ( 5 ,  100 )
---------------------------------------------------
[ 0.          0.61666667  0.28        0.          0.          0.33333333
  0.        ]
Code Only mean:  0.175714285714
Code Only std:  0.224773002147


(num_windows, window_size): ( 10 ,  50 )
---------------------------------------------------
[ 0.          0.61417323  0.30769231  0.          0.          0.33333333
  0.        ]
Code Only mean:  0.179314124196
Code Only std:  0.226122449694


