In [None]:
import os
import socket
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from dateutil import tz
from IPython import embed
from matplotlib.patches import Rectangle
from scipy import stats

np.random.seed(42)

%matplotlib inline
plt.style.use('ggplot')

In [None]:
#user input for filename of saved model, defaults to timestamp
name = input("String to add to model, defaults to time stamp if nothing entered.")  # Python 3
if name == "":
    # Keep `name` a string: it is concatenated into the checkpoint path
    # ("models/" + name + ".ckpt"), which raises TypeError for a float.
    name = str(time.time())

#user input to add note to plot
plot_note=input("Note you'd like to add to plot:")

#user input to load prev model
model_to_load=input("enter the model name to load - leave blank to start fresh")

#number of epochs between each printed update and model save
checkin_frequency =  1
    
################Hyperparameters
# -- input geometry: each example is (input_height, input_num_timestamps, num_channels)
input_height = 1
input_num_timestamps = 36   # default sliding-window length used by segment_signal
num_channels = 11           # number of feature columns stacked per timestamp

# -- label handling and objective
speed_bucket_size = "none_use_regression"   # "0.1", "0.5" or "none_use_regression"
loss_function = "L2_loss"                   # "cross_entropy", "L1_loss" or "L2_loss"

# -- network architecture
kernel_size = 6             # kernel width of the first depthwise convolution
depth = 40                  # channel multiplier of the first depthwise convolution
max_pool_kernel_size=6      # pooling window width after the first convolution
num_hidden = 10             # units in the fully connected hidden layer

# -- optimisation
batch_size = 250
learning_rate = 0.0001
training_epochs = 100
########################

def read_data(file_path):
    """Load a CSV file into a DataFrame.

    The first row of the file (row 0) is used as the column headers.
    """
    return pd.read_csv(file_path, header=0)

def feature_normalize(dataset):
    """Standardise `dataset` to zero mean and unit variance along axis 0.

    Uses the population standard deviation (numpy's default, ddof=0).
    A feature with zero spread (every value identical) is mapped to all
    zeros instead of NaN/inf, so a constant column cannot poison training.

    Returns an object of the same kind as `dataset` (array, Series, ...).
    """
    mu = np.mean(dataset,axis = 0)
    sigma = np.std(dataset,axis = 0)
    # Where sigma is exactly 0, add 1 so the division is a no-op on the
    # already-zero centred values. Works for scalars, arrays and Series.
    safe_sigma = sigma + (sigma == 0)
    return (dataset - mu)/safe_sigma
    
def plot_axis(ax, x, y, title):
    """Draw `y` against `x` on `ax` with a title and a grid.

    The x axis is hidden, the x range is clamped to the data, and the
    y range is padded by one standard deviation of `y` on each side.
    """
    lowest, highest = min(y), max(y)
    leftmost, rightmost = min(x), max(x)

    ax.plot(x, y)
    ax.set_title(title)
    ax.xaxis.set_visible(False)
    ax.set_ylim([lowest - np.std(y), highest + np.std(y)])
    ax.set_xlim([leftmost, rightmost])
    ax.grid(True)
    
def plot_activity(activity,data):
    """Show bounce, braking and pelvic tilt over time as three stacked panels.

    `activity` is used as the figure title; `data` must provide a
    'timestamp' column plus the three plotted columns.
    """
    panel_columns = ('bounce', 'braking', 'pelvic_tilt')
    fig, axes = plt.subplots(nrows = len(panel_columns), figsize = (15, 10), sharex = True)
    for axis, column in zip(axes, panel_columns):
        # Each panel is titled with the name of the column it shows.
        plot_axis(axis, data['timestamp'], data[column], column)
    plt.subplots_adjust(hspace=0.2)
    fig.suptitle(activity)
    plt.subplots_adjust(top=0.90)
    plt.show()
    
def windows(data, size):
    """Yield (start, end) integer index pairs for half-overlapping windows.

    Windows are `size` wide and advance by `size / 2` (a float step; the
    yielded bounds are truncated to int). Iteration stops once the start
    position reaches `data.count()`, so trailing windows may extend past
    the end of the data.
    """
    half_step = size / 2
    position = 0
    while True:
        if not position < data.count():
            return
        yield int(position), int(position + size)
        position += half_step

def segment_signal(data,window_size = input_num_timestamps):
    """Cut `data` into half-overlapping windows of features and speed labels.

    A window [start, end) is kept only when it lies fully inside `data`
    and its first and last rows share the same 'activity_id'.

    Parameters
    ----------
    data : DataFrame with 'timestamp', 'activity_id', 'gps_speed_true' and
        the eleven feature columns listed in `feature_columns` below.
    window_size : number of rows per window.

    Returns
    -------
    segments : float array of shape (n_windows, window_size, num_channels).
    labels : float array of shape (n_windows,) holding the mean
        'gps_speed_true' of each window, rounded according to the
        module-level `speed_bucket_size` ("0.1": nearest tenth,
        "0.5": nearest half unit, "none_use_regression": unrounded).
    num_labels : number of distinct values in `labels`.

    Raises
    ------
    ValueError : if `speed_bucket_size` is not one of the supported values
        (otherwise segments would be collected without matching labels).
    """
    feature_columns = ["age", "weight", "height", "gender",
                       "bounce", "braking", "cadence", "ground_contact",
                       "pelvic_drop", "pelvic_rotation", "pelvic_tilt"]
    if speed_bucket_size not in ("0.1", "0.5", "none_use_regression"):
        raise ValueError("Unsupported speed_bucket_size: %r" % (speed_bucket_size,))

    n_rows = data.shape[0]
    activity = data['activity_id']
    speed = data["gps_speed_true"]

    # Collect per-window results in lists and stack once at the end;
    # growing the arrays inside the loop copies everything on each pass.
    segment_chunks = []
    label_values = []
    for (start, end) in windows(data['timestamp'], window_size):
        # Skip windows that run past the end of the data.
        if end > n_rows:
            continue
        # The window covers rows start .. end-1, so its last row is end-1
        # (row `end` already belongs to the next window).
        if activity.iloc[start] != activity.iloc[end - 1]:
            continue

        window_features = [data[column].iloc[start:end] for column in feature_columns]
        # dstack turns eleven length-W series into one (1, W, 11) block.
        segment_chunks.append(np.dstack(window_features))

        mean_speed = np.mean(speed.iloc[start:end])
        if speed_bucket_size == "0.1":
            # round to nearest decimal
            label_values.append(np.around(mean_speed, decimals=1))
        elif speed_bucket_size == "0.5":
            # round to nearest half unit
            label_values.append(np.around(2 * mean_speed, decimals=0) / 2)
        else:
            # regression: keep the unrounded mean
            label_values.append(mean_speed)

    segments = np.vstack([np.empty((0,window_size,num_channels))] + segment_chunks)
    labels = np.asarray(label_values, dtype=float)
    num_labels = len(np.unique(labels))
    return segments, labels, num_labels

def weight_variable(shape):
    """Return a tf.Variable of `shape` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev = 0.1))

def bias_variable(shape):
    """Return a tf.Variable of `shape` initialised to all zeros."""
    zeros = tf.constant(0.0, shape = shape)
    bias = tf.Variable(zeros)
    return bias

def depthwise_conv2d(x, W):
    """Depthwise 2-D convolution of `x` with filter `W`: unit strides, 'VALID' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.depthwise_conv2d(x, W, unit_strides, padding='VALID')

def apply_depthwise_conv(x,kernel_size,num_channels,depth):
    """Apply a depthwise convolution layer with bias and ReLU to `x`.

    Creates a fresh filter of shape [1, kernel_size, num_channels, depth]
    and a bias vector of length depth * num_channels.
    """
    filter_shape = [1, kernel_size, num_channels, depth]
    weights = weight_variable(filter_shape)
    biases = bias_variable([depth * num_channels])
    pre_activation = tf.add(depthwise_conv2d(x, weights), biases)
    return tf.nn.relu(pre_activation)
    
def apply_max_pool(x,kernel_size,stride_size):
    """Max-pool `x` along its third dimension only, with 'VALID' padding.

    The pooling window is `kernel_size` wide and moves by `stride_size`.
    """
    pool_window = [1, 1, kernel_size, 1]
    pool_strides = [1, 1, stride_size, 1]
    return tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides, padding='VALID')

In [None]:
dataset = read_data('quarter-big.csv')

##need to normalize age/weight/height
# Standardise every model input column in place (zero mean, unit variance).
for column in ('weight', 'height', 'bounce', 'gender', 'age', 'braking',
               'cadence', 'ground_contact', 'pelvic_drop', 'pelvic_rotation',
               'pelvic_tilt'):
    dataset[column] = feature_normalize(dataset[column])


#utc = datetime.strptime(dataset['timestamp'][1], '%Y-%m-%d %H:%M:%S+%f')
#dt64=np.datetime64(dataset['timestamp'])
#mytime=(dt64 - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')

#print(mytime)
#print(dataset)

In [None]:
#for activity in np.unique(dataset["gps_speed_half_unit"]):
 #   subset = dataset[dataset["gps_speed_half_unit"] == activity][:180]
  #  plot_activity(activity,subset)

In [None]:
segments, labels, num_labels = segment_signal(dataset)
if speed_bucket_size == "none_use_regression":
    # Regression: keep the continuous mean speed as a single target column.
    # One-hot encoding it would create one "class" per distinct float value,
    # which makes the L1/L2 losses and the reported MAE meaningless.
    labels = np.asarray(labels, dtype = np.float32).reshape(-1, 1)
    num_labels = 1
else:
    # Bucketed speeds are treated as classes: one-hot encode them.
    labels = np.asarray(pd.get_dummies(labels), dtype = np.int8)
#print(segments)
#print(labels)
# Add the height axis expected by the input placeholder.
reshaped_segments = segments.reshape(len(segments), input_height, input_num_timestamps, num_channels)

In [None]:
# Random boolean mask: each segment goes to the training set when its
# uniform draw is below 0.90, otherwise to the test set.
train_test_split = np.random.rand(len(reshaped_segments)) < 0.90
held_out = ~train_test_split
train_x, train_y = reshaped_segments[train_test_split], labels[train_test_split]
test_x, test_y = reshaped_segments[held_out], labels[held_out]

In [None]:
# Number of full batches per epoch; a trailing partial batch is dropped.
n_train_examples = train_x.shape[0]
total_batches = n_train_examples // batch_size

In [None]:
# Inputs: a batch of segments and their targets.
X = tf.placeholder(tf.float32, shape=[None,input_height,input_num_timestamps,num_channels])
Y = tf.placeholder(tf.float32, shape=[None,num_labels])

# Feature extractor: depthwise conv -> max pool (stride 2) -> depthwise conv.
c = apply_depthwise_conv(X,kernel_size,num_channels,depth)
p = apply_max_pool(c,max_pool_kernel_size,2)
c = apply_depthwise_conv(p,6,depth*num_channels,depth//10)

# Flatten everything except the batch axis. The last axis of `c` holds
# depth * num_channels * (depth//10) channels, so one product serves both
# the reshape and the first dense weight matrix.
shape = c.get_shape().as_list()
flat_size = shape[1] * shape[2] * shape[3]
c_flat = tf.reshape(c, [-1, flat_size])

# Fully connected hidden layer with tanh activation.
f_weights_l1 = weight_variable([flat_size, num_hidden])
f_biases_l1 = bias_variable([num_hidden])
hidden_pre_activation = tf.add(tf.matmul(c_flat, f_weights_l1), f_biases_l1)
f = tf.nn.tanh(hidden_pre_activation)

# Linear output layer.
out_weights = weight_variable([num_hidden, num_labels])
out_biases = bias_variable([num_labels])

y_ = tf.add(tf.matmul(f, out_weights), out_biases)

In [None]:
#Specifying loss function from flag hyperparameter
residual = Y - y_
if loss_function == "cross_entropy":
    # y_ holds raw logits, so normalise with log-softmax; taking tf.log of
    # the logits directly yields NaN as soon as any logit is <= 0.
    loss = -tf.reduce_sum(Y * tf.nn.log_softmax(y_))
elif loss_function == "L1_loss":
    # tf.abs (not np.abs): the operand is a symbolic tensor, not an array.
    loss = tf.reduce_sum(tf.abs(residual))
elif loss_function == "L2_loss":
    loss = tf.reduce_sum(tf.square(residual))
else:
    # Fail here rather than with a NameError on `loss` further down.
    raise ValueError("Unknown loss_function: %r" % (loss_function,))

optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(loss)

correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))

# Mean absolute error between targets and predictions.
error = tf.reduce_mean(tf.abs(residual))
# accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# Histories start empty. np.empty(shape=[1]) would seed each one with a
# single uninitialised value that then appears as a bogus first plot point.
cost_history = np.empty(shape=[0],dtype=float)          # loss of every training batch
test_error_history = np.empty(shape=[0],dtype=float)    # test MAE at each check-in
train_error_history = np.empty(shape=[0],dtype=float)   # train MAE at each check-in
cost_at_epoch = []                                      # last batch loss of every epoch
last_test_error = float("nan")                          # no measurement until the first check-in

#create instance of saver to save model 
saver = tf.train.Saver()

# Checkpoints and plots are written into these directories; create them up
# front so a long training run cannot fail at save time.
os.makedirs("models", exist_ok=True)
os.makedirs("plots", exist_ok=True)

with tf.Session() as session:
    tf.global_variables_initializer().run()
    if model_to_load!="":
        # Overwrite the fresh initial values with the saved checkpoint.
        saver.restore(session, "models/"+model_to_load+".ckpt")
    counter =  0
    last_checkin_time=time.time()
    # Separate name for the loss value so the graph tensor `c` is not clobbered.
    batch_loss = float("nan")

    for epoch in range(training_epochs):
        for batch_index in range(total_batches):
            # total_batches only counts full batches, so this slice always
            # fits. (A modulo here made the last batch wrap back to offset 0
            # and divided by zero when the train size equalled batch_size.)
            offset = batch_index * batch_size
            batch_x = train_x[offset:(offset + batch_size), :, :, :]
            batch_y = train_y[offset:(offset + batch_size), :]
            _, batch_loss = session.run([optimizer, loss],feed_dict={X: batch_x, Y : batch_y})
            cost_history = np.append(cost_history,batch_loss)
        cost_at_epoch.append(batch_loss)

        counter += 1

        if (counter % checkin_frequency == 0):

            last_test_error=session.run(error, feed_dict={X: test_x, Y: test_y})
            test_error_history=np.append(test_error_history,last_test_error)

            last_train_error = session.run(error, feed_dict={X: train_x, Y: train_y})
            train_error_history=np.append(train_error_history,last_train_error)

            print("Epoch: ",epoch,"\n"+"Training Loss: ",batch_loss)
            print("Testing MAE:",last_test_error,"\n","Training MAE: ", last_train_error)
            print("Time cost:"+str(round(time.time()-last_checkin_time,1)))
            last_checkin_time=time.time()

            if counter > 1:
                # Save the variables to disk. str() guards against a
                # non-string model name (e.g. a numeric timestamp).
                save_path = saver.save(session, "models/"+str(name)+".ckpt")
                print("Model saved in path: %s" % save_path)
                print("\n")

    # Train MAE on the left axis, test MAE (blue) on a twin right axis.
    fig, ax1 = plt.subplots()
    ax1.plot(train_error_history)
    plt2=ax1.twinx()

    plt2.plot(test_error_history,"b-")

    plt.ylabel('Train vs. Test MAE')
    plt.xlabel('epoch x'+str(checkin_frequency))
    plt.title("Final MAE =" + str(last_test_error))

    # The legend is used as a text box listing the run configuration: one
    # invisible handle per line of text.
    extra = Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)
    legend_lines = (
        "loss: " + loss_function,
        "learning rate: " + str(learning_rate),
        "batch_size: " + str(batch_size),
        "kernel_size: " + str(kernel_size),
        "depth: "+ str(depth),
        "layers : "+ str(num_hidden),
        "max pool kernel size: "+str(max_pool_kernel_size),
        "speed_bucket_size: " + speed_bucket_size,
        "epochs: "+str(training_epochs),
        "input_num_timestamps: " + str(input_num_timestamps),
        "num_labels: " + str(num_labels),
        "num_channels:" + str(num_channels),
        "note:" + plot_note,
    )
    plt.legend([extra] * len(legend_lines), legend_lines)

    # Lay out before saving so the file on disk matches what is displayed.
    fig.tight_layout()
    plt.savefig("plots/"+str(time.time())+"User-"+str(socket.gethostname())+".png")
    plt.show()

    print("Testing MAE:", session.run(error, feed_dict={X: test_x, Y: test_y}))
    
    
    
    
    
    
   
    
    

In [None]:
# Show the compute devices TensorFlow can see on this machine.
from tensorflow.python.client import device_lib
local_devices = device_lib.list_local_devices()
print(local_devices)