In [1]:
import math
import pickle
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import animation
from matplotlib.patches import Ellipse
from scipy.misc import imread
from scipy.misc import imshow
from sklearn import mixture
from sklearn.preprocessing import MinMaxScaler
warnings.filterwarnings('ignore')
% matplotlib inline

In [3]:
# Inputs are two pickle files read from the working directory.
# NOTE(review): pickle.load executes arbitrary code from the file; only use
# these with files you produced or otherwise trust.
load_path = 'hourlyUtilization100_uncapped.pck'
loc_path = 'ElementKeytoLatLong.pck'

# load_data: element key -> mapping whose items are (datetime, load) pairs
# (that is how the aggregation cell below consumes it).
with open(load_path, 'rb') as f:
    load_data = pickle.load(f)

# locations: element key -> two endpoints, each indexed as [0]=lat, [1]=long
# by the aggregation cell below.
with open(loc_path, 'rb') as f:
    locations = pickle.load(f)

In [4]:
# Build one row per element (in sorted key order):
#   avg_loads[i] -> mean load for every (weekday, hour) slot, day-major
#   gps_loc[i]   -> midpoint of the element's two endpoint coordinates
NUM_DAYS = 6  # weekday() values 0..5; rows with weekday() == 6 are dropped below

avg_loads = []
gps_loc = []
elkeys = sorted(load_data.keys())
for key in elkeys:
    # list(...) gives the DataFrame constructor a concrete sequence of
    # (datetime, load) pairs; a bare dict view is not accepted by every
    # pandas version on Python 3.
    curr = pd.DataFrame(list(load_data[key].items()), columns=['Datetime', 'Load'])
    curr['Datetime'] = pd.to_datetime(curr['Datetime'])
    curr['Hour'] = curr['Datetime'].dt.hour
    curr['Day'] = curr['Datetime'].dt.weekday
    curr = curr.loc[curr['Day'] != 6].reset_index(drop=True)

    # One groupby instead of a boolean mask per slot. Reindexing over the
    # full day x hour grid keeps the original ordering (day outer, sorted
    # hour inner) and leaves NaN for slots with no observations.
    hours = sorted(curr['Hour'].unique())
    slots = pd.MultiIndex.from_product([range(NUM_DAYS), hours], names=['Day', 'Hour'])
    slot_means = curr.groupby(['Day', 'Hour'])['Load'].mean().reindex(slots)
    avg_loads.append(slot_means.values)

    start, end = locations[key][0], locations[key][1]
    mid_lat = (start[0] + end[0])/2.
    mid_long = (start[1] + end[1])/2.
    gps_loc.append([mid_lat, mid_long])

# vstack requires every element to have produced the same number of slots.
avg_loads = np.vstack((avg_loads))
gps_loc = np.vstack((gps_loc))
# Palette indexed by the colour code that animate() assigns to each cluster.
colors = ['b', 'g', 'orange', 'c', 'm', 'y', 'k', 'w', 'purple']

In [None]:
# Model-selection sweep: for each of the first 10 time-slot columns, fit
# diagonal-covariance Gaussian mixtures with 1..9 components and plot the
# fitted lower bound against the component count (one curve per slot).
# No random_state is passed, so curves vary slightly between runs.
component_counts = list(range(1, 10))
for time in range(10):
    # Features: load at this slot plus the two coordinates, min-max scaled.
    # They do not depend on the component count, so build them once per slot.
    cluster_data = MinMaxScaler().fit_transform(
        np.hstack((avg_loads[:, time, None], gps_loc)))
    likelihoods = []
    for num_comps in component_counts:
        gmm = mixture.GaussianMixture(n_init=15, n_components=num_comps,
                                      covariance_type='diag').fit(cluster_data)
        likelihoods.append(gmm.lower_bound_)
    # Plot against the actual component counts; plotting the bare list would
    # label the 1-component fit as x=0.
    plt.plot(component_counts, likelihoods, '-o', linewidth=1, label=time)
plt.xlabel('Number of mixture components')
plt.ylabel('Log-likelihood lower bound')
#plt.legend()

In [None]:
# Static figure setup for the animation: map image, one marker per element,
# a marker collection for the cluster means, and the ellipse patches that
# animate() repositions on every frame.
days = {0:'Monday', 1:'Tuesday', 2:'Wednesday', 3:'Thursday', 4:'Friday', 5:'Saturday'}

# Corner coordinates (lat, long) and pixel size handed to MapOverlay.
# NOTE(review): MapOverlay is not defined or imported in the visible cells;
# it must come from an earlier cell or module.
upleft = [47.6197793,-122.3592749]
bttmright = [47.607274, -122.334786]
imgsize = [1135,864]
mp = MapOverlay(upleft, bttmright, imgsize)

# Pixel position of every element; iterate over gps_loc itself rather than a
# hard-coded count so the marker count always matches the data.
pixpos = np.array([mp.to_image_pixel_position(list(loc)) for loc in gps_loc])

# Reference point used by animate() to turn a lat/long extent into a pixel
# extent. It must be the midpoint of the two corners (sum / 2), not half of
# their difference, which is a point far outside the mapped area.
center = ((upleft[0] + bttmright[0])/2., (upleft[1] + bttmright[1])/2.)
pix_center = mp.to_image_pixel_position(list(center))

fig = plt.figure(figsize=(18,16))
ax = plt.axes(xlim=(min(pixpos[:,0]), max(pixpos[:,0])), ylim=(min(pixpos[:,1]), max(pixpos[:,1])))
ax.invert_yaxis()
ax.xaxis.label.set_fontsize(25)
ax.set_title('Gaussian Mixture Model on Average Load Distribution and Location', fontsize=25)
im = imread("belltown.png")
ax.imshow(im)

# Element markers (recoloured per frame) and cluster-mean markers (moved per frame).
scat = ax.scatter(pixpos[:, 0], pixpos[:, 1], s=175, color='red')
scat_mean = ax.scatter([], [], s=500, color='red')

# Two ellipses per mixture component (animate() draws a 1x and a 2x extent),
# created degenerate here and sized on each frame.
num_comps = 4
patches = [Ellipse(xy=(0, 0), width=0, height=0, angle=0, edgecolor='black',
           facecolor='none', lw=4) for comp in range(2*num_comps)]
ellipses = [ax.add_patch(patch) for patch in patches]

def animate(time, avg_loads=avg_loads, gps_loc=gps_loc, num_comps=num_comps):
    """Redraw the figure for one time slot (FuncAnimation frame callback).

    Fits a diagonal-covariance Gaussian mixture to the min-max scaled
    features [load at `time`, lat, long], then updates the module-level
    artists: `scat` (element colours by cluster), `patches` (two ellipses per
    component), `scat_mean` (component centres) and the x-axis label of `ax`.

    Parameters
    ----------
    time : int
        Column index into `avg_loads`. The label treats it as
        day * 10 + hour-offset, with hour 0 shown as 8:00.
    avg_loads, gps_loc : ndarray
        Per-element load matrix and (lat, long) coordinates; default to the
        arrays that existed when this function was defined.
    num_comps : int
        Number of mixture components. It is bounded by the 6 reference
        centres below and by the `2 * num_comps` ellipse patches created at
        figure setup.

    Returns
    -------
    None
    """
    cluster_data_true = np.hstack((avg_loads[:, time, None], gps_loc))
    scaler = MinMaxScaler().fit(cluster_data_true)
    cluster_data = scaler.transform(cluster_data_true)
    gmm = mixture.GaussianMixture(n_init=15, n_components=num_comps,
                                  covariance_type='diag').fit(cluster_data)
    labels = gmm.predict(cluster_data)

    # Undo the scaling for the two spatial columns. MinMaxScaler maps
    # x -> x * scale_ + min_, so means invert as (m - min_) / scale_ and
    # variances as v / scale_**2. With covariance_type='diag' the per-axis
    # variances are exactly the eigenvalues, so no decomposition is needed.
    means = (gmm.means_[:, 1:] - scaler.min_[1:]) / scaler.scale_[1:]
    stds = np.sqrt(gmm.covariances_[:, 1:] / scaler.scale_[1:] ** 2)

    # Fixed reference centres: each component takes the colour of the nearest
    # reference centre not already claimed, so colours stay stable from frame
    # to frame even though EM numbers its components arbitrarily.
    default_means = np.array([[47.61702269, -122.3522597],[47.61493758, -122.34991668],
                     [47.61778095, -122.3512094],[47.6168215, -122.34615319],
                     [47.61331815, -122.34253372],[47.61213144, -122.34610443]])

    color_codes = {}
    for i in range(num_comps):
        dists = [(j, np.linalg.norm(means[i] - default_means[j])) for j in range(num_comps)]
        nearest_first = [j for j, _ in sorted(dists, key=lambda item: item[1])]
        color_codes[i] = next(j for j in nearest_first if j not in color_codes.values())

    scat.set_color([colors[color_codes[label]] for label in labels])

    # Pixel position of every component centre, reused for ellipses and markers.
    mean_pix = [mp.to_image_pixel_position(list(mean)) for mean in means]

    num = 0
    for i in range(num_comps):
        for j in [1, 2]:
            # Extent of j standard deviations either side (2*j*std), converted
            # to pixels by offsetting the map centre and measuring the shift.
            shifted = (center[0] + stds[i, 0]*j*2, center[1] + stds[i, 1]*j*2)
            shifted_pix = mp.to_image_pixel_position(list(shifted))
            patches[num].center = mean_pix[i]
            patches[num].width = abs(shifted_pix[0] - pix_center[0])
            patches[num].height = abs(shifted_pix[1] - pix_center[1])
            # set_edgecolor actually restyles the patch; assigning a plain
            # `.edgecolor` attribute is ignored by matplotlib.
            patches[num].set_edgecolor(colors[color_codes[i]])
            num += 1

    scat_mean.set_offsets(np.array(mean_pix))

    # Integer day index: true division would yield a float and miss the
    # integer keys of `days` on Python 3.
    day, hour = divmod(time, 10)
    ax.set_xlabel(days[day] + ' ' + str(8+hour) + ':00')

In [None]:
# Render one frame per column of avg_loads (200 ms between frames) and write
# the result to mixture.mp4 via matplotlib's FFMpegWriter.
frame_count = avg_loads.shape[1]
ani = animation.FuncAnimation(fig, animate, frames=frame_count, interval=200)
FFwriter = animation.FFMpegWriter()
ani.save('mixture.mp4', writer=FFwriter)