In [1]:
import numpy as np
from sklearn import mixture
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import os
import matplotlib.pyplot as plt
from matplotlib import animation 
from map_overlay import MapOverlay
import utils
import mixture_animation

In [2]:
# Directory layout: data, figures and animations live in sibling folders one
# level above the directory the notebook is started from.  Each path keeps its
# trailing slash.
curr_dir = os.getcwd()
data_path = '{}/../data/'.format(curr_dir)
fig_path = '{}/../figs/'.format(curr_dir)
animation_path = '{}/../animation/'.format(curr_dir)

In [3]:
# Load everything the notebook needs in one call and unpack the seven values
# it returns.
# NOTE(review): P is used below as the number of time indices (range(P),
# frames=P) and avg_loads is indexed as avg_loads[:, time]; the remaining
# shapes/meanings are defined by utils.load_data -- confirm there.
params = utils.load_data(data_path)
(gps_loc, avg_loads, park_data,
 N, P,
 idx_to_day_hour, day_hour_to_idx) = params

In [None]:
# Model-selection sweep.
#
# For every time index, fit diagonal-covariance Gaussian mixtures with
# min_comps .. max_comps - 1 components on the min-max-scaled features
# [load at that time, gps_loc columns], and record three criteria per fit:
# the EM lower bound (gmm.lower_bound_), BIC and AIC.
#
# model_selection[criterion] ends up as a list with one entry per time index;
# each entry is a list with one score per component count.
model_selection = {"likelihood": [], "bic": [], "aic": []}
min_comps = 1
max_comps = 20  # exclusive: range(min_comps, max_comps) stops at 19 components
random_seed = 0  # fixed seed so a Restart & Run All reproduces the same fits

for time in range(P):
    # The feature matrix and its scaling depend only on the time index, so
    # they are built once per time step instead of once per component count.
    cluster_data = np.hstack((avg_loads[:, time, None], gps_loc))
    scaler = MinMaxScaler().fit(cluster_data)
    cluster_data = scaler.transform(cluster_data)

    likelihoods = []
    bics = []
    aics = []

    for num_comps in range(min_comps, max_comps):
        # n_init=10 restarts EM ten times and keeps the best run; random_state
        # makes those restarts deterministic across notebook executions.
        gmm = mixture.GaussianMixture(n_init=10, n_components=num_comps,
                                      covariance_type='diag',
                                      random_state=random_seed).fit(cluster_data)

        likelihoods.append(gmm.lower_bound_)
        bics.append(gmm.bic(cluster_data))
        aics.append(gmm.aic(cluster_data))

    model_selection['likelihood'].append(likelihoods)
    model_selection['bic'].append(bics)
    model_selection['aic'].append(aics)

In [None]:
def _plot_model_selection(scores, component_range, metric_name, file_name,
                          out_dir, guide_lines=(5, 10, 15, 20, 25)):
    """Plot one model-selection criterion averaged over all time indices.

    Parameters
    ----------
    scores : sequence of sequences
        One inner sequence per time index, holding one score per component
        count (the layout built in ``model_selection``).
    component_range : sequence of int
        Component counts used as x values; must have as many entries as each
        inner sequence of ``scores``.
    metric_name : str
        Used as the y-axis label and as the prefix of the figure title.
    file_name : str
        File name of the saved figure inside ``out_dir``.
    out_dir : str
        Directory the figure is written to.
    guide_lines : sequence of number, optional
        x positions of the black vertical reference lines.
        NOTE(review): with the current sweep (1..19 components) the lines at
        20 and 25 fall outside the plotted data -- confirm they are wanted.

    Returns
    -------
    numpy.ndarray
        The mean score per component count that was plotted.
    """
    plt.figure()
    mean_scores = np.mean(np.vstack(scores), axis=0)
    plt.plot(component_range, mean_scores, 'o-', color='red')
    for x_pos in guide_lines:
        plt.axvline(x=x_pos, color='black')
    plt.xlabel('Number of Components')
    plt.ylabel(metric_name)
    plt.title(metric_name + ' Model Selection')
    plt.savefig(os.path.join(out_dir, file_name))
    return mean_scores


# One figure per criterion; the three plots differ only in the data series,
# the label and the output file name.
comp_range = range(min_comps, max_comps)

# Likelihood model selection plot.
mean_likelihood = _plot_model_selection(model_selection['likelihood'], comp_range,
                                        'Likelihood', 'likelihood_model.png',
                                        fig_path)

# BIC model selection plot.
mean_bic = _plot_model_selection(model_selection['bic'], comp_range,
                                 'BIC', 'bic_model.png', fig_path)

# AIC model selection plot.
mean_aic = _plot_model_selection(model_selection['aic'], comp_range,
                                 'AIC', 'aic_model.png', fig_path)

In [4]:
# Build the mixture animation over all P time indices and save it as an MP4.
num_comps = 4

# One two-column row per mixture component (num_comps rows); handed to
# mixture_animation.animate unchanged.
default_means = np.array([
    [47.61348888, -122.34343007],
    [47.61179196, -122.34500616],
    [47.61597088, -122.35054099],
    [47.61706817, -122.34617185],
])

# init_animation returns the figure plus the artists and map helpers that
# animate() updates on every frame.
params = mixture_animation.init_animation(gps_loc, num_comps, N, fig_path)
(fig, ax, scatter, scatter_centroid, patches,
 ellipses, mp, center, pix_center) = params

times = range(P)

# Extra positional arguments forwarded to animate() after the frame number.
animate_args = (
    times, ax, scatter, scatter_centroid, patches,
    ellipses, mp, default_means, center,
    pix_center, avg_loads, gps_loc, num_comps,
)

ani = animation.FuncAnimation(
    fig=fig,
    func=mixture_animation.animate,
    frames=P,
    fargs=animate_args,
    interval=200,
)

# The saved movie is written by ffmpeg at one frame per second.
FFwriter = animation.FFMpegWriter(fps=1)
mp4_file = os.path.join(animation_path, 'mixture.mp4')
ani.save(mp4_file, writer=FFwriter)