In [1]:
import torchcraft
from torchcraft import replayer
import os
import pickle
import numpy as np
import data_utils
from sklearn.externals import joblib

from hmmlearn.hmm import GaussianHMM

from pomegranate import *
import warnings
warnings.filterwarnings("ignore")


from generate_role_datasets import unit_to_dict, game_over_time, post_process, hmm_data, hyper_params


In [2]:
# Load the experiment settings via hyper_params() (imported from generate_role_datasets).
# Later cells read params by key: 'components', 'replays_master', 'replay_path',
# 'num_games_to_parse', 'valid_units', 'playerid', 'step_frames', 'feature_set',
# 'add_orders' and 'window_size'.
params = hyper_params()

In [None]:
# Train a diagonal-covariance Gaussian HMM on windows of per-unit features
# extracted from replays.
#
# big_data = True  -> gather windows from up to params['num_games_to_parse'] replays
# big_data = False -> gather windows from the single replay params['replay_path'] (debugging)
#
# Every window used for training is kept in X, because later cells index into it.
big_data = True
n_components = params['components']
window_size = params['window_size']

model = GaussianHMM(n_components=n_components, covariance_type="diag", n_iter=10)

X = []

if big_data:
    games_counted = 0
    for replay_path in params['replays_master']:
        if games_counted >= params['num_games_to_parse']:
            break
        replay_data = replayer.load(replay_path)
        game_info = game_over_time(replay_data,
                                   valid_types=params['valid_units'],
                                   playerid=params['playerid'],
                                   step_size=params['step_frames'],
                                   feature_set=params['feature_set'],
                                   add_orders=params['add_orders'])

        hmm_samples = hmm_data(game_info, n_timesteps=window_size)
        print("Length of game: %s" % len(game_info))
        print("Number of HMM Samples: %s" % len(hmm_samples))
        # Only replays that produced at least one window count towards the quota.
        if len(hmm_samples) > 0:
            games_counted += 1
            X.extend(hmm_samples)
else:
    replay_path = params['replay_path']
    replay_data = replayer.load(replay_path)
    # Same feature options as the multi-game branch, so a debug model is trained on
    # the feature set that the saved file name advertises.
    game_info = game_over_time(replay=replay_data,
                               valid_types=params['valid_units'],
                               playerid=params['playerid'],
                               step_size=params['step_frames'],
                               feature_set=params['feature_set'],
                               add_orders=params['add_orders'])
    hmm_samples = hmm_data(game_info, n_timesteps=window_size)
    print("Length of game: %s" % len(game_info))
    print("Number of HMM Samples: %s" % len(hmm_samples))
    X = hmm_samples

if len(X) == 0:
    raise ValueError("No HMM samples were generated; nothing to fit.")

# Fit ONCE on all collected windows. GaussianHMM.fit runs its initialisation step
# on every call, so fitting game by game would discard what earlier games taught
# the model and leave only the last game's estimate.
input_data = np.concatenate(X)
# One length per window, taken from the data itself so the lengths always sum to
# the number of rows in input_data.
input_lengths = [len(window) for window in X]
model.fit(input_data, input_lengths)


Length of game: 3407
Number of HMM Samples: 11412
Length of game: 1991
Number of HMM Samples: 2192
Length of game: 3341
Number of HMM Samples: 11379
Length of game: 2373
Number of HMM Samples: 6383
Length of game: 2822
Number of HMM Samples: 0
Length of game: 1064
Number of HMM Samples: 1190
Length of game: 2450
Number of HMM Samples: 6245
Length of game: 1278
Number of HMM Samples: 0
Length of game: 623
Number of HMM Samples: 0
Length of game: 494
Number of HMM Samples: 0
Length of game: 2626
Number of HMM Samples: 9203
Length of game: 2422
Number of HMM Samples: 0
Length of game: 2495
Number of HMM Samples: 7786
Length of game: 2049
Number of HMM Samples: 0
Length of game: 722
Number of HMM Samples: 0
Length of game: 1527
Number of HMM Samples: 0
Length of game: 3045
Number of HMM Samples: 7379
Length of game: 1742
Number of HMM Samples: 0
Length of game: 1717
Number of HMM Samples: 2888
Length of game: 1231
Number of HMM Samples: 1451
Length of game: 2143
Number of HMM Samples: 5784

In [None]:
# Persist the trained model under clf/, named after the number of hidden states
# and the feature set. clf_name is reused by the reload cell further down.
clf_dir = 'clf'
# joblib.dump does not create missing parent directories, so make sure it exists.
if not os.path.isdir(clf_dir):
    os.makedirs(clf_dir)
clf_name = clf_dir + '/gaussianhmm' + str(params['components']) + params['feature_set'] + '.pkl'
joblib.dump(model, clf_name)

In [None]:
# Draw the index of one training window at random, show the window, and decode
# its hidden-state sequence with the freshly trained model.
random_sample = np.random.randint(low=0, high=len(X))
chosen_window = X[random_sample]
print(chosen_window)
model.predict(chosen_window)


In [None]:
# Read the saved model back from clf_name and decode the same window again, for
# comparison with the in-memory model's prediction.
loaded_model = joblib.load(clf_name)
reloaded_states = loaded_model.predict(X[random_sample])
reloaded_states

In [None]:
# Disabled experiment (kept for reference): an alternative HMM built with pomegranate
# from the same windows in X, followed by a joblib save/load round trip.
# NOTE(review): the print statements below use Python 2 syntax and would need
# parentheses before this cell could be re-enabled under Python 3.
# samples = np.array(X, dtype=np.float32)
# x_diffs = samples[:, :, 0].flatten()
# y_diffs = samples[:, :, 1].flatten()
# distances_from_home = samples[:, :, 2].flatten()
# types = samples[:, :, 3].flatten()
# normal_xs = NormalDistribution.from_samples(x_diffs)
# normal_ys = NormalDistribution.from_samples(y_diffs)
# normal_dists = NormalDistribution.from_samples(distances_from_home)
# type_distro = DiscreteDistribution.from_samples(types)
# gen_dist = GeneralMixtureModel([normal_xs, normal_ys, normal_dists, type_distro])
# model_pom = HiddenMarkovModel.from_samples(NormalDistribution, n_components=n_components, X=samples)
# print(model_pom)
# model_pom.plot()
# sample_in = np.array(X[0], dtype=np.float32)
# print sample_in
# print model_pom.predict(sample_in, algorithm="viterbi")
# filename = "clf/pomegranate5.pkl"
# joblib.dump(model_pom, filename)
# cls = joblib.load("clf/pomegranate5.pkl")
# print cls.predict(np.array(X[0], dtype=np.float32), algorithm="viterbi")