In [1]:
import librosa
import librosa.display
import matplotlib.pyplot
import matplotlib.pyplot as plt
import numpy as np
import sklearn.cluster
# from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from hmmlearn import hmm
import numpy
import os
import scipy
import glob
from sklearn.metrics import classification_report
from statsmodels.tsa.vector_ar.var_model import VAR
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LinearRegression

### Problem 1. Some clustering

In [None]:
class PCA():
    def __init__(self, n_components: int) -> None:
        self.n_components = n_components
        return

    def _X_mean(self, X: numpy.ndarray) -> numpy.ndarray:
        X_mean = numpy.mean(a=X, axis=1, keepdims=True)   # mean of matrix X  [X, ]
        one_vec = numpy.ones(shape=(1, X.shape[1]))       # [1 x observations]
        X_mean_mat = X_mean@one_vec                       # features mean matrix   [# of features x # of samples]
        X_mean_center = X - X_mean_mat                    # mean-centered matrix X [# of features x # of samples]
        return X_mean_center

    def fit_transform(self, X: numpy.ndarray) -> tuple[numpy.ndarray, numpy.ndarray]:
        X_mean_center = self._X_mean(X=X)
        C = numpy.cov(m=X_mean_center, rowvar=True, bias=False)
        eigenvals, eigenvecs = scipy.linalg.eig(a=C)
        eigenval_indices = numpy.argsort(a=eigenvals)[::-1]
        eigenvals = eigenvals[eigenval_indices].real
        eigenvecs = eigenvecs[:, eigenval_indices].real
        Lambda = scipy.linalg.inv(a=numpy.diag(v=numpy.sqrt(eigenvals[:self.n_components])))
        U = eigenvecs[:, :self.n_components]
        W = Lambda@U.T
        Y = W@X_mean_center
        return W, Y

    def reconstruct(self, X: numpy.ndarray) -> numpy.ndarray:
        X_mean_mat, X_mean_center = self._X_mean(X=X)
        Z = self.U[:, :self.n_components].T@X_mean_center
        X_hat = self.U[:, :self.n_components]@Z+X_mean_mat
        return X_hat

In [None]:
class KMeans():
    def __init__(self, n_clusters: int, tol: float) -> None:
        self.n_clusters = n_clusters
        self.tol = tol
        self.cluster_centers_ = None
        self.labels_ = None

        return

    def fit(self, X: numpy.ndarray) -> tuple[numpy.ndarray, numpy.ndarray]:
        cluster_center_indices = numpy.random.choice(a=X.shape[0], size=self.n_clusters, replace=False)
        cluster_centers = X[cluster_center_indices]
    
        while True:
            dists = scipy.linalg.norm(a=(X[:, numpy.newaxis]-cluster_centers), axis=2)
            labels = numpy.argmin(a=dists, axis=1)

            new_cluster_centers = []
            for i in range(self.n_clusters):
                cluster_mu = X[labels==i].mean(axis=0)
                new_cluster_centers.append(cluster_mu)
            new_cluster_centers = numpy.vstack(tup=new_cluster_centers, dtype=numpy.float32)

            if numpy.all(scipy.linalg.norm(a=(new_cluster_centers-cluster_centers), axis=0) <= self.tol):
                break

            cluster_centers = new_cluster_centers

        self.cluster_centers_ = cluster_centers
        self.labels_ = labels

        return self

    def predict(self, X: numpy.ndarray) -> numpy.ndarray:
        dists = scipy.linalg.norm(a=(X[:, numpy.newaxis]-self.cluster_centers_), axis=2)

        return numpy.argmin(a=dists, axis=1)

In [None]:
def time_to_frames(times: float, sr: int, hop_length: int, n_fft: int=None) -> int:
    return librosa.time_to_frames(times=times, sr=sr, hop_length=hop_length, n_fft=n_fft)

def create_labels(label_filepath: str, sr: int, hop_length: int, n_fft: int=None) -> numpy.ndarray:
    dtypes = [("start_time", "float64"), ("end_time", "float64"), ("label", "U10")]
    label_data = np.genfromtxt(fname=label_filepath, dtype=dtypes, delimiter='\t', skip_header=0, skip_footer=0)

    classes = numpy.array([label_info[2] for label_info in label_data])
    classes = numpy.unique(ar=classes)

    frame_labels = numpy.array(object=[], dtype=numpy.uint8)
    for label_info in label_data:
        start_time, end_time, label = label_info
        start_frame = time_to_frames(times=start_time, sr=sr, hop_length=hop_length, n_fft=N_FFT)
        end_frame = time_to_frames(times=end_time, sr=sr, hop_length=hop_length, n_fft=N_FFT)
        label_id = numpy.where(classes==label)[0]
        labels = numpy.full(shape=(end_frame-start_frame), fill_value=label_id, dtype=numpy.uint8)
        frame_labels = numpy.append(arr=frame_labels, values=labels)
    frame_labels = numpy.append(arr=frame_labels, values=label_id)
    print(frame_labels.shape)
    
    return frame_labels

In [None]:
SAMPLING_RATE = 22050
N_FFT = 2048
HOP_LENGTH = N_FFT//4
WIN_LENGTH = N_FFT//2

dataset_dir = "friends"

audio_filepath = dataset_dir + "/friends.wav"

In [None]:
y, _ = librosa.load(path=audio_filepath, sr=SAMPLING_RATE, dtype=numpy.float64)
spec = librosa.stft(y=y, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH)
spec = numpy.abs(spec)
spec_sqrt = numpy.sqrt(spec)

matplotlib.pyplot.rc('font', family='serif')
matplotlib.pyplot.rc(group="text", usetex=True)
fig, ax = matplotlib.pyplot.subplots(1, 2, figsize=(15, 5))
img_0 = librosa.display.specshow(data=spec_sqrt, sr=SAMPLING_RATE, hop_length=HOP_LENGTH, n_fft=N_FFT,
                                 win_length=WIN_LENGTH, x_axis='s', y_axis='log', ax=ax[0])
fig.colorbar(img_0, ax=[ax[0]], label=r"$\sqrt{\textnormal{Magnitude}}$")
ax[0].set_title("Spectrogram")

spec_db = librosa.amplitude_to_db(S=spec, ref=numpy.max)
img_1 = librosa.display.specshow(data=spec_db, sr=SAMPLING_RATE, hop_length=HOP_LENGTH, n_fft=N_FFT,
                                 win_length=WIN_LENGTH, x_axis='s', y_axis="log", ax=ax[1])
fig.colorbar(img_1, ax=[ax[1]], format=r"$%+2.0f$ dB")
ax[1].set_title("Spectrogram")

matplotlib.pyplot.show()

In [None]:
print(spec.shape)
pca = PCA(n_components=40)
W, Y = pca.fit_transform(X=spec)
print(Y.shape)

In [None]:
mfccs = librosa.feature.mfcc(y=y, sr=SAMPLING_RATE, n_mfcc=20, n_fft=N_FFT, hop_length=HOP_LENGTH,
                             win_length=WIN_LENGTH)
print(mfccs.shape)
pca1 = PCA(n_components=10)
W1, Y1 = pca1.fit_transform(X=mfccs)

feat = numpy.vstack((Y, Y1), dtype=numpy.float64)
print(feat.shape)

In [None]:
kmeans = KMeans(n_clusters=3, tol=1e-5).fit(X=spec.T)
labels = kmeans.predict(X=spec.T)

In [None]:
print(labels.shape)
print(labels[7000:8000])

In [None]:
# times = librosa.frames_to_time(frames=numpy.arange(labels.shape[0]), sr=SAMPLING_RATE, hop_length=HOP_LENGTH, n_fft=N_FFT)
# print(times.shape)
# print(times[:1000])

In [None]:
label_filepath = dataset_dir + "/labels.txt"

frame_labels = create_labels(label_filepath=label_filepath, sr=SAMPLING_RATE, hop_length=HOP_LENGTH, n_fft=N_FFT)
print(frame_labels[7000:8000])

In [None]:
frame_times = librosa.frames_to_time(frames=np.arange(spec_db.shape[1]), sr=sr)
print(frame_times)
print(len(frame_times))

### Problem 2. Recognition of temporal sequences

In [None]:
SAMPLING_RATE = 22050
N_FFT = 2048
HOP_LENGTH = N_FFT//4
WIN_LENGTH = N_FFT//2
N_MFCC = 20

In [None]:
num_map = {"zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
           "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9}

dataset_dir = "digits"
digit_dirs = os.listdir(path=dataset_dir)

fig, axs = plt.subplots(10, 5, figsize=(10, 20), gridspec_kw={"width_ratios": [1, 1, 1, 1, 1.3]},
                        sharex=True, sharey=True)
matplotlib.pyplot.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.95, wspace=0.50, hspace=0.50)

for i, digit_dir in enumerate(digit_dirs):
    for j in range(5):
        audio_filepath = f"digits/{digit_dir}/{digit_dir}_{j}.wav"
        y, _ = librosa.load(path="digits/one/one_0.wav", sr=SAMPLING_RATE, dtype=numpy.float64)
        spec = librosa.stft(y=y, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH)
        spec = numpy.abs(spec)
        spec_sqrt = numpy.sqrt(spec)
        img = librosa.display.specshow(data=spec_sqrt, sr=SAMPLING_RATE, hop_length=HOP_LENGTH, n_fft=N_FFT,
                                       win_length=WIN_LENGTH, x_axis='s', y_axis="log", ax=axs[i, j])
        axs[i, j].set_title(f'Digit {num_map[digit_dir]}')
        axs[i, j].set_xlabel(r'Time $(s)$')
        axs[i, j].set_ylabel(r'Frequency (Hz)')

        if j == 4:
            axs[i, j].set_aspect('auto')
            cbar = fig.colorbar(img, ax=axs[i, j], pad=0.1)
            cbar.set_label(label=r"$\sqrt{\textnormal{Magnitude}}$")

matplotlib.pyplot.tight_layout()
matplotlib.pyplot.show()

In [None]:
class SpokenDigit():
    def __init__(self, dataset_dir: str, num_map: dict) -> None:
        self.classes = numpy.array(object=list(num_map.values()), dtype=numpy.uint8)
        self.data = []
        self.labels = []
        self.__load_data_labels(dataset_dir=dataset_dir, num_map=num_map)
        self.data_train_ = []
        self.labels_train_ = []
        self.data_test_ = []
        self.labels_test_ = []

        return

    def __load_data_labels(self, dataset_dir: str, num_map: dict) -> None:
        digit_dirs = list(num_map.keys())

        for i, digit_dir in enumerate(digit_dirs):
            filepath = os.path.join(dataset_dir, digit_dir, f"{digit_dir}_*.wav")
            filepaths = glob.glob(pathname=filepath)

            data = []
            for filepath in filepaths:
                y, sr = librosa.load(path=filepath)
                m = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH,
                                         dtype=numpy.float64)
                data.append(m.T)

            self.data.append(data)
            labels = numpy.full(shape=(len(filepaths)), fill_value=i, dtype=numpy.uint8)
            
            self.labels.append(labels)

        return

    def train_test_split(
            self,
            test_pct: float,
            train_pct: float,
            rand_state: int
    ) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray]:
        if rand_state is not None:
            numpy.random.seed(seed=rand_state)

        for data, label in zip(self.data, self.labels):
            test_size = int(len(data)*test_pct)
            indices = numpy.arange(len(data))
            numpy.random.shuffle(indices)
            indices_test = indices[:test_size]

            data_class_train = []
            labels_class_train = []
            for i, (d, l) in enumerate(zip(data, label)):
                if i not in indices_test:  
                    data_class_train.append(d)
                    labels_class_train.append(l)
                else:
                    self.data_test_.append(d)
                    self.labels_test_.append(l)

            self.data_train_.append(data_class_train)
            self.labels_train_.append(labels_class_train)

        self.labels_test_ = numpy.hstack(tup=self.labels_test_, dtype=numpy.uint8)

        return

In [None]:
class SpokenDigitRecog():
    def __init__(
            self,
            dataset_dir: str,
            num_map: dict,
            test_pct: float,
            train_pct: float,
            rand_state: int
    ) -> None:
        self.num_map = num_map
        self.spoken_digit = SpokenDigit(dataset_dir=dataset_dir, num_map=num_map)
        self.spoken_digit.train_test_split(test_pct=test_pct, train_pct=train_pct, rand_state=rand_state)
        self.models_ = {}

    def train_HMM(self) -> None:
        for i in range(len(self.spoken_digit.data_train_)):
            model = hmm.GMMHMM(n_components=10)
            data_train = self.spoken_digit.data_train_[i]
            data_train = numpy.vstack(tup=data_train, dtype=numpy.float64)
            model.fit(data_train)
            self.models_[i] = model

        return

    def test_HMM(self) -> None:
        preds = numpy.array(object=[], dtype=numpy.uint8)

        for i in range(len(self.spoken_digit.data_test_)):
            data_test = self.spoken_digit.data_test_[i]
            scores = numpy.array(object=[], dtype=numpy.float64)
            for cls in list(self.models_.keys()):
                model = self.models_[cls]
                score = model.score(data_test)
                scores = numpy.append(arr=scores, values=score)
            pred = numpy.argmax(a=scores, axis=0)
            preds = numpy.append(arr=preds, values=pred)

        report = classification_report(y_true=self.spoken_digit.labels_test_, y_pred=preds,
                                       labels=list(self.num_map.values()),
                                       target_names=list(self.num_map.keys()))
        print(report)

        return

In [None]:
sdr = SpokenDigitRecog(dataset_dir=dataset_dir, num_map=num_map,
                       test_pct=0.25, train_pct=0.75, rand_state=None)
sdr.train_HMM()
sdr.test_HMM()

In [None]:
# K-means clustering
kmeans = sklearn.cluster.KMeans(n_clusters=3, tol=1e-3)
kmeans_labels = kmeans.fit_predict(spec.T)

In [None]:
print(len(kmeans_labels))
print(kmeans_labels[:1000])

In [None]:
# GMM clustering
gmm = GaussianMixture(n_components=3)
gmm_labels = gmm.fit_predict(spec.T)

In [None]:
print(len(gmm_labels))
print(gmm_labels[:1000])

In [None]:
# HMM clustering
hmm_model = hmm.GMMHMM(n_components=3)
hmm_model.fit(spec.T)
hmm_labels = hmm_model.predict(spec.T)

### Problem 3. Activity recognition

In [8]:
def create_class_map(class_filepath: str, class_names: list[str]) -> dict:
    dtypes = [("labels", "uint8"), ("activity", "U20")]
    class_info = numpy.genfromtxt(fname=class_filepath, dtype=dtypes, delimiter=' ', skip_header=0, skip_footer=0)
    class_map = dict(class_info)
    class_map = {k-1: v for k, v in class_map.items() if v in class_names}

    return class_map

In [79]:
class HAR():
    def __init__(self, dataset_dir: str, classes: numpy.ndarray) -> None:
        self.classes = classes
        self.data = None
        self.labels = None
        self.__load_data(dataset_dir=dataset_dir)
        self.__load_labels(dataset_dir=dataset_dir)
        self.__filter(classes=classes)
        self.data_train_ = []
        self.labels_train_ = []
        self.data_test_ = None
        self.labels_test_ = None
        self.data_train_test_ = None
        self.labels_train_test_ = None

        return

    def __load_data(self, dataset_dir: str) -> None:
        x_filepath = os.path.join(dataset_dir, "test", "Inertial_Signals", "total_acc_x_test.txt")
        y_filepath = os.path.join(dataset_dir, "test", "Inertial_Signals", "total_acc_y_test.txt")
        z_filepath = os.path.join(dataset_dir, "test", "Inertial_Signals", "total_acc_z_test.txt")
        x = numpy.loadtxt(fname=x_filepath, dtype=numpy.float64)
        y = numpy.loadtxt(fname=y_filepath, dtype=numpy.float64)
        z = numpy.loadtxt(fname=z_filepath, dtype=numpy.float64)
        self.data = numpy.stack(arrays=(x, y, z), axis=2)

        return

    def __load_labels(self, dataset_dir: str) -> None:
        label_filepath = os.path.join(dataset_dir, "test", "y_test.txt")
        self.labels = numpy.loadtxt(fname=label_filepath, dtype=numpy.uint8)
        self.labels -= 1

        return

    def __filter(self, classes: list[int]) -> None:
        mask = numpy.isin(element=self.labels, test_elements=classes)
        N = numpy.count_nonzero(a=mask)
        self.data = self.data[mask]
        self.labels = self.labels[mask]

        assert N == self.data.shape[0] == self.labels.shape[0]

    def train_test_split(
            self,
            test_pct: float,
            train_pct: float,
            rand_state: int
    ) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray]:
        if rand_state is not None:
            numpy.random.seed(seed=rand_state)

#         mu = numpy.mean(a=self.data, axis=0, dtype=numpy.float64)
#         self.data -= mu

#         data_train = []
#         labels_train = []
        data_test = []
        labels_test = []
        for cls in self.classes:
            indices_cls = numpy.where(self.labels==cls)[0]
            test_size = int(indices_cls.shape[0]*test_pct)
            indices_cls_test = numpy.random.choice(a=indices_cls, size=test_size, replace=False)
            # print(indices_cls_test.shape)
            mask = numpy.isin(element=indices_cls, test_elements=indices_cls_test, assume_unique=True)
            indices_cls_train = indices_cls[~mask]
            # print(indices_cls_train.shape)
            self.data_train_.append(self.data[indices_cls_train])
            self.labels_train_.append(self.labels[indices_cls_train])
            data_test.append(self.data[indices_cls_test])
            labels_test.append(self.labels[indices_cls_test])

        self.data_train_test_ = numpy.vstack(tup=self.data_train_, dtype=numpy.float64)
        self.labels_train_test_ = numpy.hstack(tup=self.labels_train_, dtype=numpy.uint8)
        # print(self.data_train_test_.shape)
        # print(self.labels_train_test_.shape)
        
        assert self.data_train_test_.shape[0] == self.labels_train_test_.shape[0]

        self.data_test_ = numpy.vstack(tup=data_test, dtype=numpy.float64)
        self.labels_test_ = numpy.hstack(tup=labels_test, dtype=numpy.uint8)
        # print(self.data_test_.shape)
        # print(self.labels_test_.shape)
        assert self.data_test_.shape[0] == self.labels_test_.shape[0]

#         for a,b in zip(data_train, labels_train):
#             print(len(a), len(b))
#         print("-------")
#         for a,b in zip(data_test, labels_test):
#             print(a.shape, b.shape)

        return

In [80]:
class VAR():
    def __init__(self, p: int) -> None:
        self.p = p
        self.model = LinearRegression()

        return

    def __train_x(self, X: numpy.ndarray, p: int, steps: int) -> numpy.ndarray:
        n = X.shape[0]

        data_train = []
        for i in range(steps):
            data_train.append(X[i:i+p].flatten(order='C'))

        X = numpy.vstack(tup=data_train, dtype=numpy.float64)

        return X

    def fit(self, X: numpy.ndarray, y: numpy.ndarray, p:int, steps: int) -> None:
        X = self.__train_x(X=X, p=p, steps=steps)
        self.model.fit(X=X, y=y)

        return

    def predict(self, X: numpy.ndarray, steps: int) -> None:
        return

In [157]:
class ActRecog():
    def __init__(self, dataset_dir: str, class_map: dict, lag_order: int) -> None:
        self.classes = numpy.unique(ar=list(class_map.keys()))
        self.class_names = list(class_map.values())
        self.lag_order = lag_order
        self.har = HAR(dataset_dir=dataset_dir, classes=self.classes)
        self.har.train_test_split(test_pct=0.5, train_pct=0.5, rand_state=None)
        self.VAR_models_ = {}

        return

    def __test_x(self, X: numpy.ndarray, p: int, steps: int) -> numpy.ndarray:
        n = X.shape[0]

        data_test = []
        for i in range(steps):
            data_test.append(X[i:i+p].flatten(order='C'))

        X = numpy.vstack(tup=data_test, dtype=numpy.float64)

        return X

    def train_VAR_models(self, p: int, steps: int) -> None:
        for cls in self.classes:
            W = []
            b = []

            for X, y in zip(self.har.data_train_[cls], self.har.labels_train_[cls]):
                y = X[p:p+steps]
                var = VAR(p=p)
                var.fit(X=X, y=y, p=p, steps=steps)
#                 print(var.model.coef_)
#                 print(var.model.intercept_)
                W.append(var.model.coef_)
                b.append(var.model.intercept_)

            W = numpy.stack(arrays=W, axis=0, dtype=numpy.float64)
            W = numpy.mean(a=W, axis=0, dtype=numpy.float64)
            b = numpy.stack(arrays=b, axis=0, dtype=numpy.float64)
            b = numpy.mean(a=b, axis=0, dtype=numpy.float64)
#             print(W.shape)
#             print(b.shape)
            self.VAR_models_[cls] = [W, b]

#                 model = VAR(X)
#                 var_res = model.fit(maxlags=self.lag_order, method="ols", ic=None, trend='c')
#                 VAR_models_class.append(var_res.params)

#             VAR_models_class = numpy.stack(arrays=VAR_models_class, axis=0, dtype=numpy.float64)
#             self.VAR_models_[i] = VAR_models_class

    def test_VAR_models(self, p: int, steps: int, data: str) -> None:
        if data == "train":
            data_test = self.har.data_train_test_
            labels_test = self.har.labels_train_test_
        elif data == "test":
            data_test = self.har.data_test_
            labels_test = self.har.labels_test_
        else:
            print("[ERROR]: Invalid data type.")

        preds = numpy.array(object=[], dtype=numpy.uint8)
        for X, y in zip(data_test, labels_test):
            loss = numpy.array(object=[], dtype=numpy.float64)
            for cls in range(0, self.classes.size):
                forecasts = []
                W, b = self.VAR_models_[cls]
                x = X[:p].flatten(order='F')
                for step in range(steps):
                    forecast = x@W.T + b
                    forecasts.append(forecast)
                    x = numpy.hstack(tup=[x[3:], forecast], dtype=numpy.float64)

                forecasts = numpy.vstack(tup=forecasts, dtype=numpy.float64)
                y_true = X[self.lag_order:self.lag_order+steps]
                mse = scipy.linalg.norm(a=(forecasts-y_true))
                loss = numpy.append(arr=loss, values=mse)
            pred = numpy.argmin(a=loss, axis=0)
            preds = numpy.append(arr=preds, values=pred)
        print(preds)
        print(self.har.labels_test_)
        report = classification_report(y_true=labels_test, y_pred=preds, labels=self.classes,
                                       target_names=self.class_names)
        print(report)

        conf_mat_test = sklearn.metrics.confusion_matrix(y_true=labels_test, y_pred=preds,
                                                         labels=self.classes)
        print(conf_mat_test)

#         preds = numpy.array(object=[], dtype=numpy.uint8)
#         for X, y in zip(data_test, labels_test):
#             loss = numpy.array(object=[], dtype=numpy.float64)
#             for cls in range(0, self.classes.size):
#                 forecasts = []
#                 for step in range(steps):
#                     W = numpy.mean(a=self.VAR_models_[cls], axis=0)
#                     X_test = X[-self.lag_order:].flatten(order='F')
#                     X_test = X_test[:, numpy.newaxis]
#                     X_test = numpy.append(X_test, 1)
#                     forecast = X_test@W
#                     X_test = numpy.reshape(a=X_test[:-1], newshape=(-1, 3), order='F')
#                     X_test = numpy.vstack((X_test[1:], forecast))
#                     forecasts.append(forecast)
#                 forecasts = numpy.vstack(tup=forecasts, dtype=numpy.float64)
#                 y_true = X[self.lag_order:self.lag_order+steps]
#                 mse = scipy.linalg.norm(a=(forecasts-y_true))
#                 loss = numpy.append(arr=loss, values=mse)
#             pred = numpy.argmin(a=loss, axis=0)
#             preds = numpy.append(arr=preds, values=pred)
#         print(preds)
#         print(self.har.labels_test_)
#         report = classification_report(y_true=labels_test, y_pred=preds, labels=self.classes,
#                                        target_names=self.class_names)
#         print(report)

#         conf_mat_test = sklearn.metrics.confusion_matrix(y_true=labels_test, y_pred=preds,
#                                                          labels=self.classes)
#         print(conf_mat_test)

        return

In [158]:
LAG_ORDER = 50
STEPS = 50

dataset_dir = "HAR"
class_filepath = dataset_dir + "/activity_labels.txt"
class_names = ["WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS", "SITTING"]
class_map = create_class_map(class_filepath=class_filepath, class_names=class_names)

In [163]:
actrecog = ActRecog(dataset_dir=dataset_dir, class_map=class_map, lag_order=LAG_ORDER)
actrecog.train_VAR_models(p=100, steps=28)
actrecog.test_VAR_models(p=100, steps=28, data="train")
# actrecog.test_VAR_models(steps=STEPS, data="test")

[1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 0 3 3 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 2 2 3 2 2 2 2 2 2 2 0
 3 2 2 0 2 2 2 2 2 2 2 2 2 0 2 2 2 0 0 2 0 2 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3
 3 3 0 0 2 2 2 2 2 2 2 2 2 2 2 2 0 2 2 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 0 0 0 0 0 0 0 0 0 1 0 0 0 2 0 0 2 0 2 0 0 0 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 0 1 1 0 0 0 0 1 0 1 0 1 1 2 0 2 2 2 2
 2 0 0 0 2 0 0 0 2 2 2 2 0 0 0 2 1 2 2 2 2 0 2 1 2 2 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 2 2 2 2 2 2 0 0 0 0 0 2 0 0 3 2 2 2 0 2 1
 0 0 0 1 1 0 0 2 1 1 2 2 2 2 1 1 1 2 1 0 2 2 2 2 0 1 1 0 0 3 3 3 3 3 3 3 3
 3 0 0 0 2 0 0 2 2 2 2 0 0 0 0 0 0 0 1 0 0 0 0 2 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 0 2 2 2 0 0 0 0 0 1 0 0 0 0 0 2 2 1 0 0 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 0 1 1 1 1 