In [1]:
from __future__ import division
%matplotlib
import csv
import sys
import os
import traceback
import json
import pickle
import random
import numpy as np
import pandas as pd
import scipy.io as sio
import seaborn as sns
from scipy import stats
from scipy.signal import argrelextrema
import itertools
import scipy.fftpack as fft
from scipy.stats import norm, skew, kurtosis
from scipy.signal import butter, lfilter, filtfilt
from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
from collections import defaultdict, Counter, OrderedDict
from helper_functions import getListOfFiles, getCSV, getStatistics, remap_interval
from operator import itemgetter
from detect_peaks import detect_peaks
from gaussian_kde import gaussian_kde
import mean_shift as ms
import mean_shift_utils as ms_utils
import meta_features

Using matplotlib backend: MacOSX


In [2]:
acc_axes = ['x','y','z']
mat_dir = '../data/multi_split'
sym_dir = os.path.join(mat_dir)#, 'symbolization')
files = [f.split('__')[1] for f in getListOfFiles(sym_dir, ".mat")]
print ">> {} mat Files found!".format(len(files))

>> 87 mat Files found!


In [None]:
dataset = OrderedDict()
previous_files = None

for ind, current_file in enumerate(files):
    file_data = {}
    for axis in acc_axes:
        ## load correspoding data structure
        mat_content = sio.loadmat(os.path.join(sym_dir, axis+'__'+current_file))
        data = mat_content['exp']
        val = data[0,0]

        ## grab the data
        ts = [x[0] for x in val['sig_data']]
        splitpoints = val['split_cand'][0].tolist()
        weights =  val['weights'][0].tolist()

        # structuring individual axis data
        file_data[axis+'.ts'] = ts
        file_data[axis+'.splits'] = splitpoints[:-1]
        file_data[axis+'.weights'] = weights[:-1]
    dataset[current_file] = file_data
print '>> {} files loaded'.format(len(dataset.keys()))

In [None]:
figsize(20,6)
file_number = 0
figsize(16,3)
markers = ['v', '^', '*']
for i,axis in enumerate(acc_axes):
    plt.plot(dataset[files[file_number]][axis+'.ts'], label= axis)
    y = np.random.rand(1,len(dataset[files[file_number]][axis+'.splits']))
    plt.scatter(dataset[files[file_number]][axis+'.splits'],y, marker=markers[i], label=axis+' split points')
plt.title('Data for ' + files[file_number])
plt.xlim(0,plt.xlim()[1])
plt.ylim(-5,5)
plt.legend()
plt.show()

## getting the separation points

In [None]:
ma_mi_list = {}
cluster_centers = {}
tau = 1
for ind, current_file in enumerate(dataset.keys()):
    fig=figure(figsize=(15,8))
    ax1=fig.add_subplot(311)
    
    split_cand_list = []
    weigth_list = []
    data = dataset[current_file]
    for axis in acc_axes:
        split_cand_list += [(s,axis) for s in data[axis+'.splits']]
        weigth_list     += [(w,axis) for w in data[axis+'.weights']]

    weights = np.array([w[0] for w in weigth_list], np.float)
    samples = np.array([s[0] for s in split_cand_list])

    min, max = 0,len(data['x.ts'])
    x = np.linspace(min, max, max)

    clusters = []

    #Construct a KDE and plot it
    bws = [0.03]#0.01, 0.02, 0.03, 0.06, 0.1]
    ma = None
    mi = None
    for bw in bws:
        pdf = gaussian_kde(samples, bw_method=bw, weights=weights)
        y = pdf(x)
        ax1.plot(x, y, label='weighted kde, bw='+ str(bw))
        # mi = list of minimum indices in y.
        # ma = list of maximum indices in y.
        mi, ma = argrelextrema(y, np.less)[0], argrelextrema(y, np.greater)[0]
        #print "Minima:", x[mi]
        #print "Maxima:", x[ma]

        samples2 = []
        for s,w in zip(split_cand_list,weigth_list):
            samples2.append((s[0],w[0],s[1]))
        samples2 = np.array(samples2)

        for xc in x[mi]:
            ax1.axvline(x=xc, color='k', linestyle='-.')

        ## Get clusters by considering each one as a region between minimas. 
        # Count the itens for each cluster region. O(K), where K are regions defined by minimas.
        for m in range(len(mi)):
            if m == 0:
                clusters.append(samples2[samples < x[mi[m]]].tolist())
            else:
                clusters.append(samples2[(samples >= x[mi[m-1]]) * (samples <= x[mi[m]])].tolist())

        ax1.plot(x[ma], y[ma], 'g^',x[mi], y[mi], 'rv')

    ## Saving maxima e minima
    ma_mi_list[current_file] = {'mi': mi, 'ma': ma}
    
    energies = []
    total_energy = 0
    for ind,c in enumerate(clusters):
        accumulator = 0
        for elem in c:
            accumulator += float(elem[1])
        total_energy += accumulator
        energies.append((accumulator,ind))

    #energies = [(e/total_energy,ind) for inx,e in enumerate(energies)]

    #print 'Total energy: {}'.format(total_energy)
    #print 'Total energy after normalization: {}'.format(sum([e[0] for e in energies]))
    energies.sort(reverse=True, key=lambda tup: tup[0])

    #Plot the split candidates
    markers = ['o','x','^']
    colors = ['b', 'r','m']
    for i,axis in enumerate(acc_axes):
        ax1.scatter(data[axis+'.splits'], np.zeros_like(data[axis+'.splits']),
                    label='Split candidate for {}'.format(axis),
                    marker=markers[i], color='m')

    #Boiler plate
    plt.xticks([s[0] for s in split_cand_list],rotation = 90, fontsize=8)
    ax1.set_title('Split point gaussian kernel density estimation for {}'.format(current_file))
    ax1.set_xlabel('Variable')
    ax1.set_ylabel('Density')
    plt.legend(loc='best', frameon=False)

    final_clusters = []
    accumulator = 0
    for e in energies:
        accumulator += e[0]
        if accumulator > tau*total_energy:
            break
        else:
            final_clusters.append(e[1])

    #print 'Total: {}, Accumulator: {}, Threshold: {}'.format(total_energy,accumulator,tau*total_energy)
    cluster_centers[current_file] = x[ma[final_clusters]]

    ax2 = fig.add_subplot(312, sharex=ax1)
    ax2.plot(x, y, label='weighted kde, bw='+ str(bw))
    for xc in x[ma[final_clusters]]:
            ax2.axvline(x=xc, color='k', linestyle='-.')
    ax2.set_ylim(-0.0003,ax2.get_ylim()[1])
    plt.xticks([s[0] for s in split_cand_list],rotation = 90, fontsize=8)
    plt.legend(loc='best', frameon=False)

    ax3 = fig.add_subplot(313, sharex=ax1)
    for axis in acc_axes:
        ax3.plot(data[axis+'.ts'], label=axis)

    for xc in x[ma[final_clusters]]:
        ax3.axvline(x=xc, color='k', linestyle='-.')     

    plt.xticks(x[ma[final_clusters]],rotation = 90, fontsize=8)
    plt.legend(loc='best', frameon=False)
    plt.tight_layout()
    #plt.ylim(-0.0003,plt.ylim()[1])
    plt.xlim(min,max)
    plt.show()

## Getting the segments

In [None]:
def get_segments(filename,maximas):
    data = dataset[filename]
    x_segments = []
    y_segments = []
    z_segments = []
    alignments = []
    
    min, max = 0,len(data['x.ts'])
    x = np.linspace(min, max, max)
    
    centers = [int(i) for i in sorted(cluster_centers[filename])]
    x_data = np.array(data['x.ts'])
    y_data = np.array(data['y.ts'])
    z_data = np.array(data['z.ts'])
    for i in range(len(centers)):
            if i == 0:
                x_segments.append(x_data[:int(centers[i])])
                y_segments.append(y_data[:int(centers[i])])   
                z_segments.append(z_data[:int(centers[i])])
                alignments.append(0)
            elif i == (len(centers)-1):
                x_segments.append(x_data[int(centers[i-1]):int(centers[i])].tolist())
                x_segments.append(x_data[int(centers[i]):].tolist())
                y_segments.append(y_data[int(centers[i-1]):int(centers[i])].tolist())
                y_segments.append(y_data[int(centers[i]):].tolist())
                z_segments.append(z_data[int(centers[i-1]):int(centers[i])].tolist())
                z_segments.append(z_data[int(centers[i]):].tolist())

                alignments.append(centers[i-1])
                alignments.append(centers[i])
            else:
                x_segments.append(x_data[int(centers[i-1]):int(centers[i])].tolist())
                y_segments.append(y_data[int(centers[i-1]):int(centers[i])].tolist())
                z_segments.append(z_data[int(centers[i-1]):int(centers[i])].tolist())
                alignments.append(centers[i-1])
    return x_segments, y_segments, z_segments, alignments

## Checking data segments equality with original time series.

In [None]:
x_segments = []
y_segments = []
z_segments = []
alignments = []
file_map_list   = []

for f in dataset.keys():
    x_seg, y_seg, z_seg, aligns = get_segments(f,ma_mi_list[f]['ma'])
    ### testing equality
    try:
        assert list(itertools.chain.from_iterable(x_seg)) == dataset[f]['x.ts']
        assert list(itertools.chain.from_iterable(y_seg)) == dataset[f]['y.ts']
        assert list(itertools.chain.from_iterable(z_seg)) == dataset[f]['z.ts']
    except AssertionError:
        _, _, tb = sys.exc_info()
        traceback.print_tb(tb) # Fixed format
        tb_info = traceback.extract_tb(tb)
        filename, line, func, text = tb_info[-1]
        print('An error occurred on line {} in statement {}'.format(line, text))
        print('Equivalence test failed for file {}. ABORTING'.format(f))
        break
    
    for i in range(len(x_seg)):
        x_segments.append(x_seg[i])
        y_segments.append(y_seg[i])
        z_segments.append(z_seg[i])
        alignments.append(aligns[i])
        file_map_list.append('.'.join(f.split('.')[:-1]))
    
print 'ALL files PASSED on equivalence tests. This means the segmentation does not lose data!'

## Framming corpus

In [None]:
df = {
      'x'    : x_segments,
      'y'    : y_segments,
      'z'    : z_segments,
      'align' : alignments,
      'file' : file_map_list
     }

frame = pd.DataFrame(df, index = [range(len(file_map_list))] , columns = ['x','y','z','align','file'])

## Exporting model to pickle.

In [None]:
# open the file for writing
pkl_file_name = 'segments.pkl'
file_object = open(pkl_file_name,'wb')
pickle.dump(frame,file_object)
file_object.close()
print 'pickle DONE!'

## Plot by segment

In [None]:
figure(figsize=(15,3))
filename = dataset.keys()[0]
for xc in cluster_centers[filename]:
    plt.axvline(x=xc, color='m', linestyle='-.')

x = list(itertools.chain.from_iterable(frame.loc[frame['file'] == '.'.join(filename.split('.')[:-1])]['x'].tolist()))
y = list(itertools.chain.from_iterable(frame.loc[frame['file'] == '.'.join(filename.split('.')[:-1])]['y'].tolist()))
z = list(itertools.chain.from_iterable(frame.loc[frame['file'] == '.'.join(filename.split('.')[:-1])]['z'].tolist()))
plt.plot(x,'-', label='x')
plt.plot(y, '-', label='y')
plt.plot(z, '-', label='z')
plt.title("Reconstructed file {}".format(filename))
plt.legend()
plt.xticks(cluster_centers[filename], rotation = 90)
#plt.ylim(-6,6)
plt.show()

____

## Bandwith selection

In [None]:
ma_mi_list = {}
cluster_centers = {}
tau = 1#0.95
current_file = dataset.keys()[0]
fig=figure(figsize=(15,10))
ax1=fig.add_subplot(311)

split_cand_list = []
weigth_list = []
data = dataset[current_file]
for axis in acc_axes:
    split_cand_list += [(s,axis) for s in data[axis+'.splits']]
    weigth_list     += [(w,axis) for w in data[axis+'.weights']]

weights = np.array([w[0] for w in weigth_list], np.float)
samples = np.array([s[0] for s in split_cand_list])

min, max = 0,len(data['x.ts'])
x = np.linspace(min, max, max)

clusters = []

#Construct a KDE and plot it
bws = [0.03,0.01, 0.02, 0.03, 0.06]
for bw in bws:
    pdf = gaussian_kde(samples, bw_method=bw, weights=weights)
    y = pdf(x)
    ax1.plot(x, y, label='weighted kde, bw='+ str(bw))


#Plot the split candidates
markers = ['o','x','^']
colors = ['b', 'r','m']
for i,axis in enumerate(acc_axes):
    ax1.scatter(data[axis+'.splits'], np.zeros_like(data[axis+'.splits']),
                label='Split candidate for {}'.format(axis),
                marker=markers[i], color='m')

#Boiler plate
plt.xticks([s[0] for s in split_cand_list],rotation = 90, fontsize=8)
ax1.set_title('Split point gaussian kernel density estimation for {}'.format(current_file))
ax1.set_xlabel('Cutting point')
ax1.set_ylabel('Density')
plt.legend(loc='best', frameon=False)

ax3 = fig.add_subplot(312, sharex=ax1)
for axis in acc_axes:
    ax3.plot(data[axis+'.ts'], label=axis)   

plt.legend(loc='best', frameon=False)
plt.tight_layout()
ax1.set_xlabel('Acceleration')
ax1.set_ylabel('Cutting point')
plt.xlim(min,max)
plt.show()

# Loading pickle

In [127]:
resframe = []
with open("../pickle/annotation.pkl",'r') as f:
    resframe = pickle.load(f)
resframe.shape

(571, 6)

In [4]:
x_df = None
y_df = None
z_df = None
sup_resframe = resframe[resframe['tag'] !=""]
print 'Supervised dataset dim: {}'.format(sup_resframe.shape)
print 'Columns: {}'.format(sup_resframe.columns.tolist())
for df in sup_resframe.itertuples():
    x = pd.DataFrame({df[0]:df[1]})
    y = pd.DataFrame({df[0]:df[2]})
    z = pd.DataFrame({df[0]:df[3]})
    if df[0] == 0:
        x_df = x
        y_df = y
        z_df = z
    else:
        x_df = pd.concat([x_df,x], ignore_index=True, axis=1)
        y_df = pd.concat([y_df,y], ignore_index=True, axis=1)
        z_df = pd.concat([z_df,z], ignore_index=True, axis=1)
#x_df = pd.concat([x_df,sup_resframe.loc[:,'file':'tag']], ignore_index=True, axis=1)
#x_df.columns = x_df.columns[:-2].tolist() + ['file','tag']
#y_df = pd.concat([y_df,sup_resframe.loc[:,'file':'tag']], ignore_index=True, axis=1)
#y_df.columns = y_df.columns[:-2].tolist() + ['file','tag']
#z_df = pd.concat([z_df,sup_resframe.loc[:,'file':'tag']], ignore_index=True, axis=1)
#z_df.columns = z_df.columns[:-2].tolist() + ['file','tag']
print x_df.shape #all x's + file, tag
print y_df.shape #all y's + file, tag
print z_df.shape #all z's + file, tag


Supervised dataset dim: (286, 6)
Columns: ['x', 'y', 'z', 'align', 'file', 'tag']
(455, 286)
(455, 286)
(455, 286)


In [5]:
supervised = resframe[resframe['tag'] !=""]
print 'Supervised: {}'.format(supervised.shape)

Supervised: (286, 6)


In [6]:
unsupervised = resframe[resframe['tag'] ==""]
print 'Unsupervised: {}'.format(unsupervised.shape)

Unsupervised: (285, 6)


## Visualizing the distribution

#### loading metafeatures dispatcher

In [7]:
import meta_features

In [8]:
function_dispatcher = {
    "mean"     : meta_features.mean,
    "std"      : meta_features.std,
    "max"      : meta_features.max_value,
    "min"      : meta_features.min_value,
    "mad"      : meta_features.mad,
    "sma"      : meta_features.sma,
    "iqr"      : meta_features.iqr,
    "energy"   : meta_features.energy,
    "fft_energy"   : meta_features.fft_energy,
    "maxInds"  : meta_features.maxInds,
    "meanFreq" : meta_features.meanFreq,
    "skewness" : meta_features.skewness,
    "kurtosis" : meta_features.kurtos,
    "freq_skewness" : meta_features.freq_skewness,
    "freq_kurtosis" : meta_features.freq_kurtos,
    "num_peaks"     : meta_features.num_peaks,
    "moving_rmsV1"  : meta_features.moving_rmsV1,
    "rms"           : meta_features.rms,
    "max_min"       : meta_features.max_min,
    "pse"           : meta_features.pse
}

In [9]:
def get_metafeat_vector(segment, mf=["mean","std","max","min","mad","sma",
                                        "iqr","energy","maxInds","meanFreq","skewness","kurtosis"]):
    """
        Compute metafeatures from segment data.
    
        segment : the time series segment
        mf      : list of metafeatures functions to be computed on the segment data.
    """
    
    meta_vector = OrderedDict()

    for f in mf:
        try:
            func = function_dispatcher[f]          # retrieve function
            meta_vector[f] = func(segment)    # compute function on segment.
        except KeyError:
            raise ValueError('Invalid function: {}'.format(f))

    return meta_vector

In [10]:
### windowing function
def win_function(data):
    window_rads = np.linspace(0, np.pi, len(data))
    window = np.sin(window_rads)**2
    return data * window

#### Loading the corresponding data

In [147]:
#{u'High_level': u'Blocking-Dodging'} 30
#        {u'High_level': u'Inactive'} 7
#  {u'High_level': u'Locally-Moving'} 78
#         {u'High_level': u'Running'} 73
#       {u'High_level': u'Sprinting'} 27
#    {u'High_level': u'Stop-Running'} 36
#         {u'High_level': u'Walking'} 35

##### LOADING THE DATA ################
topics = ("{u'High_level': u'Blocking-Dodging'}",
          "{u'High_level': u'Inactive'}",
          "{u'High_level': u'Locally-Moving'}",
          "{u'High_level': u'Running'}",
          "{u'High_level': u'Sprinting'}",
          "{u'High_level': u'Stop-Running'}",
          "{u'High_level': u'Walking'}")

topic = topics[3]

Xs = []
Ys = []
Zs = []

for it, topic in enumerate(topics):
    x_sig_segments = resframe[resframe['tag']== topic]['x'].tolist() 
    y_sig_segments = resframe[resframe['tag']== topic]['y'].tolist()
    z_sig_segments = resframe[resframe['tag']== topic]['z'].tolist()

    ##apply windowing function
    x_sig_segments = [win_function(x_sig) for x_sig in x_sig_segments]
    y_sig_segments = [win_function(y_sig) for y_sig in y_sig_segments]
    z_sig_segments = [win_function(z_sig) for z_sig in z_sig_segments]

    ##### COMPUTING METAFEATURES ###########
    ## metafeatures list to be computed
    #to_compute = ["mean","std","max","min","mad","sma","iqr","rms","maxInds","skewness","kurtosis","freq_skewness","freq_kurtosis"]
    #to_compute = ["mean","std","mad","sma","iqr","skewness","kurtosis","maxInds","rms","num_peaks"]
    to_compute = ["mean", "pse", "sma"]

    x_meta_fts = []
    y_meta_fts = []
    z_meta_fts = []
    
    for s in range(len(z_sig_segments)):
        x_meta_fts.append(get_metafeat_vector(x_sig_segments[s],mf=to_compute).values())
        y_meta_fts.append(get_metafeat_vector(y_sig_segments[s],mf=to_compute).values())
        z_meta_fts.append(get_metafeat_vector(z_sig_segments[s],mf=to_compute).values())
        xy_corr     = meta_features.correlation(x_sig_segments[s], y_sig_segments[s])
        xz_corr     = meta_features.correlation(x_sig_segments[s], z_sig_segments[s])
        yz_corr     = meta_features.correlation(y_sig_segments[s], z_sig_segments[s])
        

    labels = pd.DataFrame(np.array([topic.split("'")[-2] for t in range(len(z_sig_segments))]), columns=['label'])
    X = pd.DataFrame(np.array(x_meta_fts), columns=to_compute)
    Y = pd.DataFrame(np.array(y_meta_fts), columns=to_compute)
    Z = pd.DataFrame(np.array(z_meta_fts), columns=to_compute)
    
    Xs.append(pd.concat([X, labels], axis=1, join_axes=[X.index]))
    Ys.append(pd.concat([Y, labels], axis=1, join_axes=[Y.index]))
    Zs.append(pd.concat([Z, labels], axis=1, join_axes=[Z.index]))


In [149]:
##### PLOTTING RESULTS #####
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

sns.set_style('darkgrid')
sns.set_palette('muted')
sns.set_context("notebook", font_scale=1.5,
                rc={"lines.linewidth": 2.5})

# We choose a color palette with seaborn.
palette = np.array(sns.color_palette("hls", 8))

fig=plt.figure(figsize=(8,6))
#ax=fig.add_subplot(111)
ax = Axes3D(fig)
for it,X in enumerate(Xs):
    ax.scatter(X[to_compute[0]], X[to_compute[1]], X[to_compute[2]], c=palette[it],label=topics[it].split("'")[-2])
    #plt.savefig('result_'+topic.split("'")[-2]+".png")
plt.legend()
plt.show()

In [151]:
topic_map = {"Blocking-Dodging": 0,    #30
              "Inactive": 1,            #7
              "Locally-Moving": 2,      #78
              "Running": 3,             #73
              "Sprinting": 4,           #27
              "Stop-Running": 5,        #36
              "Walking": 6}

In [152]:
X = np.vstack([x.as_matrix()[:,:-1]
               for x in Zs])

y = np.hstack([x['label'].as_matrix()
               for x in Zs])

print y.shape
print X.shape

(286,)
(286, 3)


## PCA

In [165]:
from sklearn.decomposition import PCA as sklearnPCA

sklearn_pca = sklearnPCA(n_components=3)
X_pca = sklearn_pca.fit_transform(X)
print X_pca.shape

def plot_pca():

    fig=plt.figure(figsize=(8,6))
    ax = Axes3D(fig)

    for label,marker,color in zip(
        topic_map.keys(),('^', 's', 'o','<','>','+'),[palette[it] for it in range(6)]):

        #ax.scatter(X[to_compute[0]], X[to_compute[1]], X[to_compute[2]], c=palette[it],label=topics[it].split("'")[-2])
        ax.scatter(X_pca[:,0][y == label],
                X_pca[:,1][y == label],
                X_pca[:,2][y == label],
                marker=marker,
                color=color,
                alpha=0.5,
                label=label
                )

    ax.set_xlabel('PC1')
    ax.set_ylabel('PC2')

    leg = ax.legend(loc='upper right', fancybox=True)
    #leg.get_frame().set_alpha(0.5)
    ax.set_title('PCA: Iris projection onto the first 2 principal components')

    # hide axis ticks
    plt.tick_params(axis="both", which="both", bottom="off", top="off",  
            labelbottom="on", left="off", right="off", labelleft="on")

    # remove axis spines
    #ax.spines["top"].set_visible(False)  
    #ax.spines["right"].set_visible(False)
    #ax.spines["bottom"].set_visible(False)
    #ax.spines["left"].set_visible(False)    

    plt.tight_layout
    plt.grid()

    plt.show()

(286, 3)


In [166]:
plot_pca()

## LDA

In [131]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import LabelEncoder

# LDA
sklearn_lda = LDA(n_components=2)
X_lda_sklearn = sklearn_lda.fit_transform(X_pca, y)

def plot_scikit_lda(X, title):

    ax = plt.subplot(111)
    for label,marker,color in zip(
        topic_map.keys(),('^', 's', 'o','<','>','+'),[palette[it] for it in range(6)]):

        plt.scatter(x=X[:,0][y == label],
                    y=X[:,1][y == label] * -1, # flip the figure
                    marker=marker,
                    color=color,
                    alpha=0.5,
                    label=label)

    plt.xlabel('LD1')
    plt.ylabel('LD2')

    leg = plt.legend(loc='upper right', fancybox=True)
    leg.get_frame().set_alpha(0.5)
    plt.title(title)

    # hide axis ticks
    plt.tick_params(axis="both", which="both", bottom="off", top="off",  
            labelbottom="on", left="off", right="off", labelleft="on")

    # remove axis spines
    ax.spines["top"].set_visible(False)  
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)    

    plt.grid()
    plt.tight_layout
    plt.show()


In [132]:
def plot_step_lda():

    ax = plt.subplot(111)
    for label,marker,color in zip(
        range(1,4),('^', 's', 'o'),('blue', 'red', 'green')):

        plt.scatter(x=X_lda[:,0].real[y == label],
                y=X_lda[:,1].real[y == label],
                marker=marker,
                color=color,
                alpha=0.5,
                label=label_dict[label]
                )

    plt.xlabel('LD1')
    plt.ylabel('LD2')

    leg = plt.legend(loc='upper right', fancybox=True)
    leg.get_frame().set_alpha(0.5)
    plt.title('LDA: Iris projection onto the first 2 linear discriminants')

    # hide axis ticks
    plt.tick_params(axis="both", which="both", bottom="off", top="off",  
            labelbottom="on", left="off", right="off", labelleft="on")

    # remove axis spines
    ax.spines["top"].set_visible(False)  
    ax.spines["right"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)    

    plt.grid()
    plt.tight_layout
    plt.show()

In [134]:
#plot_step_lda()
plot_scikit_lda(X_lda_sklearn, title='Default LDA via scikit-learn')

In [263]:
import scipy.fftpack as fft
#fig = plt.figure(figsize=[12,7])
#ax0 = plt.subplot(121)

#t =  [0,10,20,30,40,50,60,70,80,90]; # time scale
#x = [10,120,130,120,120,100,123,456,78,89]; # time series
frequences = []
topic = topics[4]
t_freq = []
for d in resframe[resframe['tag']== topic]['x'].tolist():
    Fs = 45 # sampling frequency 1 kHz
    x = d
    t = range(len(x))
    x = x - np.mean(x);                                            # <= ADDED LINE
    #ax0.plot(t,x)
    #ax0.grid()
    #ax0.set_title('Time series')

    nfft = 16 # next larger power of 2
    y = fft.fft(x,n=nfft)  # Fast Fourier Transform
    ysp = np.abs(y**2); # raw power spectrum density
    yhalf = ysp[:int(nfft/2)]; # half-spectrum
    v = np.max(yhalf); # find maximum
    k = np.argmax(yhalf)
    f_scale = np.array(range(int(nfft/2)))* Fs/nfft;

    T = 1./Fs          #the period, the sample time, the time after which each data come.
    t  = np.linspace(0,len(x)*T,len(x)) # N_samps*T (#samples x sample period) is the signal time.
    freq_axis = np.arange(0,Fs,Fs/nfft)

    ##### FFT of the signal #####
    #plt.subplots_adjust(hspace=0.5, wspace= 0.4)
    #ax = plt.subplot(122)
    #pt, = ax.plot(freq_axis,ysp, lw=2.0, c='b')
    #p = plt.Rectangle((Fs/2, 0), Fs/2, ax.get_ylim()[1], facecolor="grey", fill=True, alpha=0.75, hatch="/", zorder=3)
    #ax.add_patch(p)
    #ax.set_xlim((ax.get_xlim()[0],Fs))
    #ax.set_title('FFT - signal', fontsize= 16, fontweight="bold")
    #ax.set_ylabel('FFT magnitude (power)')
    #ax.set_xlabel('Frequency (Hz)')
    #plt.legend((p,), ('excluded',))
    #plt.grid()

    freq = k*(Fs/nfft)
    if freq == k:
        plt.plot(t,x)
    print 'Dominant freq.: {:.2f} Hz'.format(k*(Fs/nfft))
    #fprintf('Frequency step (resolution) = %f Hznn\n', f_scale(2))
    t_freq.append(freq)
frequences.append(t_freq)

Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 5.62 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz


In [264]:

for i,f in enumerate(frequences):
    print i
    plt.hist(f, color=palette[i], alpha=0.9, label=topics[i])
plt.legend()
plt.show()

0


In [277]:
import scipy.fftpack as fft
fig = plt.figure(figsize=[12,7])
ax0 = plt.subplot(121)

topic = topics[6]
data = resframe[resframe['tag']== topic]['x'].tolist()
topic_bins = []
t_freq = []
for i,x in enumerate(data):
    #t =  [0,10,20,30,40,50,60,70,80,90]; # time scale
    #x = [10,120,130,120,120,100,123,456,78,89]; # time series
    Fs = 45 # sampling frequency 1 kHz
    t = range(len(x))
    x = x - np.mean(x);                                            # <= ADDED LINE
    ax0.plot(t,x)
    ax0.grid()
    ax0.set_title('Time series')

    nfft = 16 # next larger power of 2
    y = fft.fft(x,n=nfft)  # Fast Fourier Transform
    ysp = np.abs(y**2); # raw power spectrum density
    yhalf = ysp[:int(nfft/2)]; # half-spectrum

    v = np.max(yhalf); # find maximum
    k = np.argmax(yhalf)
    f_scale = np.array(range(int(nfft/2)))* Fs/nfft;

    T = 1./Fs          #the period, the sample time, the time after which each data come.
    t  = np.linspace(0,len(x)*T,len(x)) # N_samps*T (#samples x sample period) is the signal time.
    freq_axis = np.arange(0,Fs,Fs/nfft)

    topic_bins.append(yhalf)
    freq = k*(Fs/nfft)
    print 'Dominant freq.: {:.2f} Hz'.format(k*(Fs/nfft))

##### FFT of the signal #####
the_bins = np.array(topic_bins)
print the_bins.shape
plt.subplots_adjust(hspace=0.5, wspace= 0.4)
ax = plt.subplot(122)
pt, = ax.hist(the_bins, lw=2.0, c=palette[i%8])
ax.set_title('FFT - signal {}'.format(topic.split("'")[-2]), fontsize= 16, fontweight="bold")
ax.set_ylabel('FFT magnitude (power)')
ax.set_xlabel('Frequency (Hz)')
plt.legend((p,), ('excluded',))
plt.grid()

Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 2.81 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 5.62 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
Dominant freq.: 0.00 Hz
(35, 8)


AttributeError: Unknown property c