In [18]:
import csv
import math
import os
from scipy.integrate import quad


def calcAvgSensorValue(sensorValues):
    avg = sum(sensorValues) / len(sensorValues)
    avg = round(avg, 9)
    return avg


def calcStdDev(sensorValues, meanVal):
    sd = []
    for num in sensorValues:
        result = num - meanVal
        result = result * result
        sd.append(result)
    return calcAvgSensorValue(sd)


def normalize(sensorValues):
    normalized_sensor = []
    for val in sensorValues:
        val = float(val)
        normalized = 2 * ((val - min(sensorValues)) / (max(sensorValues) - min(sensorValues))) - 1
        normalized_sensor.append(normalized)
    return normalized_sensor


def integral(i):
    return getGaussianVal(i, 0, 0.25)


def getGaussianVal(i, avg, sd):
    i = float(i - avg) / sd
    gauss = math.exp(-i * i / 2.0) / math.sqrt(2.0 * math.pi) / sd
    return gauss


def determineBands():
    numBands = r * 2
    bandList = []
    bandStart = -1
    for i in range(1, numBands):
        integral1, e = quad(integral, (i - r - 1) / r, (i - r) / r)
        integral2, e = quad(integral, -1, 1)
        length_i = 2 * (integral1 / integral2)
        band = bandStart + length_i
        bandList.append(band)
        bandStart = band
    bandList.append(1.0)
    return bandList


def quantize(values, bandList):
    quantized = ""
    for i in range(len(values)):
        bound = -1
        for band in bandList:
            if band >= values[i] > bound:
                quantized += str(bandList.index(band) + 1)
                break
            else:
                bound = band
    return quantized


def getWords():
    wordList = []
    i = 0
    while (i + w - 1) < len(quantizedSensor):
        word = quantizedSensor[i:i + w]
        wordList.append(word)
        i += s
    return wordList


def addToUniqueDict(word_tuple):
    inList = False
    for word in unique_dict:
        if word == word_tuple:
            inList = True
            break
    if not inList:
        unique_dict.append(word_tuple)


def calcAvgQuanAmp():
    avgQuanAmpList = []
    normWord = []
    i = 0
    while (i + w - 1) < len(normSensorVals):
        word = normSensorVals[i:i + w]
        normWord.append(word)
        i += s

    for word in normWord:
        avgAmp = sum(word) / len(word)
        avgQuanAmpList.append(avgAmp)

    return avgQuanAmpList


def getWordsFromFile(file):
    with open(file, "r") as f:
        words = []
        for line in f:
            line = line.strip()
            row = line.split(" - [")  # list of all words as they occur in the file
            if len(row) > 1:
                words.append(row[1])
        f.close()
    return formatWordsFromFile(words)


def formatWordsFromFile(tempList):
    allWords = []
    for sensorWords in tempList:
        sensorWords = sensorWords.replace("]", "")
        sensorWords = sensorWords.replace("'", "")
        wrd = sensorWords.split(", ")
        allWords.append(wrd)
    return allWords


def getUniqueWordsInGesture(allWordsInGesture):
    uniqueWords = []
    for i in range(len(allWordsInGesture)):  # i is each sensor in the gesture
        for word in allWordsInGesture[i]:
            uniqueWord = (direct, i+1, word)
            if uniqueWord not in uniqueWords:
                uniqueWords.append(uniqueWord)
    return uniqueWords


def calcTfValue(wordTuple):
    totalWords = 0
    sensorIndex = wordTuple[1]-1
    num_occurs = 0
    component = wordTuple[0]

    for file in os.listdir(directory + component):
        if file.endswith(".wrd"):
            file = directory + component + "/" + file
            gestFileWords = getWordsFromFile(file)
            num_occurs += gestFileWords[sensorIndex].count(wordTuple[2])
            totalWords += len(gestFileWords[sensorIndex])
    value = num_occurs / totalWords
    return value


def calcIdfValue(wordTuple):
    numObjs = 60
    numObjsWithWord = 0
    sensorIndex = wordTuple[1]-1
    component = wordTuple[0]

    for file in os.listdir(directory + component):
        if file.endswith(".wrd"):
            file = directory + component + "/" + file
            gestFileWords = getWordsFromFile(file)
            if word_tuple[2] in gestFileWords[sensorIndex]:
                numObjsWithWord += 1

    value = math.log(numObjs / numObjsWithWord)
    return value


def writeValsToFile():
    allWordsInGesture = getWordsFromFile(fullFileName)
    uniqueWordsInGesture = getUniqueWordsInGesture(allWordsInGesture)
    sensorId = 1
    #unique_dict is what I want
    for sensor in uniqueWordsInGesture:
        word_tuple = (sensor[0], sensor[1], sensor[2])
        for n, i in enumerate(unique_dict):
            if i == word_tuple:
                tfVal = unique_tf_dict[n]
                tfidfVal = unique_tfidf_dict[n]
                tfFile.write(str(i) + "-")
                tfidfFile.write(str(i) + "-")
                tfFile.write(str(tfVal))
                tfidfFile.write(str(tfidfVal))
                tfFile.write("\n")
                tfidfFile.write("\n")
        
        #sensorId += 1


if __name__ == '__main__':
    # GLOBAL VARIABLES:
    unique_dict = []  # stores list of all unique word tuples found in entire DB in order
    unique_tf_dict = []
    unique_tfidf_dict = []
    all_gesture_dict = []  # stores list of all words found in DB

    # TASK 0
    # TASK 0A
    directory = input("Enter the data directory path: ")
    w = input("Enter the window length: ")
    s = input("Enter the shift length: ")
    r = input("Enter the resolution: ")

    w = int(w)
    s = int(s)
    r = int(r)

    # for each data file create a .wrd file containing the following:
    for direct in os.listdir(directory):
        # for each csv file in X,Y,W,Z:
        if not direct.startswith('.') and not direct.endswith(".xlsx"):
            for filename in os.listdir(directory + direct):
                if filename.endswith(".csv"):
                    bands = determineBands()

                    # generate .wrd file
                    with open(str(directory) + str(direct) + "/" + str(filename) + ".wrd", "w") as wrdFile:

                        sensor_id = 1
                        csvFile = open(str(directory) + str(direct) + "/" + filename, "r")
                        reader = csv.reader(csvFile, delimiter=',')
                        # for each sensor sj in file
                        for sensor in reader:
                            # output component ID, c in output file
                            wrdFile.write(str(direct) + ", ")

                            # write sensorID to wrd file
                            wrdFile.write(str(sensor_id) + ", ")

                            # compute and output average amplitude, avgij of the values
                            sensorVals = list(sensor)
                            sensorVals = [float(i) for i in sensorVals]
                            sensorAvg = calcAvgSensorValue(sensorVals)
                            wrdFile.write(str(sensorAvg) + ", ")

                            # compute and output standard deviations stdij of the values
                            stdDev = calcStdDev(sensorVals, sensorAvg)
                            wrdFile.write(str(stdDev) + ", ")

                            # normalize entries between -1 and 1
                            normSensorVals = normalize(sensorVals)

                            # quantizes entries into 2r levels as in phase 1
                            quantizedSensor = quantize(normSensorVals, bands)

                            # moves a w-length window on time series (by shifting it s units at a time), and at position h
                            sensorWords = getWords()

                            # computes and outputs in file average quantized amplitude avgQijh for window h of sensor sj
                            avgQuanAmp = calcAvgQuanAmp()
                            wrdFile.write(str(avgQuanAmp) + ", " + " - ")

                            # outputs symbolic quantized window descriptor winQijh for the window h of sensor sj
                            wrdFile.write(str(sensorWords) + "\n")

                            # add dictionary of each window to gestureDict list
                            for window in sensorWords:
                                wordDict = (direct, sensor_id, window)
                                all_gesture_dict.append(wordDict)
                                if wordDict not in unique_dict:
                                    unique_dict.append(wordDict)

                            sensor_id += 1
        # The dictionary of the words consists of <componentName, sensorID, winQ>

    # TASK 0B
    for word_tuple in unique_dict:
        tfValue = calcTfValue(word_tuple)
        idfValue = calcIdfValue(word_tuple)
        tf_idf_value = tfValue * idfValue
        unique_tf_dict.append(tfValue)
        unique_tfidf_dict.append(tf_idf_value)

    for direct in os.listdir(directory):
        if not direct.startswith('.') and not direct.endswith(".xlsx"):
            for wrdFile in os.listdir(directory + direct):
                if wrdFile.endswith(".wrd"):
                    fullFileName = directory + direct + "/" + wrdFile
                    tfFile = open(directory + direct + "/tf_vectors_" + wrdFile[:-8] + ".txt", "w")
                    tfidfFile = open(directory + direct + "/tfidf_vectors_" + wrdFile[:-8] + ".txt", "w")
                    writeValsToFile()
                    tfFile.close()
                    tfidfFile.close()
    # End of TASK0

Enter the data directory path: Dataset 6/
Enter the window length: 3
Enter the shift length: 3
Enter the resolution: 3


KeyboardInterrupt: 

In [17]:
#Task 1
import glob
import pandas as pd
import numpy as np
import pickle
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import NMF
from sklearn.decomposition import LatentDirichletAllocation as LDA
import os
import re

numbers = re.compile(r'(\d+)')

#unnecessary lines if you are using anaconda or another 
print("Please enter the following inputs as the same values you used for task 0: ")
directory = input("Enter the data directory path (ex: Data/: ")
w = input("Enter the window length (ex: 3): ")
s = input("Enter the shift length (ex: 3): ")
r = input("Enter the resolution (ex: 3): ")

w = int(w)
s = int(s)
r = int(r)
#end of non-anaconda lines

def numericalSort(value):
    parts = numbers.split(value)
    parts[1::2] = map(int, parts[1::2])
    return parts

#returns the top-k topics 
def PCAsetup(wordMat, k):
    #calculate PCA
    pca = PCA(k)
    pc = pca.fit_transform(wordMat)
    UT = pca.components_
    topK = pd.DataFrame(data = UT)
    
    original_df = pd.DataFrame(data = pc)
    original_df.to_pickle("./PCA_" + vectModel + ".pkl")
    
    print(topK)
    return topK

def SVDsetup(wordMat, k):
    #calculate SVD
    svd = TruncatedSVD(k)
    sv = svd.fit_transform(wordMat)
    VT = svd.components_
    topK = pd.DataFrame(data = VT)
    
    original_df = pd.DataFrame(data = sv)
    original_df.to_pickle("./SVD_" + vectModel + ".pkl")
    
    print(topK)
    return topK

def NMFsetup(wordMat, k):
    #calculate NMF
    nmf = NMF(k)
    nm = nmf.fit_transform(wordMat)
    R = nmf.components_
    topK = pd.DataFrame(data = R)
    
    original_df = pd.DataFrame(data = nm)
    original_df.to_pickle("./NMF_" + vectModel + ".pkl")
    
    print(topK)
    return topK

def LDAsetup(wordMat, k):
    #calculate LDA
    lda = LDA(k)
    ld = lda.fit_transform(wordMat)
    V = lda.components_
    topK = pd.DataFrame(data = V)
    
    original_df = pd.DataFrame(data = ld)
    original_df.to_pickle("./LDA_" + vectModel + ".pkl")
    
    print(topK)
    return topK


def makeMat(vectModel):    
    #read files
    Wmat = []
    Xmat = []
    Ymat = []
    Zmat = []
    if vectModel == "tf":
        for axisNum in range(1, 5):
            if axisNum == 1:
                axis = 'W'
            elif axisNum == 2:
                axis = 'X'
            elif axisNum == 3:
                axis = 'Y'
            elif axisNum == 4:
                axis = 'Z'
                
            for file in sorted(glob.glob(directory + axis + "/tf_vectors_*.txt"), key=numericalSort):
                #if not file.endswith(".txt") or not file.startswith("tf_vectors_"):
                    #continue
                #Xmat = []
                #read tf file
                f = open(file, "r")
                tf_vectors = f.readlines()
        
                gestWords = []
                tfVals = []
        
                #split the line into the word and tf value
                for line in tf_vectors:
                    noDash = line.split("-")
                    tf_val = noDash[-1]
                    tf_val = tf_val.replace("\n", "")
                    gestWords.append(noDash[0])
                    tfVals.append(tf_val)
           
        
                index = 0
                startI = "1"
                for y in range(1, w):
                    startI = startI + "1"
                startI = int(startI)
        
                #create dictionary with every word for every sensor and every directory
                numWords = (startI * (2*r) - startI) * 20
                wordMat = []
    
                for i in range(0, numWords + 20):
                    wordMat.append(0)
                
                # put tf values into matrix where column = word
                for x in gestWords:
                    word = x.split(", ")
                    sensorNum = word[1].replace("'", "")
                    wordID = word[2].replace("'", "")
                    wordID = wordID.replace(")", "")
                
                    wordID = int(wordID)
                    sensorNum = int(sensorNum)
                
                    #axisSplit = len(wordMat) / 4
                    sensorSplit = len(wordMat) / 20
                    
                    wordIndex = int(wordID + ((sensorNum - 1) * sensorSplit)) #+ ((axisNum - 1) * axisSplit)))
                    wordIndex = wordIndex - startI
                    
                    #temp
                    if "e" in tfVals[index]:
                        #continue
                        tfVals[index].replace("e", "")
                        tfVals[index].replace("-", "")
                        tfVals[index] = str(float(tfVals[index]) * 0.00001)
                        
                    #temp
            
                    wordMat[wordIndex] = float(tfVals[index])
                    index = index + 1
                

                if axisNum == 1:
                    axis = 'W'
                    Wmat.append(wordMat)
                elif axisNum == 2:
                    axis = 'X'
                    Xmat.append(wordMat)
                elif axisNum == 3:
                    axis = 'Y'
                    Ymat.append(wordMat)
                elif axisNum == 4:
                    axis = 'Z'
                    Zmat.append(wordMat)
                f.close()
                
        finalMat = np.append(Wmat, Xmat, axis = 1)
        finalMat = np.append(finalMat, Ymat, axis = 1)
        finalMat = np.append(finalMat, Zmat, axis = 1)
        
        #print(finalMat)
        
        return finalMat
            #print(Xmat)
    
    elif vectModel == "tfidf":
        for axisNum in range(1, 5):
            if axisNum == 1:
                axis = 'W'
            elif axisNum == 2:
                axis = 'X'
            elif axisNum == 3:
                axis = 'Y'
            elif axisNum == 4:
                axis = 'Z'
                
            for file in sorted(glob.glob(directory + axis + "/tfidf_vectors_*.txt"), key=numericalSort):
                #if not file.endswith(".txt") or not file.startswith("tfidf_vectors_"):
                    #continue
                #read tf file
                f = open(file, "r")
                tf_vectors = f.readlines()
        
                gestWords = []
                tfVals = []
        
                #split the line into the word and tf value
                for line in tf_vectors:
                    noDash = line.split("-")
                    tf_val = noDash[-1]
                    tf_val = tf_val.replace("\n", "")
                    gestWords.append(noDash[0])
                    tfVals.append(tf_val)
           
        
                index = 0
                startI = "1"
                for y in range(1, w):
                    startI = startI + "1"
                startI = int(startI)
        
                #create dictionary with every word for every sensor and every directory
                numWords = (startI * (2*r) - startI) * 20
                wordMat = []
    
                for i in range(0, numWords + 20):
                    wordMat.append(0)
                
                
                # put tf values into matrix where column = word
                for x in gestWords:
                    word = x.split(", ")
                    sensorNum = word[1].replace("'", "")
                    wordID = word[2].replace("'", "")
                    wordID = wordID.replace(")", "")
                
                    wordID = int(wordID)
                    sensorNum = int(sensorNum)
                
                    #axisSplit = len(wordMat) / 4
                    sensorSplit = len(wordMat) / 20
                    
                    wordIndex = int(wordID + ((sensorNum - 1) * sensorSplit)) #+ ((axisNum - 1) * axisSplit)))
                    wordIndex = wordIndex - startI
            
                    #temp
                    if "e" in tfVals[index]:
                        tfVals[index].replace("e", "")
                        tfVals[index].replace("-", "")
                        tfVals[index] = str(float(tfVals[index]) * 0.00001)
                    #temp
                    wordMat[wordIndex] = float(tfVals[index])
                    index = index + 1
                

                if axisNum == 1:
                    axis = 'W'
                    Wmat.append(wordMat)
                elif axisNum == 2:
                    axis = 'X'
                    Xmat.append(wordMat)
                elif axisNum == 3:
                    axis = 'Y'
                    Ymat.append(wordMat)
                elif axisNum == 4:
                    axis = 'Z'
                    Zmat.append(wordMat)
                f.close()
           
        finalMat = np.append(Wmat, Xmat, axis = 1)
        finalMat = np.append(finalMat, Ymat, axis = 1)
        finalMat = np.append(finalMat, Zmat, axis = 1)
        
        #print(finalMat)
        
        return finalMat
            #print(Xmat)
    
def createdictofComponents(topk, k):
    #creates a matrix that contains tuples of the word and score
    outputMat = []
    outputMat = topk
    startI = "1"
    for y in range(1, w):
        startI = startI + "1"
    startI = int(startI)
    numWords = (startI * (2*r) - startI)
    
    for j in range(0, len(topk.columns)):
        axisSplit = len(topk) / 4
        sensorSplit = axisSplit / 20
        
        ax = int(j/axisSplit)
        sens = int((j - (ax * axisSplit))/sensorSplit)
        word = int((j - ((sens * sensorSplit) + (ax * axisSplit))) - startI)
        
        for row in range(0, k):
            if ax == 0:
                axis = 'W'
            elif ax == 1:
                axis = 'X'
            elif ax == 2:
                axis = 'Y'
            elif ax == 3:
                axis = 'Z'
            label = axis + str(sens) + str(word)
            outputMat[j][row] = (topk[j][row], label)
            
        
                
            """for sensor in range(0, 20):
            
                for word in range(0, numWords + 1):
                    label = axis + str(sensor - 1) + str(word + startI)
                    
                    #axisSplit = len(topk) / 4
                    sensorSplit = axisSplit / 20
                    
                    wordIndex = int((word + startI) + (sensor * sensorSplit) + (ax * axisSplit))
                    wordIndex = wordIndex - startI
                    outputMat[wordIndex][row] = (topk[wordIndex][row], label)"""
                
                
    #for i in range(0, k):
     #   word = "w" + str(i)
      #  for j in range(0, len(topk)):
       #     outputMat[i][j] = (topk[i][j], word)
            
    print(outputMat)
    #sorts the words acording to their scores
    outputMat = np.transpose(outputMat)
    outputMat = outputMat.apply(lambda x: x.sort_values(ascending = False).values)
    outputMat = np.transpose(outputMat)
    finalMat = outputMat
    #for i in range(0, k):
     #   for j in range(0, len(outputMat)):
      #      finalMat[i][j] = (outputMat[i][j][1], outputMat[i][j][0])
            
    file = open("./userOutput.txt", "w")
    file.write(str(finalMat))
    file.close()
    return finalMat
    
    
def task1(vectModel, useOp, k):
    
    if useOp == "PCA":
        #PCA
        wordMat = makeMat(vectModel)
        
        topk = PCAsetup(wordMat, k)
        print("\nPCA:\n")
        #print(topk)
        
        dictofComponents = createdictofComponents(topk, k)
        print(dictofComponents)
        
        
    elif useOp == "SVD":
        wordMat = makeMat(vectModel)
        topk = SVDsetup(wordMat, k)
        print("\nSVD:\n")
        #print(topk)
        
        dictofComponents = createdictofComponents(topk, k)
        print(dictofComponents)
        
        
    elif useOp == "NMF":
        wordMat = makeMat(vectModel)
        topk = NMFsetup(wordMat, k)
        print("\nNMF:\n")
        #print(topk)
        
        dictofComponents = createdictofComponents(topk, k)
        print(dictofComponents)

        
    elif useOp == "LDA":
        wordMat = makeMat(vectModel)
        topk = LDAsetup(wordMat, k)
        print("\nLDA:\n")
        #print(topk)
        
        dictofComponents = createdictofComponents(topk, k)
        print(dictofComponents)
        


vectModel = input("Enter the vector model (ex: tf): ")
k = input("Enter k (ex: 4): ")
useOp = input("Enter the analysis you would like to use (ex: PCA): ")
k = int(k)
task1(vectModel, useOp, k)


#sample output: 
#Enter the vector model: tf
#Enter k: 10
#Enter the analysis you would like to use: PCA




Please enter the following inputs as the same values you used for task 0: 
Enter the data directory path (ex: Data/: Dataset 5/
Enter the window length (ex: 3): 3
Enter the shift length (ex: 3): 3
Enter the resolution (ex: 3): 3
Enter the vector model (ex: tf): tf
Enter k (ex: 4): 3
Enter the analysis you would like to use (ex: PCA): PCA
          0             1             2             3             4      \
0 -1.278862e-07  9.940335e-06  1.376658e-16 -8.266911e-18 -3.076799e-17   
1 -3.144890e-08  1.829147e-06 -5.766857e-16  2.741317e-16 -5.359400e-17   
2 -3.887020e-08 -1.881789e-07 -1.016091e-16  7.416768e-17  1.985570e-17   

          5             6             7             8             9      \
0  2.901250e-18 -1.286100e-17 -1.254986e-17  4.785405e-19 -1.247794e-17   
1  2.600323e-19  1.398759e-18  2.363352e-18 -6.837194e-19 -4.121165e-18   
2  6.112208e-18 -8.150797e-18  4.329210e-18  1.363027e-18  7.740278e-19   

       ...       44470  44471  44472  44473  44474  44475 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


KeyboardInterrupt: 

In [23]:
import os
from sklearn.metrics.pairwise import cosine_similarity                          
from math import log
import re
import pandas as pd
from scipy import stats
from scipy import spatial
import pandas as pd
import numpy as np
from math import log2


def dot_similarity(gesture1, gesture2):                  
    x = np.array(gesture1)
    y = np.array(gesture2)
    """if len(x) > len(y):
        y = np.pad(y, (0, len(x) - len(y)))
    else:
        x = np.pad(x, (0, len(y) - len(x)))"""
    return np.dot(x, y)

def pears_similarity(vec1, vec2):
    pearson_coef, p_value = stats.pearsonr(vec1, vec2)
    return pearson_coef
    
def cos_similarity(vec1, vec2):                            
    x = np.array(vec1)
    y = np.array(vec2)                                                    
    similarity = 1- spatial.distance.cosine(x, y)                             
                                                                            
    return similarity                             

def KL_div_similarity(p, q):
    return sum(p[i] * log2(p[i]/q[i]) for i in range(len(p)))

def edit_distance(sensor1, sensor2, m, n):
    # Create a table to store results of subproblems 
    dp = [[0 for x in range(n + 1)] for x in range(m + 1)] 
  
    # Fill d[][] in bottom up manner 
    for i in range(m + 1): 
        for j in range(n + 1): 
  
            # If first string is empty, only option is to 
            # insert all characters of second string 
            if i == 0: 
                dp[i][j] = j    # Min. operations = j 
  
            # If second string is empty, only option is to 
            # remove all characters of second string 
            elif j == 0: 
                dp[i][j] = i    # Min. operations = i 
  
            # If last characters are same, ignore last char 
            # and recur for remaining string 
            elif sensor1[i-1] == sensor2[j-1]: 
                dp[i][j] = dp[i-1][j-1] 
  
            # If last character are different, consider all 
            # possibilities and find minimum 
            else: 
                dp[i][j] = 1 + min(dp[i][j-1],        # Insert 
                                   dp[i-1][j],        # Remove 
                                   dp[i-1][j-1])    # Replace 
  
    return dp[m][n] 
    
def dynamic_time_warping(sensor1, sensor2, m, n):
    dp = [[0 for x in range(n+1)] for x in range(m+1)] 
    
    for i in range(1, m+1): 
        for j in range(1, n+1): 
            dp[i][j] = abs(sensor1[i-1] - sensor2[j-1])
             
            if i == 1 and j == 1:
                continue
            elif i == 1 and j != 1:
                dp[i][j] += dp[i][j-1] 
            elif i != 1 and j == 1:
                dp[i][j] += dp[i-1][j]
            else:
                dp[i][j] += min(dp[i][j-1],        # Insert 
                                dp[i-1][j],        # Remove 
                                dp[i-1][j-1])    # Replace 
            
    return dp[m][n] 

def tfidf_loader(directory):
    gestures = []
    for filename in os.listdir(directory):
        if not filename.startswith("tfidf_") or not filename.endswith(".txt") :
            continue
            
        path = directory+"/"+filename
        with open(path, "r") as w:
            gesture = w.readlines()

        matrix=[]

        for sensor in gesture :
            tfidf = float(sensor.split("-")[1].strip())
            matrix.append(tfidf)
                
        gestures.append(matrix)
        
    return gestures
    
    
def tf_loader(directory):
    gestures = []
    for filename in os.listdir(directory):
        if not filename.startswith("tf_") or not filename.endswith(".txt") :
            continue
            
        path = directory+"/"+filename
        with open(path, "r") as w:
            gesture = w.readlines()

        matrix=[]

        for sensor in gesture :
            tf = float(sensor.split("-")[1].strip())   
            matrix.append(tf)
        gestures.append(matrix)
        
    return gestures
    
def symbol_loader(directory):
    gestures = []
    for filename in os.listdir(directory):
        if not filename.endswith(".wrd") :
            continue
        path = directory+"/"+filename
        with open(path, "r") as w:
            gesture = w.readlines()

        matrix=[]

        for sensor in gesture :
            sym_quant_window = [re.findall(r'\d+',word)[0] for word in sensor.split(" - ")[1].split(",")] 
            matrix.append(sym_quant_window)
                
        gestures.append(matrix)
        
    return gestures

def amplitude_loader(directory):
    gestures = []
    for filename in os.listdir(directory):
        if not filename.endswith(".wrd") :
            continue
        path = directory+"/"+filename
        with open(path, "r") as w:
            gesture = w.readlines()

        matrix=[]

        for sensor in gesture :
            avg_quant_ampitude = [float(word) for word in sensor.split(" - ")[0].split("[")[1].replace("],","").split(",")]
            matrix.append(avg_quant_ampitude)
                
        gestures.append(matrix)
        
    return gestures

###### GET Input from users######
print("Enter gesture file(e.g Data/1.csv) :")
print("* WARNING : Please make .pkl in TASK 1 before you use principal component")
gesture_path = input()

directory = gesture_path.split("/")[0]
#axis = gesture_path.split("/")[1]
filename = gesture_path.split("/")[1]
key_idx = int(re.findall(r'\d+', filename)[0])

print("Enter vector model (tf, tfidf):")
vector_model = input()

print("Enter user options (1 ~ 7)")
print("* HINT : 1 = Dot similarity, 2 = PCA, 3 = SVD, 4 = NMF, 5 = LDA, 6 = Edit Distance, 7 = DTW")
user_option = int(input())  

#Retrieve only top 10 gestures with high similarity
top_K = 10

#Calculate similarity(cost) based on User options
if user_option == 1 :
    directory = directory + "/"
    gestures = makeMat(vector_model)
    directory = directory.replace("/", "")
    
    cost=[]
    key_gesture = gestures[key_idx]
    for gesture in gestures :
        similarity = dot_similarity(key_gesture, gesture)
        cost.append(similarity)
    cost_top_K = sorted(cost, reverse=True)[0:top_K]                                        
    
elif user_option == 2 :
    PC_path = ["PCA", vector_model]
    PC_path = "_".join(PC_path)
    pca = pd.read_pickle(PC_path + ".pkl")
    num = len(pca[0])

    pca = pca.T
    key_vec = pca[key_idx]
    cost=[]
    for idx in range(num) :
        similarity = pears_similarity(key_vec, pca[idx])
        cost.append(similarity)
    cost_top_K = sorted(cost, reverse=True)[0:top_K]                                        

elif user_option == 3 : 
    PC_path = ["SVD", vector_model]
    PC_path = "_".join(PC_path)
    svd = pd.read_pickle(PC_path + ".pkl")                                      
    num = len(svd[0])                                                           
                                                                                
    svd = svd.T                                                                 
    key_vec = svd[key_idx]      
    
    cost=[]                                                                     
    for idx in range(num) :                                                     
        similarity = cos_similarity(key_vec, svd[idx])                            
        cost.append(similarity)                 
    cost_top_K = sorted(cost, reverse=True)[0:top_K]                                        
    
elif user_option == 4 :                                                         
    PC_path = ["NMF", vector_model]
    PC_path = "_".join(PC_path)
    nmf = pd.read_pickle(PC_path + ".pkl")
    num = len(nmf[0])                                                           
                                                                                
    nmf = nmf.T                                                                 
    key_vec = nmf[key_idx]    
    
    cost=[]                                                                     
    for idx in range(num) :                                                     
        similarity = cos_similarity(key_vec, nmf[idx])                            
        cost.append(similarity)           
    cost_top_K = sorted(cost, reverse=True)[0:top_K]                                        
                  
elif user_option == 5 :                                                         
    PC_path = ["LDA", vector_model]
    PC_path = "_".join(PC_path)
    lda = pd.read_pickle(PC_path + ".pkl")
    num = len(lda[0])                                                           
                                                                                
    lda = lda.T                                                                 
    key_vec = lda[key_idx]    
    
    cost=[]                                                                     
    for idx in range(num) :                                                     
        similarity = KL_div_similarity(key_vec, lda[idx])                            
        cost.append(similarity)                                  
    cost_top_K = sorted(cost, reverse=True)[0:top_K]                                        

elif user_option == 6 :
    pathW = directory + "/W"
    pathX = directory + "/X"
    pathY = directory + "/Y"
    pathZ = directory + "/Z"
    gesturesW = symbol_loader(pathW)
    gesturesX = symbol_loader(pathX)
    gesturesY = symbol_loader(pathY)
    gesturesZ = symbol_loader(pathZ)
    
    gestures = np.append(gesturesW, gesturesX, axis = 1)
    gestures = np.append(gestures, gesturesY, axis = 1)
    gestures = np.append(gestures, gesturesZ, axis = 1)
        
    key_gesture = gestures[key_idx]
    
    cost=[0]*len(gestures)
    for i, gesture in enumerate(gestures) :
        for j, sensor in enumerate(gesture) :
            n = len(sensor)
            m = len(key_gesture[j])
            cost[i] += edit_distance(key_gesture[j], sensor, m, n)
    cost_top_K = sorted(cost)[0:top_K]                                        

elif user_option == 7 :
    pathW = directory + "/W"
    pathX = directory + "/X"
    pathY = directory + "/Y"
    pathZ = directory + "/Z"
    gesturesW = amplitude_loader(pathW)
    gesturesX = amplitude_loader(pathX)
    gesturesY = amplitude_loader(pathY)
    gesturesZ = amplitude_loader(pathZ)
    
    gestures = np.append(gesturesW, gesturesX, axis = 1)
    gestures = np.append(gestures, gesturesY, axis = 1)
    gestures = np.append(gestures, gesturesZ, axis = 1)
    
    key_gesture = gestures[key_idx]
    
    cost=[0]*len(gestures)
    for i, gesture in enumerate(gestures) :                                     
        for j, sensor in enumerate(gesture) :                                   
            n = len(sensor)                                                     
            m = len(key_gesture[j])                                             
            cost[i] += dynamic_time_warping(key_gesture[j], sensor, m, n)    
    cost_top_K = sorted(cost)[0:top_K]                                        
else:
    print("ERROR : No such user option in this program")
    
                                                           
print("Most similar (gesture, score) ")              
for k in cost_top_K :                                                       
    print((cost.index(k), k))       


Enter gesture file(e.g Data/1.csv) :
Data/59.csv
Enter vector model (tf, tfidf):
tfidf
Enter user options (1 ~ 7)
* HINT : 1 = Dot similarity, 2 = PCA, 3 = SVD, 4 = NMF, 5 = LDA, 6 = Edit Distance, 7 = DTW
7
Most similar (gesture, score) 
(59, 0.0)
(3, 517.226686729692)
(0, 546.2733304634784)
(32, 548.2429271479214)
(42, 588.7590218944844)
(14, 595.3573641879669)
(2, 599.1992888542443)
(45, 606.6645926870148)
(6, 630.4273810272091)
(36, 636.7623912495804)


In [20]:
#!/usr/bin/env python
# coding: utf-8

# In[13]:


import os
from sklearn.metrics.pairwise import cosine_similarity                          
from math import log
import re
import pandas as pd
from scipy import stats
from scipy import spatial
import pandas as pd
import numpy as np                  
from math import log2  
import glob

numbers = re.compile(r'(\d+)')

print("Please enter the following inputs as the same values you used for task 0: ")
#directory = input("Enter the data directory path (ex: Data/: ")
w = input("Enter the window length (ex: 3): ")
s = input("Enter the shift length (ex: 3): ")
r = input("Enter the resolution (ex: 3): ")

w = int(w)
s = int(s)
r = int(r)

def numericalSort(value):
    parts = numbers.split(value)
    parts[1::2] = map(int, parts[1::2])
    return parts
                               
def makeMat(vectModel):    
    #read files
    Wmat = []
    Xmat = []
    Ymat = []
    Zmat = []
    if vectModel == "tf":
        for axisNum in range(1, 5):
            if axisNum == 1:
                axis = 'W'
            elif axisNum == 2:
                axis = 'X'
            elif axisNum == 3:
                axis = 'Y'
            elif axisNum == 4:
                axis = 'Z'
                
            for file in glob.glob(directory + axis + "/tf_vectors_*.txt"):
                #Xmat = []
                #read tf file
                f = open(file, "r")
                tf_vectors = f.readlines()
        
                gestWords = []
                tfVals = []
        
                #split the line into the word and tf value
                for line in tf_vectors:
                    noDash = line.split("-")
                    tf_val = noDash[1]
                    tf_val = tf_val.replace("\n", "")
                    gestWords.append(noDash[0])
                    tfVals.append(tf_val)
           
        
                index = 0
                startI = "1"
                for y in range(1, w):
                    startI = startI + "1"
                startI = int(startI)
        
                #create dictionary with every word for every sensor and every directory
                numWords = (startI * (2*r) - startI) * 20
                wordMat = []
    
                for i in range(0, numWords + 20):
                    wordMat.append(0)
                
                # put tf values into matrix where column = word
                for x in gestWords:
                    word = x.split(", ")
                    sensorNum = word[1].replace("'", "")
                    wordID = word[2].replace("'", "")
                    wordID = wordID.replace(")", "")
                
                    wordID = int(wordID)
                    sensorNum = int(sensorNum)
                
                    #axisSplit = len(wordMat) / 4
                    sensorSplit = len(wordMat) / 20
                    
                    wordIndex = int(wordID + ((sensorNum - 1) * sensorSplit)) #+ ((axisNum - 1) * axisSplit)))
                    wordIndex = wordIndex - startI
            
                    wordMat[wordIndex] = float(tfVals[index])
                    index = index + 1
                

                if axisNum == 1:
                    axis = 'W'
                    Wmat.append(wordMat)
                elif axisNum == 2:
                    axis = 'X'
                    Xmat.append(wordMat)
                elif axisNum == 3:
                    axis = 'Y'
                    Ymat.append(wordMat)
                elif axisNum == 4:
                    axis = 'Z'
                    Zmat.append(wordMat)
                f.close()
                
        finalMat = np.append(Wmat, Xmat, axis = 1)
        finalMat = np.append(finalMat, Ymat, axis = 1)
        finalMat = np.append(finalMat, Zmat, axis = 1)
        
        #print(finalMat)
        
        return finalMat
            #print(Xmat)
    
    elif vectModel == "tfidf":
        for axisNum in range(1, 5):
            if axisNum == 1:
                axis = 'W'
            elif axisNum == 2:
                axis = 'X'
            elif axisNum == 3:
                axis = 'Y'
            elif axisNum == 4:
                axis = 'Z'
                
            for file in glob.glob(directory + axis + "/tfidf_vectors_*.txt"):
                #read tf file
                f = open(file, "r")
                tf_vectors = f.readlines()
        
                gestWords = []
                tfVals = []
        
                #split the line into the word and tf value
                for line in tf_vectors:
                    noDash = line.split("-")
                    tf_val = noDash[1]
                    tf_val = tf_val.replace("\n", "")
                    gestWords.append(noDash[0])
                    tfVals.append(tf_val)
           
        
                index = 0
                startI = "1"
                for y in range(1, w):
                    startI = startI + "1"
                startI = int(startI)
        
                #create dictionary with every word for every sensor and every directory
                numWords = (startI * (2*r) - startI) * 20
                wordMat = []
    
                for i in range(0, numWords + 20):
                    wordMat.append(0)
                
                # put tf values into matrix where column = word
                for x in gestWords:
                    word = x.split(", ")
                    sensorNum = word[1].replace("'", "")
                    wordID = word[2].replace("'", "")
                    wordID = wordID.replace(")", "")
                
                    wordID = int(wordID)
                    sensorNum = int(sensorNum)
                
                    #axisSplit = len(wordMat) / 4
                    sensorSplit = len(wordMat) / 20
                    
                    wordIndex = int(wordID + ((sensorNum - 1) * sensorSplit)) #+ ((axisNum - 1) * axisSplit)))
                    wordIndex = wordIndex - startI
            
                    wordMat[wordIndex] = float(tfVals[index])
                    index = index + 1
                

                if axisNum == 1:
                    axis = 'W'
                    Wmat.append(wordMat)
                elif axisNum == 2:
                    axis = 'X'
                    Xmat.append(wordMat)
                elif axisNum == 3:
                    axis = 'Y'
                    Ymat.append(wordMat)
                elif axisNum == 4:
                    axis = 'Z'
                    Zmat.append(wordMat)
                f.close()
           
        finalMat = np.append(Wmat, Xmat, axis = 1)
        finalMat = np.append(finalMat, Ymat, axis = 1)
        finalMat = np.append(finalMat, Zmat, axis = 1)
        
        #print(finalMat)
        
        return finalMat
            #print(Xmat)                                                       
                                                                                
def KL_div_similarity(p, q):                                                    
    return sum(p[i] * log2(p[i]/q[i]) for i in range(len(p)))

def dot_similarity(gesture1, gesture2):                  
    x = np.array(gesture1)
    y = np.array(gesture2)
    """if len(x) > len(y):
        y = np.pad(y, (0, len(x) - len(y)))
    else:
        x = np.pad(x, (0, len(y) - len(x)))"""
    return np.dot(x, y)

def pears_similarity(vec1, vec2):
    pearson_coef, p_value = stats.pearsonr(vec1, vec2)
    return pearson_coef
    
def cos_similarity(vec1, vec2):                            
    x = np.array(vec1)
    y = np.array(vec2)                                                    
    similarity = 1- spatial.distance.cosine(x, y)                             
                                                                            
    return similarity                             

def edit_distance(sensor1, sensor2, m, n):
    # Create a table to store results of subproblems 
    dp = [[0 for x in range(n + 1)] for x in range(m + 1)] 
  
    # Fill d[][] in bottom up manner 
    for i in range(m + 1): 
        for j in range(n + 1): 
  
            # If first string is empty, only option is to 
            # insert all characters of second string 
            if i == 0: 
                dp[i][j] = j    # Min. operations = j 
  
            # If second string is empty, only option is to 
            # remove all characters of second string 
            elif j == 0: 
                dp[i][j] = i    # Min. operations = i 
  
            # If last characters are same, ignore last char 
            # and recur for remaining string 
            elif sensor1[i-1] == sensor2[j-1]: 
                dp[i][j] = dp[i-1][j-1] 
  
            # If last character are different, consider all 
            # possibilities and find minimum 
            else: 
                dp[i][j] = 1 + min(dp[i][j-1],        # Insert 
                                   dp[i-1][j],        # Remove 
                                   dp[i-1][j-1])    # Replace 
  
    return dp[m][n] 
    
def dynamic_time_warping(sensor1, sensor2, m, n):
    dp = [[0 for x in range(n+1)] for x in range(m+1)] 
    
    for i in range(1, m+1): 
        for j in range(1, n+1): 
            dp[i][j] = abs(sensor1[i-1] - sensor2[j-1])
             
            if i == 1 and j == 1:
                continue
            elif i == 1 and j != 1:
                dp[i][j] += dp[i][j-1] 
            elif i != 1 and j == 1:
                dp[i][j] += dp[i-1][j]
            else:
                dp[i][j] += min(dp[i][j-1],        # Insert 
                                dp[i-1][j],        # Remove 
                                dp[i-1][j-1])    # Replace 
            
    return dp[m][n] 

def tfidf_loader(directory):
    gestures = []
    for filename in os.listdir(directory):
        if not filename.startswith("tfidf_") or not filename.endswith(".txt") :
            continue
            
        path = directory+"/"+filename
        with open(path, "r") as w:
            gesture = w.readlines()

        matrix=[]

        for sensor in gesture :
            tfidf = float(sensor.split("-")[1].strip())
            matrix.append(tfidf)
                
        gestures.append(matrix)
        
    return gestures
    
    
def tf_loader(directory):
    gestures = []
    for filename in os.listdir(directory):
        if not filename.startswith("tf_") or not filename.endswith(".txt") :
            continue
            
        path = directory+"/"+filename
        with open(path, "r") as w:
            gesture = w.readlines()

        matrix=[]

        for sensor in gesture :
            tf = float(sensor.split("-")[1].strip())   
            matrix.append(tf)
        gestures.append(matrix)
        
    return gestures
    
def symbol_loader(directory):
    gestures = []
    for filename in os.listdir(directory):
        if not filename.endswith(".wrd") :
            continue
        path = directory+"/"+filename
        with open(path, "r") as w:
            gesture = w.readlines()

        matrix=[]

        for sensor in gesture :
            sym_quant_window = [re.findall(r'\d+',word)[0] for word in sensor.split(" - ")[1].split(",")] 
            matrix.append(sym_quant_window)
                
        gestures.append(matrix)
        
    return gestures

def amplitude_loader(directory):
    gestures = []
    for filename in os.listdir(directory):
        if not filename.endswith(".wrd") :
            continue
        path = directory+"/"+filename
        with open(path, "r") as w:
            gesture = w.readlines()

        matrix=[]

        for sensor in gesture :
            avg_quant_ampitude = [float(word) for word in sensor.split(" - ")[0].split("[")[1].replace("],","").split(",")]
            matrix.append(avg_quant_ampitude)
                
        gestures.append(matrix)
        
    return gestures

print("Enter directory (e.g. Data/) :")
directory = input()
#axis = directory.split("/")[1]

print("Enter value p :")
p = int(input())
print("Enter user options (1 ~ 7)")
print("* HINT : 1 = Dot similarity, 2 = PCA, 3 = SVD, 4 = NMF, 5 = LDA, 6 = Edit Distance, 7 = DTW")
user_option = int(input())  

print("Enter vector model (tf, tfidf):")
vector_model = input()

num_gestures = 0
for filename in os.listdir(directory + "X"):                                          
    if not filename.endswith(".csv") :                                          
        continue
    num_gestures+=1
print("Number of gesture : ", num_gestures)
gest_gest_sim = [0.0]*num_gestures
gest_index = 1
for filename in sorted(glob.glob(directory + "/X" + "/*.csv"), key=numericalSort):                                          
    #if not filename.endswith(".csv") :                                          
     #   continue                    
    #get indext of file                                            
    key_idx = gest_index
    

    if user_option == 1:
        outname="DP"
        
        gestures = makeMat(vector_model)
        
        #gestures = tf_loader(directory)
        key_gesture = gestures[key_idx-1]
        cost=[]
        for gesture in gestures :
            similarity = dot_similarity(key_gesture, gesture)
            cost.append(similarity)
        gest_gest_sim[key_idx-1] = cost
    
    elif user_option == 2:                                                        
        outname="PCA"
        PC_path = ["PCA", vector_model]
        PC_path = "_".join(PC_path)

        pca = pd.read_pickle(PC_path + ".pkl")
        num = len(pca[0])
        pca = pca.T
        key_vec = pca[key_idx-1]
        cost=[]
        for idx in range(num) :
            similarity = pears_similarity(key_vec, pca[idx])
            cost.append(similarity)
        gest_gest_sim[key_idx-1] = cost                                         
    elif user_option == 3:
        outname="SVD"
        PC_path = ["SVD", vector_model]
        PC_path = "_".join(PC_path)
        svd = pd.read_pickle(PC_path + ".pkl")                                      
        num = len(svd[0])                                                           
                                                                                    
        svd = svd.T                                                                 
        key_vec = svd[key_idx-1]      
        
        cost=[]                                                                     
        for idx in range(num) :                                                     
            similarity = cos_similarity(key_vec, svd[idx])                            
            cost.append(similarity)                 
        gest_gest_sim[key_idx-1] = cost                                         
    elif user_option == 4:
        outname="NMF"
        PC_path = ["NMF", vector_model]
        PC_path = "_".join(PC_path)
        nmf = pd.read_pickle(PC_path + ".pkl")
        num = len(nmf[0])                                                           
                                                                                    
        nmf = nmf.T                                                                 
        key_vec = nmf[key_idx-1]    
        
        cost=[]                                                                     
        for idx in range(num) :                                                     
            similarity = cos_similarity(key_vec, nmf[idx])                           
            cost.append(similarity)
        gest_gest_sim[key_idx-1] = cost                                         

    elif user_option == 5: 
        outname="LDA"
        PC_path = ["LDA", vector_model]
        PC_path = "_".join(PC_path)
        lda = pd.read_pickle(PC_path + ".pkl")
        num = len(lda[0])                                                           
                                                                                    
        lda = lda.T                                                                 
        key_vec = lda[key_idx-1]    
        
        cost=[]                                                                     
        for idx in range(num) :                                                     
            similarity = KL_div_similarity(key_vec, lda[idx])                            
            cost.append(similarity)        
        gest_gest_sim[key_idx-1] = cost                                         
    elif user_option == 6 :
        outname="ED"
        pathW = directory + "/W"
        pathX = directory + "/X"
        pathY = directory + "/Y"
        pathZ = directory + "/Z"
        gesturesW = symbol_loader(pathW)
        gesturesX = symbol_loader(pathX)
        gesturesY = symbol_loader(pathY)
        gesturesZ = symbol_loader(pathZ)
    
        gestures = np.append(gesturesW, gesturesX, axis = 1)
        gestures = np.append(gestures, gesturesY, axis = 1)
        gestures = np.append(gestures, gesturesZ, axis = 1)
        
        #gestures = symbol_loader(path)
        key_gesture = gestures[key_idx-1]
        
        cost=[0]*len(gestures)
        for i, gesture in enumerate(gestures) :
            for j, sensor in enumerate(gesture) :
                n = len(sensor)
                m = len(key_gesture[j])
                cost[i] += edit_distance(key_gesture[j], sensor, m, n)
        gest_gest_sim[key_idx-1] = cost                                         

    elif user_option == 7 :
        outname="DTW"
        
        pathW = directory + "/W"
        pathX = directory + "/X"
        pathY = directory + "/Y"
        pathZ = directory + "/Z"
        gesturesW = amplitude_loader(pathW)
        gesturesX = amplitude_loader(pathX)
        gesturesY = amplitude_loader(pathY)
        gesturesZ = amplitude_loader(pathZ)
    
        gestures = np.append(gesturesW, gesturesX, axis = 1)
        gestures = np.append(gestures, gesturesY, axis = 1)
        gestures = np.append(gestures, gesturesZ, axis = 1)
        
        #gestures = amplitude_loader(path)
        key_gesture = gestures[key_idx-1]
        
        cost=[0]*len(gestures)
        for i, gesture in enumerate(gestures) :                                     
            for j, sensor in enumerate(gesture) :                                   
                n = len(sensor)                                                     
                m = len(key_gesture[j])                                             
                cost[i] += dynamic_time_warping(key_gesture[j], sensor, m, n)    
        gest_gest_sim[key_idx-1] = cost  
        
    gest_index = gest_index + 1
     
import pandas as pd
from sklearn.decomposition import TruncatedSVD                                  
from scipy.sparse import coo_matrix
from scipy.sparse import random as sparse_random
from sklearn.random_projection import sparse_random_matrix
from sklearn.decomposition import NMF
matrix = coo_matrix(gest_gest_sim,shape=(num_gestures,num_gestures)) 
print(matrix)
np.savetxt("gest_sim.csv", gest_gest_sim, delimiter=',')
svd = TruncatedSVD(n_components=p)
pc = svd.fit_transform(matrix)
df = pd.DataFrame(data = pc) 
df = df.T
print("------TOP P SVD after ", outname, "-----")
print("* ordered in gesture, score")
print(df)
df.to_pickle("./"+"SVD_"+outname+".pkl")
np.savetxt('component_SVD.csv', svd.components_, delimiter=',')


nmf = NMF(n_components=p)
pc = nmf.fit_transform(matrix)
df = pd.DataFrame(data = pc) 
df = df.T
print("------TOP P NMF after ", outname, "-----")
print("* ordered in gesture, score")
print(df)
df.to_pickle("./"+"NMF_"+outname+".pkl")
np.savetxt('component_NMF.csv', nmf.components_, delimiter=',')


Please enter the following inputs as the same values you used for task 0: 
Enter the window length (ex: 3): 3
Enter the shift length (ex: 3): 3
Enter the resolution (ex: 3): 3
Enter directory (e.g. Data/) :
Dataset 5/
Enter value p :
3
Enter user options (1 ~ 7)
* HINT : 1 = Dot similarity, 2 = PCA, 3 = SVD, 4 = NMF, 5 = LDA, 6 = Edit Distance, 7 = DTW
2
Enter vector model (tf, tfidf):
tf
Number of gesture :  1023
  (0, 0)	1.0
  (0, 1)	-0.8721963066757121
  (0, 2)	-0.8923259439210433
  (0, 3)	-0.8676868631121248
  (0, 4)	-0.8940454524227719
  (0, 5)	-0.9307711518318526
  (0, 6)	-0.00844530874796971
  (0, 7)	-0.8675286397820352
  (0, 8)	-0.9011659499493458
  (0, 9)	-0.8909616212470274
  (0, 10)	-0.8879130965872309
  (0, 11)	0.99448020672601
  (0, 12)	-0.8531902287350643
  (0, 13)	-0.8563270050504989
  (0, 14)	-0.9009189454004138
  (0, 15)	-0.919362808372269
  (0, 16)	-0.9072724254161018
  (0, 17)	0.03457545022440288
  (0, 18)	-0.9051137867622205
  (0, 19)	-0.9238377687558991
  (0, 20)	-

ValueError: Negative values in data passed to NMF (input X)