In [2]:
import math
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import re
from operator import itemgetter
import csv
import time

In [3]:
'''LOAD EMBEDDING'''
def load_embedding(embedding_name):
    File =  open(embedding_name)
    Model = {}
    for line in File:
        split = line.split()
        #print(split[0])
        word = split[0]
        #word = split[2]
        #print(word)
        try:
            embedding = np.array([float(val) for val in split[1:]])
            if len(embedding) == 300:
                Model[word] = embedding
        except:
            print('Error in ' + word)

    File.close()
    print(embedding_name +" model loaded!")
    print("-----------------------------------------------------------")
    return Model

In [30]:
''''''
def top_synsets_per_word_csv(embedding_1, embedding_2, wordList, csvpath, topSize, dictpath="NULL"):
    for word in wordList:
        top = []
        for synset in embedding_1:
            similarity = cosine_similarity([embedding_2[word]], [embedding_1[synset]])
            top.append({'synset':synset, 'sim':similarity})
            if(len(top)>topSize):
                top = sorted(top, key=itemgetter('sim'), reverse=True)
                top.pop()
        if dictpath != "NULL":
            top = translator(top, dictpath)

        with open(csvpath, "a") as output:
            head = "Most similar synset to " + str(word).upper() + "\n"
            output.write(head)
            dictWriter = csv.DictWriter(output, fieldnames=['synset', 'sim'])
            for element in top:
                dictWriter.writerow(element)
            output.write("\n")
            
    print("Results in " + str(csvpath))
    
    
def top_words_per_synset_csv(sEmbedding, wEmbedding, csvpath, topSize, dictpath="NULL", specific=[]):
    if len(specific) > 0:
        for spec in specific:
            top = []
            for word in wEmbedding:
                similarity = cosine_similarity([sEmbedding[spec]], [wEmbedding[word]])
                top.append({'word':word, 'sim':similarity})
                if(len(top)>topSize):
                    top = sorted(top, key=itemgetter('sim'), reverse=True)
                    top.pop()
            if dictpath != "NULL":
                specificSynset = translator(spec, dictpath, 'TRUE', 'TRUE')

            with open(csvpath, "a") as output:
                head = "Most similar words to " + str(specificSynset).upper() + "\n"
                output.write(head)
                dictWriter = csv.DictWriter(output, fieldnames=['word', 'sim'])
                for element in top:
                    dictWriter.writerow(element)
                output.write("\n")   
            print("Results in " + str(csvpath))
        
    else:
        synsetList = []
        for synset in sEmbedding:
            if synset.endswith("n"):
                synsetList.append(synset)
        mySynsets = np.random.choice(synsetList, 10)
        for synset in mySynsets:
            top = []
            for word in wEmbedding:
                similarity = cosine_similarity([sEmbedding[synset]], [wEmbedding[word]])
                top.append({'word':word, 'sim':similarity})
                if(len(top)>topSize):
                    top = sorted(top, key=itemgetter('sim'), reverse=True)
                    top.pop()
            if dictpath != "NULL":
                synset = translator(synset, dictpath, 'TRUE', 'TRUE')

            with open(csvpath, "a") as output:
                head = "Most similar words to " + str(synset).upper() + "\n"
                output.write(head)
                dictWriter = csv.DictWriter(output, fieldnames=['word', 'sim'])
                for element in top:
                    dictWriter.writerow(element)
                output.write("\n")
        print("Results in " + str(csvpath))
        
'''
Given a set of synsets from "sEmbedding", gives a top of most similar words from "wEmbedding" of "precission"
and calculates a percentage of successes.
Parameters:
    sEmbedding: a synset embedding
    wEmbedding: a word embedding
    csvpath: path to save the output csv file e.g. /home/user/Desktop/example.csv
    dictpath: dictionary to translate synset codes of sEmbedding into words 
    specific: set of specific synsets to try, if empty, it takes a random sample of 10 synsets
    precission: preccision of the measurement, if empty, it calculates precission relative to the given synset
'''        
def precission_words_per_synset_csv(sEmbedding, wEmbedding, csvpath, dictpath, specific=[], precission='relative'):
    start_time = time.time()
    if len(specific) > 0:
        mySynsets = specific
        print('0 out of ' + str(len(specific)))
    else:
        synsetList = []
        for synset in sEmbedding:
            if synset.endswith("n"):
                synsetList.append(synset)
        mySynsets = np.random.choice(synsetList, 10)
        print('0 out of 10')
    
    progress = 0
    goal = len(mySynsets)
    totalScore = 0
    maxPosibleScore = 0
    totalScore5 = 0
    maxPosibleScore5 = 0
    loop_time = time.time()
    for synset in mySynsets:
        topSize = 10
        tSynset = translator(synset, dictpath, 'TRUE', 'TRUE')
        lSynset = tSynset.split()
        if precission != 'relative':
            topSize = int(precission)
        else:
            topSize = len(lSynset)-1
        
        top = []
        for word in wEmbedding:
            #similarity = cosine_similarity([sEmbedding[synset]], [wEmbedding[word]])
            similarity = cosine_simil(sEmbedding[synset], wEmbedding[word])
            top.append({'word':word, 'sim':similarity})
            if(len(top)>topSize):
                top = sorted(top, key=itemgetter('sim'), reverse=True)
                top.pop()
        
        score = 0
        score5 = 0
        indexP5 = 0
        maxScore = len(lSynset)-1
        for syn in lSynset:
            if syn != lSynset[len(lSynset)-1]:
                for elem in top:
                    if str(syn).lower() == str(elem.get('word')).lower():
                        score = score + 1
                        if indexP5 < 5:
                            score5 = score5 + 1
                            indexP5 = indexP5 + 1
        totalScore = totalScore + score
        totalScore5 = totalScore5 + score5
        maxPosibleScore = maxPosibleScore + maxScore
        maxPosibleScore5 = maxPosibleScore5 + maxScore
        precis = score / len(top) * 100
        maxPrecis = maxScore / len(top) * 100
        
        write_time = time.time()
        with open(csvpath, "a") as output:
            head = "Most similar words to " + str(tSynset).upper() + "\n"
            output.write(head)
            dictWriter = csv.DictWriter(output, fieldnames=['word', 'sim'])
            for element in top:
                dictWriter.writerow(element)
            output.write("Precision: " + str(precis) + "%")
            output.write("\n")
            output.write("Max precision: " + str(maxPrecis) + "% or " + str(maxScore) + " possible matches")
            output.write("\n")
            output.write("\n")
        
        progress = progress + 1
        print(str(progress) + ' out of ' + str(goal))
        #print('WRITE TIME: ' + str(write_time - time.time()))
    #print('LOOP TIME: ' + str(loop_time - time.time()))
    with open(csvpath, "a") as output:
        total = totalScore / maxPosibleScore * 100
        total5 = totalScore5 / maxPosibleScore5 * 100
        output.write("Success: " + str(total) + "%")
        output.write("\n")
        output.write("Success P@5: " + str(total5) + "%")
        output.write("\n")
    print("Results in " + str(csvpath))
    #print('TIME TOTAL: '+ srt(start_time - time.time()))
    
    
def precission_words_per_synset_csv_bi(sEmbedding, wEmbedding, csvpath, dictpath, biDictpath, specific=[], precission='relative'):
    start_time = time.time()
    if len(specific) > 0:
        mySynsets = specific
        print('0 out of ' + str(len(specific)))
    else:
        synsetList = []
        for synset in sEmbedding:
            if synset.endswith("n"):
                synsetList.append(synset)
        mySynsets = np.random.choice(synsetList, 10)
        print('0 out of 10')
    
    progress = 0
    goal = len(mySynsets)
    totalScore = 0
    maxPosibleScore = 0
    totalScore5 = 0
    maxPosibleScore5 = 0
    loop_time = time.time()
    for synset in mySynsets:
        topSize = 10
        tSynset = translator(synset, dictpath, 'TRUE', 'TRUE')
        lSynset = tSynset.split()
        biSynset = translator(synset, biDictpath, 'FLASE', 'TRUE')
        print(biSynset)
        if precission != 'relative':
            topSize = int(precission)
        else:
            topSize = len(lSynset)-1
        
        top = []
        for word in wEmbedding:
            #similarity = cosine_similarity([sEmbedding[synset]], [wEmbedding[word]])
            similarity = cosine_simil(sEmbedding[synset], wEmbedding[word])
            top.append({'word':word, 'sim':similarity})
            if(len(top)>topSize):
                top = sorted(top, key=itemgetter('sim'), reverse=True)
                top.pop()
        
        score = 0
        score5 = 0
        indexP5 = 0
        maxScore = 1 
        for elem in top:
            if str(biSynset).lower() == str(elem.get('word')).lower():
                score = score + 1
                if indexP5 < 5:
                    score5 = score5 + 1
                    indexP5 = indexP5 + 1
        totalScore = totalScore + score
        totalScore5 = totalScore5 + score5
        maxPosibleScore = maxPosibleScore + maxScore
        maxPosibleScore5 = maxPosibleScore5 + maxScore
        precis = score / len(top) * 100
        maxPrecis = maxScore / len(top) * 100
        
        write_time = time.time()
        with open(csvpath, "a") as output:
            head = "Most similar words to " + str(tSynset).upper() + "\n"
            output.write(head)
            dictWriter = csv.DictWriter(output, fieldnames=['word', 'sim'])
            for element in top:
                dictWriter.writerow(element)
            output.write("Precision: " + str(precis) + "%")
            output.write("\n")
            output.write("Max precision: " + str(maxPrecis) + "% or " + str(maxScore) + " possible matches")
            output.write("\n")
            output.write("\n")
        
        progress = progress + 1
        print(str(progress) + ' out of ' + str(goal))
        #print('WRITE TIME: ' + str(write_time - time.time()))
    #print('LOOP TIME: ' + str(loop_time - time.time()))
    with open(csvpath, "a") as output:
        total = totalScore / maxPosibleScore * 100
        total5 = totalScore5 / maxPosibleScore5 * 100
        output.write("Success: " + str(total) + "%")
        output.write("\n")
        output.write("Success P@5: " + str(total5) + "%")
        output.write("\n")
    print("Results in " + str(csvpath))
    #print('TIME TOTAL: '+ srt(start_time - time.time()))

def precission_words_per_word_csv(w1Embedding, w2Embedding, csvpath, specific=[], precission='relative'):
    if len(specific) > 0:
        mySynsets = specific
        print('0 out of ' + str(len(specific)))
    else:
        synsetList = []
        for synset in w1Embedding:
            synsetList.append(synset)
        mySynsets = np.random.choice(synsetList, 10)
        print('0 out of 10')
    
    progress = 0
    goal = len(mySynsets)
    totalScore = 0
    maxPosibleScore = 0
    for synset in mySynsets:
        topSize = 10
        if precission != 'relative':
            topSize = int(precission)
        
        top = []
        for word in w2Embedding:
            #similarity = cosine_similarity([sEmbedding[synset]], [wEmbedding[word]])
            similarity = cosine_simil(w1Embedding[synset], w2Embedding[word])
            top.append({'word':word, 'sim':similarity})
            if(len(top)>topSize):
                top = sorted(top, key=itemgetter('sim'), reverse=True)
                top.pop()
        
        score = 0
        maxScore = 1 
        for elem in top:
            if str(synset).lower() == str(elem.get('word')).lower():
                score = score + 1
        totalScore = totalScore + score
        if score > 0:
            precis = 100
        else:
            precis = 0
        maxPosibleScore = maxPosibleScore + maxScore
        maxPrecis = maxScore / len(top) * 100
        
        write_time = time.time()
        with open(csvpath, "a") as output:
            head = "Most similar words to " + str(synset).upper() + "\n"
            output.write(head)
            dictWriter = csv.DictWriter(output, fieldnames=['word', 'sim'])
            for element in top:
                dictWriter.writerow(element)
            output.write("Precision: " + str(precis) + "%")
            output.write("\n")
            output.write("Max precision: " + str(maxPrecis) + "% or " + str(maxScore) + " possible matches")
            output.write("\n")
            output.write("\n")
        
        progress = progress + 1
        print(str(progress) + ' out of ' + str(goal))
        #print('WRITE TIME: ' + str(write_time - time.time()))
    #print('LOOP TIME: ' + str(loop_time - time.time()))
    with open(csvpath, "a") as output:
        total = totalScore / maxPosibleScore * 100
        output.write("Success: " + str(total) + "%")
        output.write("\n")
    print("Results in " + str(csvpath))
    #print('TIME TOTAL: '+ srt(start_time - time.time()))
    
def calculate_precission_csv(sEmbedding, wEmbedding, csvpath, dictpath, umbral, specific=[], precission='relative'):
    if len(specific) > 0:
        mySynsets = specific
        print('0 out of ' + str(len(specific)))
    else:
        synsetList = []
        for synset in sEmbedding:
            if synset.endswith("n"):
                synsetList.append(synset)
        mySynsets = np.random.choice(synsetList, 10)
        print('0 out of 10')
    
    progress = 0
    goal = len(mySynsets)
    tPosTotal = 0
    fPosTotal = 0
    fNegTotal = 0
    for synset in mySynsets:
        topSize = 10
        tSynset = translator(synset, dictpath, 'TRUE', 'TRUE')
        lSynset = tSynset.split()
        if precission != 'relative':
            topSize = int(precission)
        else:
            topSize = len(lSynset)-1
        
        top = []
        for word in wEmbedding:
            #similarity = cosine_similarity([sEmbedding[synset]], [wEmbedding[word]])
            similarity = cosine_simil(sEmbedding[synset], wEmbedding[word])
            top.append({'word':word, 'sim':similarity})
            if(len(top)>topSize):
                top = sorted(top, key=itemgetter('sim'), reverse=True)
                top.pop()
                
        valid = []
        noValid = []
        ind = 0
        for e in top:
            if e.get('sim') >= umbral:
                valid.append(e.get('word'))
                ind = ind + 1
            else:
                noValid.append(e.get('word'))
        
        correctW = lSynset
        #print('Correct: ' + str(correctW))
        #print('valid: ' + str(valid))
        #print('No valid: ' + str(noValid))
        fPos = 0
        tPos = 0
        for word in valid:
            if word.lower() in correctW:
                tPos = tPos + 1
            else:
                fPos = fPos + 1
        
        fNeg = 0
        for word in noValid:
            if word.lower() in correctW:
                fNeg = fNeg + 1
        
        tPosTotal = tPosTotal + tPos
        fPosTotal = fPosTotal + fPos
        fNegTotal = fNegTotal + fNeg
        if tPos + fPos > 0:
            pre = tPos / (tPos + fPos)
        else:
            pre = 0
        if tPos + fNeg > 0:
            recall = tPos / (tPos + fNeg)
        else:
            recall = 0
        if pre + recall > 0:
            f1 = 2 * (pre * recall / (pre + recall))
        else:
            f1 = 0
    
        with open(csvpath, "a") as output:
            head = "Most similar words to " + str(tSynset).upper() + "\n"
            output.write(head)
            dictWriter = csv.DictWriter(output, fieldnames=['word', 'sim'])
            for element in top:
                dictWriter.writerow(element)
            output.write("Precision: " + str(pre))
            output.write("\n")
            output.write("Recall: " + str(recall))
            output.write("\n")
            output.write("F1: " + str(f1))
            output.write("\n")
            output.write("\n")
        
        progress = progress + 1
        print(str(progress) + ' out of ' + str(goal))
        #print('WRITE TIME: ' + str(write_time - time.time()))
    #print('LOOP TIME: ' + str(loop_time - time.time()))
    with open(csvpath, "a") as output:
        if tPosTotal + fPosTotal > 0:
            preTotal = tPosTotal / (tPosTotal + fPosTotal)
        else:
            preTotal = 0
        if tPosTotal + fNegTotal > 0:
            recallTotal = tPosTotal / (tPosTotal + fNegTotal)
        else:
            recallTotal = 0
        if preTotal + recallTotal > 0:
            f1Total = 2 * (preTotal * recallTotal / (preTotal + recallTotal))
        else:
            f1Total = 0
        output.write("Precision: " + str(preTotal))
        output.write("\n")
        output.write("Recall: " + str(recallTotal))
        output.write("\n")
        output.write("F1: " + str(f1Total))
        output.write("\n")
    print("Results in " + str(csvpath))
    #print('TIME TOTAL: '+ srt(start_time - time.time()))

def txt_translator(inputFile, dictionary, outputFile):
    dictFile = open(dictionary)
    dictArray = []
    for line in dictFile:
        split = line.split()
        dictArray.append({'from':split[0], 'to':split[1]})
    dictFile.close()
    
    targetFile = open(inputFile)
    for line in targetFile:
        split = line.split()
        toWrite = []
        for word in split:
            #if word.endswith("-n"):
            if re.search('-[a-z]$', word):
                error = 1
                for elem in dictArray:
                    if word == elem.get('from'):
                        error = 0
                        toWrite.append(elem.get('to'))
                if error == 1:
                    print("Could not translate " + word)
                    toWrite.append(word)
            else:
                toWrite.append(word)
        with open(outputFile, 'a') as output:
            for res in toWrite:
                output.write(res + " ")
            output.write("\n")
    targetFile.close()
    
    print("Output file in " + outputFile)
    
    
def translator(to_translate, dictionary, namePlusCode='FALSE', strOnly='FALSE'):
    dictFile = open(dictionary)
    dictArray = []
    for line in dictFile:
        split = line.split()
        poly = ''
        for w in split:
            if w != split[0]:
                poly = poly + ' ' + w
        dictArray.append({'from':split[0], 'to':poly})
    dictFile.close()
    
    if len(to_translate) < 1:
        print("Can't translate, length 0")
        return -1
    
    else:
        if strOnly == 'FALSE':
            transList = []
            for element in to_translate:
                error = 1
                for elem in dictArray:
                    if element.get('synset') == elem.get('from'):
                        error = 0
                        traduction = elem.get('to')
                        if namePlusCode != 'FALSE':
                            traduction = traduction + ' ' + elem.get('from')
                        transList.append({'synset':traduction, 'sim':element.get('sim')})
                if error == 1:
                    transList.append({'synset':element.get('synset'), 'sim':element.get('sim')})
                    print(str(element.get('synset')) + "not found, will not be translated")
            return transList
    
        else:
            error = 1
            for elem in dictArray:
                if to_translate == elem.get('from'):
                    error = 0
                    translation = elem.get('to')
                    if namePlusCode != 'FALSE':
                        translation = translation + ' ' + elem.get('from')
            if error == 1:
                translation = to_translate
                print(to_translate + " not found, will not be translated")
            return translation
        
def random_sample(size, embedding='NULL', dictionary='NULL'):
    synsetList = []
    if embedding != 'NULL' and dictionary == 'NULL':
        for synset in embedding:
            if synset.endswith("n"):
                synsetList.append(synset)
    elif embedding == 'NULL' and dictionary != 'NULL':
        file = open(dictionary, 'r')
        for line in file:
            spl = line.split()
            code = spl[0]
            synsetList.append(code)
    elif embedding == 'NULL' and dictionary == 'NULL':
        print('No embedding nor dictionary path provided!!')
        return -1
    else:
        print('Choose only one: embedding or dictionary')
        return -1
    sample = np.random.choice(synsetList, size)
    return sample

def random_sample_word(size, embedding='NULL', dictionary='NULL'):
    wordList = []
    if embedding != 'NULL' and dictionary == 'NULL':
        for word in embedding:
            wordList.append(word)
    elif embedding == 'NULL' and dictionary != 'NULL':
        file = open(dictionary, 'r')
        for line in file:
            spl = line.split()
            code = spl[1]
            wordList.append(code)
    elif embedding == 'NULL' and dictionary == 'NULL':
        print('No embedding nor dictionary path provided!!')
        return -1
    else:
        print('Choose only one: embedding or dictionary')
        return -1
    sample = np.random.choice(wordList, size)
    return sample

def retrieve_synset_from_csv(csvPath):
    synsetList = []
    for file in csvPath:
        csv = open(file, 'r')
        for line in csv:
            if re.search('-[A-Z]$', line):
                split = line.split()
                synset = split[len(split)-1]
                synsetList.append(str(synset).lower())
        csv.close()
    return synsetList


def cosine_simil(vector1, vector2):
    #https://stats.stackexchange.com/questions/146221/is-cosine-similarity-identical-to-l2-normalized-euclidean-distance
    
    return np.dot(vector1,vector2)/(np.linalg.norm(vector1)*np.linalg.norm(vector2))


In [5]:
pathWN_GN = '/home/josu/Desktop/vecmap-master/wn_synsets_Mapped.txt'
pathGN = '/home/josu/Desktop/vecmap-master/GN_1,5M_Mapped.txt'

pathWN_GN_UKB = '/home/josu/Desktop/vecmap-master/wn_synsets_Mapped_GN+UKB.txt'
pathGN_UKB = '/home/josu/Desktop/vecmap-master/GN_1,5M_Mapped_UKB_Mapped_WN.txt'

pathWN_GN_UKB_PPA = '/home/josu/Desktop/vecmap-master/wn_synsets_Mapped_GN+UKB+PPA.txt'
pathGN_UKB_PPA = '/home/josu/Desktop/vecmap-master/GN_1,5M_Mapped_UKB_PPA_Mapped_WN.txt'

pathWN_Dict = '/home/josu/Downloads/wnet30_dict.synset-word.txt'

pathWN_src_GN = '/home/josu/Desktop/vecmap-master/WN_Mapped_src_GN.txt'
pathSrc_GN_WN = '/home/josu/Desktop/vecmap-master/src_GN_Mapped_WN.txt'

pathWN_center_GN = '/home/josu/Desktop/vecmap-master/WN_center_Mapped_GN.txt'
pathGN_center_WN = '/home/josu/Desktop/vecmap-master/GN_Mapped_center_WN.txt'

pathWN_src_GN_UKB = '/home/josu/Desktop/vecmap-master/WN_Mapped_src_GN_Mapped_UKB.txt'
pathSrc_GN_UKB_WN = '/home/josu/Desktop/vecmap-master/src_GN_Mapped_UKB_Mapped_WN.txt'

pathWN_center_GN_UKB = '/home/josu/Desktop/vecmap-master/WN_center_Mapped_GN_Mapped_UKB.txt'
pathGN_UKB_center_WN = '/home/josu/Desktop/vecmap-master/GN_Mapped_UKB_Mapped_center_WN.txt'

pathWN_src_GN_UKB_PPA = '/home/josu/Desktop/vecmap-master/WN_Mapped_src_GN_Mapped_UKB_PPA.txt'
pathSrc_GN_UKB_PPA_WN = '/home/josu/Desktop/vecmap-master/src_GN_Mapped_UKB_PPA_Mapped_WN.txt'

pathWN_center_GN_UKB_PPA = '/home/josu/Desktop/vecmap-master/WN_center_Mapped_GN_Mapped_UKB_PPA.txt'
pathGN_UKB_PPA_center_WN = '/home/josu/Desktop/vecmap-master/GN_Mapped_UKB_PPA_Mapped_center_WN.txt'

In [None]:
'''WN mapped to GN'''
wnEmbedding = load_embedding(pathWN_GN)
gnEmbedding = load_embedding(pathGN)

In [None]:
'''WN mapped to (GN mapped to UKB)'''
wnEmbedding = load_embedding(pathWN_GN_UKB)
gnEmbedding = load_embedding(pathGN_UKB)

In [None]:
'''WN mapped to ((GN mapped to UKB) + PPA)'''
wnEmbedding = load_embedding(pathWN_GN_UKB_PPA)
gnEmbedding = load_embedding(pathGN_UKB_PPA)

In [None]:
'''GN mapped to WN'''
wnEmbedding = load_embedding(pathWN_src_GN)
gnEmbedding = load_embedding(pathSrc_GN_WN)

In [None]:
'''WN and GN mapped to center'''
wnEmbedding = load_embedding(pathWN_center_GN)
gnEmbedding = load_embedding(pathGN_center_WN)

In [None]:
'''(GN mapped to UKB) mapped to WN'''
wnEmbedding = load_embedding(pathWN_src_GN_UKB)
gnEmbedding = load_embedding(pathSrc_GN_UKB_WN)

In [None]:
'''WN and (GN mapped to UKB) mapped to center'''
wnEmbedding = load_embedding(pathWN_center_GN_UKB)
gnEmbedding = load_embedding(pathGN_UKB_center_WN)

In [None]:
'''((GN mapped to UKB) + PPA) mapped to WN'''
wnEmbedding = load_embedding(pathWN_src_GN_UKB_PPA)
gnEmbedding = load_embedding(pathSrc_GN_UKB_PPA_WN)

In [None]:
'''WN and ((GN mapped to UKB) + PPA) mapped to center'''
wnEmbedding = load_embedding(pathWN_center_GN_UKB_PPA)
gnEmbedding = load_embedding(pathGN_UKB_PPA_center_WN)

In [None]:
sanity_test(wnEmbedding, gnEmbedding,'/home/josu/Desktop/prueba.txt')

In [None]:
word_synset_comparison(wnEmbedding, gnEmbedding, "party", "/home/josu/Desktop/basura.txt")

In [None]:
top_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Random_WN_GN.csv", 5, pathWN_Dict, '05953416-n')

In [None]:
'''PRUEBAS'''

'''Set 10 polysemicas'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Polysemic_P@5_GN+UKB_WN.csv", pathWN_Dict, ['08256968-n', '07838551-n', '04139859-n', '15098161-n', '13341756-n', '10639925-n', '10276659-n', '08349350-n', '06269956-n', '06780882-n'], 5)

'''Set 10 monosemicas'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Monosemic_P@5_GN+UKB_WN.csv", pathWN_Dict, ['09823502-n', '02992529-n', '03840681-n', '02377181-n', '14383252-n', '06115701-n', '04820908-n', '04825576-n', '02692232-n', '07187297-n'], 5)

'''Set 100 random'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Random_P@5_GN+UKB_WN.csv", pathWN_Dict, [],  5)

In [None]:
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Polysemic_P@5_WN_GN.csv", pathWN_Dict, ['05953416-n', '10639925-n'],  5)

In [None]:
'''WN and GN mapped to center'''
wnEmbedding = load_embedding(pathWN_center_GN)
gnEmbedding = load_embedding(pathGN_center_WN)

In [None]:
'''PRUEBAS'''

'''Set 10 polysemicas'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Polysemic_P@5_GN_center_WN.csv", pathWN_Dict, ['08256968-n', '07838551-n', '04139859-n', '15098161-n', '13341756-n', '10639925-n', '10276659-n', '08349350-n', '06269956-n', '06780882-n'], 5)

'''Set 10 monosemicas'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Monosemic_P@5_GN_center_WN.csv", pathWN_Dict, ['09823502-n', '02992529-n', '03840681-n', '02377181-n', '14383252-n', '06115701-n', '04820908-n', '04825576-n', '02692232-n', '07187297-n'], 5)

'''Set 100 random'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Random_P@5_GN_center_WN.csv", pathWN_Dict, [],  5)

In [None]:
'''WN and (GN mapped to UKB) mapped to center'''
wnEmbedding = load_embedding(pathWN_center_GN_UKB)
gnEmbedding = load_embedding(pathGN_UKB_center_WN)

In [None]:
'''PRUEBAS'''

'''Set 10 polysemicas'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Polysemic_P@5_GN+UKB_center_WN.csv", pathWN_Dict, ['08256968-n', '07838551-n', '04139859-n', '15098161-n', '13341756-n', '10639925-n', '10276659-n', '08349350-n', '06269956-n', '06780882-n'], 5)

'''Set 10 monosemicas'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Monosemic_P@5_GN+UKB_center_WN.csv", pathWN_Dict, ['09823502-n', '02992529-n', '03840681-n', '02377181-n', '14383252-n', '06115701-n', '04820908-n', '04825576-n', '02692232-n', '07187297-n'], 5)

'''Set 100 random'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Random_P@5_GN+UKB_center_WN.csv", pathWN_Dict, [],  5)

In [None]:
'''((GN mapped to UKB) + PPA) mapped to WN'''
wnEmbedding = load_embedding(pathWN_src_GN_UKB_PPA)
gnEmbedding = load_embedding(pathSrc_GN_UKB_PPA_WN)

In [None]:
'''PRUEBAS'''

'''Set 10 polysemicas'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Polysemic_P@5_GN+UKB+PPA_WN.csv", pathWN_Dict, ['08256968-n', '07838551-n', '04139859-n', '15098161-n', '13341756-n', '10639925-n', '10276659-n', '08349350-n', '06269956-n', '06780882-n'], 5)

'''Set 10 monosemicas'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Monosemic_P@5_GN+UKB+PPA_WN.csv", pathWN_Dict, ['09823502-n', '02992529-n', '03840681-n', '02377181-n', '14383252-n', '06115701-n', '04820908-n', '04825576-n', '02692232-n', '07187297-n'], 5)

'''Set 100 random'''
precission_words_per_synset_csv(wnEmbedding, gnEmbedding, "/home/josu/Desktop/Random_P@5_GN+UKB+PPA_WN.csv", pathWN_Dict, [],  5)

In [4]:
'''PRUEBAS WN_SYN Y WN_UKB'''

path_src_SYN_UKB = '/home/josu/Desktop/vecmap-master/EmbeddingP/WN_SYN->WN_UKB/WN_SYN_Mapped_UKB.txt'
path_UKB_src_SYN = '/home/josu/Desktop/vecmap-master/EmbeddingP/WN_SYN->WN_UKB/WN_UKB_Mapped_WN_SYN.txt'

path_SYN_src_UKB = '/home/josu/Desktop/vecmap-master/EmbeddingP/WN_UKB->WN_SYN/WN_SYN_Mapped_src_UKB.txt'
path_src_UKB_SYN = '/home/josu/Desktop/vecmap-master/EmbeddingP/WN_UKB->WN_SYN/src_WN_UKB_Mapped_WN_SYN.txt'

path_SYN_center_UKB = '/home/josu/Desktop/vecmap-master/EmbeddingP/WN_SYN-><-WN_UKB/WN_SYN_center_Mapped_WN_UKB.txt'
path_UKB_center_SYN = '/home/josu/Desktop/vecmap-master/EmbeddingP/WN_SYN-><-WN_UKB/WN_UKB_Mapped_center_WN_SYN.txt'

path_SYN_ortho_UKB = '/home/josu/Desktop/vecmap-master/WN_SYN_Mapped_WN_UKB_Ortho.txt'
path_UKB_ortho_SYN = '/home/josu/Desktop/vecmap-master/WN_UKB_Mapped_WN_SYN_Ortho.txt'

pathWN_Dict = '/home/josu/Downloads/wnet30_dict.synset-word.txt'
path_WN_Dict_mono = '/home/josu/Desktop/vecmap-master/dict_monosemico2.txt'
path_WN_Dict_poly = '/home/josu/Downloads/dict_polisemico.txt'

path_avg_WN_GN_UKB = '/home/josu/Desktop/vecmap-master_2/WN_Mapped_GN_avg_UKB.txt' 
path_avg_GN_UKB_WN = '/home/josu/Desktop/vecmap-master_2/GN_avg_UKB_Mapped_WN.txt' 

path_unsup_WN_UKB = '/home/josu/Desktop/vecmap-master_2/SYN_Mapped_UKB_unsup.txt'
path_unsup_UKB_WN = '/home/josu/Desktop/vecmap-master_2/UKB_Mapped_SYN_unsup.txt'

path_unsup_WN_GN = '/home/josu/Desktop/vecmap-master_2/WN_Mapped_GN_avg_UKB_unsup.txt'
path_unsup_GN_WN = '/home/josu/Desktop/vecmap-master_2/GN_avg_UKB_Mapped_WN_unsup.txt'

path_WN_GN_UKB_5 = '/home/josu/Desktop/vecmap-master_2/WN_Mapped_GN_UKB_5.txt'
path_GN_UKB_WN_5 = '/home/josu/Desktop/vecmap-master_2/GN_UKB_Mapped_WN_5.txt'

path_WN_GN_UKB_50 = '/home/josu/Desktop/vecmap-master_2/WN_Mapped_GN_UKB_50.txt'
path_GN_UKB_WN_50 = '/home/josu/Desktop/vecmap-master_2/GN_UKB_Mapped_WN_50.txt'

In [8]:
SYN_Embedding = load_embedding(path_src_SYN_UKB)
UKB_Embedding = load_embedding(path_UKB_src_SYN)

SYN_Embedding = load_embedding(path_SYN_src_UKB)
UKB_Embedding = load_embedding(path_src_UKB_SYN)

SYN_Embedding = load_embedding(path_SYN_center_UKB)
UKB_Embedding = load_embedding(path_UKB_center_SYN)

FileNotFoundError: [Errno 2] No such file or directory: '/home/josu/Desktop/vecmap-master/EmbeddingP/WN_SYN->WN_UKB/WN_SYN_Mapped_UKB.txt'

In [None]:
SYN_Embedding = load_embedding(path_src_SYN_UKB)
UKB_Embedding = load_embedding(path_UKB_src_SYN)
rSample = random_sample(SYN_Embedding, 100)

precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Random_SYN->UKB.csv", pathWN_Dict, rSample,  10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Monosemic_SYN->UKB.csv", pathWN_Dict, ['09823502-n', '02992529-n', '03840681-n', '02377181-n', '14383252-n', '06115701-n', '04820908-n', '04825576-n', '02692232-n', '07187297-n'], 10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Polysemic_UKB->SYN.csv", pathWN_Dict, ['08256968-n', '07838551-n', '04139859-n', '15098161-n', '13341756-n', '10639925-n', '10276659-n', '08349350-n', '06269956-n', '06780882-n'], 10)

SYN_Embedding = load_embedding(path_SYN_src_UKB)
UKB_Embedding = load_embedding(path_src_UKB_SYN)

precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Random_UKB->SYN.csv", pathWN_Dict, rSample,  10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Monosemic_UKB->SYN.csv", pathWN_Dict, ['09823502-n', '02992529-n', '03840681-n', '02377181-n', '14383252-n', '06115701-n', '04820908-n', '04825576-n', '02692232-n', '07187297-n'], 10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Polysemic_UKB->SYN.csv", pathWN_Dict, ['08256968-n', '07838551-n', '04139859-n', '15098161-n', '13341756-n', '10639925-n', '10276659-n', '08349350-n', '06269956-n', '06780882-n'], 10)

SYN_Embedding = load_embedding(path_SYN_center_UKB)
UKB_Embedding = load_embedding(path_UKB_center_SYN)

precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Random_SYN-><-UKB.csv", pathWN_Dict, rSample,  10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Monosemic_SYN-><-UKB.csv", pathWN_Dict, ['09823502-n', '02992529-n', '03840681-n', '02377181-n', '14383252-n', '06115701-n', '04820908-n', '04825576-n', '02692232-n', '07187297-n'], 10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Polysemic_SYN-><-UKB.csv", pathWN_Dict, ['08256968-n', '07838551-n', '04139859-n', '15098161-n', '13341756-n', '10639925-n', '10276659-n', '08349350-n', '06269956-n', '06780882-n'], 10)

In [None]:
SYN_Embedding = load_embedding(path_src_SYN_UKB)
UKB_Embedding = load_embedding(path_UKB_src_SYN)
mSample = random_sample(100, 'NULL', path_WN_Dict_mono)
pSample = random_sample(100, 'NULL', path_WN_Dict_poly)

precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Monosemic_SYN->UKB.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Polysemic_SYN->UKB.csv", pathWN_Dict, pSample, 10)

SYN_Embedding = load_embedding(path_SYN_src_UKB)
UKB_Embedding = load_embedding(path_src_UKB_SYN)

precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Monosemic_UKB->SYN.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Polysemic_UKB->SYN.csv", pathWN_Dict, pSample, 10)

SYN_Embedding = load_embedding(path_SYN_center_UKB)
UKB_Embedding = load_embedding(path_UKB_center_SYN)

precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Monosemic_SYN-><-UKB.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Polysemic_SYN-><-UKB.csv", pathWN_Dict, pSample, 10)

In [7]:
SYN_Embedding = load_embedding(path_SYN_ortho_UKB)
UKB_Embedding = load_embedding(path_UKB_ortho_SYN)
mSample = retrieve_synset_from_csv('/home/josu/Desktop/Monosemic_SYN-><-UKB.csv')
pSample = retrieve_synset_from_csv('/home/josu/Desktop/Polysemic_SYN-><-UKB.csv')
rSample = retrieve_synset_from_csv('/home/josu/Desktop/Random_SYN-><-UKB.csv')

print(len(mSample))
print(len(pSample))
print(len(rSample))

precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Random_SYN_ortho_UKB.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Monosemic_SYN_ortho_UKB.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Polysemic_SYN_ortho_UKB.csv", pathWN_Dict, pSample, 10)

SYN_Embedding = load_embedding(path_src_SYN_UKB)
UKB_Embedding = load_embedding(path_UKB_src_SYN)

precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Random_SYN->UKB.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, UKB_Embedding, "/home/josu/Desktop/Monosemic_SYN->UKB.csv", pathWN_Dict, mSample, 10)

/home/josu/Desktop/vecmap-master/WN_SYN_Mapped_WN_UKB_Ortho.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master/WN_UKB_Mapped_WN_SYN_Ortho.txt model loaded!
-----------------------------------------------------------
Most similar words to  FERDINAND_II 10968956-N

Most similar words to  SKULK 01918521-V

Most similar words to  CONVECTION 13452614-N

Most similar words to  GREEN_PEACH_APHID 02253264-N

Most similar words to  SET_DECORATION 04176889-N

Most similar words to  DEMOBILISATION DEMOBILIZATION 01158190-N

Most similar words to  PROSERPINA PROSERPINE 09569985-N

Most similar words to  FRANS_HALS HALS 11025508-N

Most similar words to  HEAVYHEARTEDNESS 07533492-N

Most similar words to  GRATINGLY HARSHLY RASPINGLY 00350521-R

Most similar words to  BOYLE'S_LAW MARIOTTE'S_LAW 05875988-N

Most similar words to  POTHUNTER 10460501-N

Most similar words to  FREEWHEELING 02001240-A

Most similar words to  ACHROMATINIC 0269663

1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53 out of 100
54 out of 100
55 out of 100
56 out of 100
57 out of 100
58 out of 100
59 out of 100
60 out of 100
61 out of 100
62 out of 100
63 out of 100
64 out of 100
65 out of 100
66 out of 100
67 out of 100
68 out of 100
69 out of 100
70 out of 100
71 out of 100
72 out of 100
7

In [None]:
l = retrieve_synset_from_csv('/home/josu/Desktop/Monosemic_SYN->UKB.csv')
print(len(l))

In [6]:
SYN_Embedding = load_embedding(path_avg_WN_GN_UKB)
W_Embedding = load_embedding(path_avg_GN_UKB_WN)
mSample = retrieve_synset_from_csv('/home/josu/Desktop/Monosemic_SYN-><-UKB.csv')
pSample = retrieve_synset_from_csv('/home/josu/Desktop/Polysemic_SYN-><-UKB.csv')
rSample = retrieve_synset_from_csv('/home/josu/Desktop/Random_SYN-><-UKB.csv')

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Random_avg_WN->GN+UKB.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Monosemic_avg_WN->GN+UKB.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Polysemic_avg_WN->GN+UKB.csv", pathWN_Dict, pSample, 10)

SYN_Embedding = load_embedding(path_unsup_WN_UKB)
W_Embedding = load_embedding(path_unsup_UKB_WN)

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Random_WN->UKB_unsup.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Monosemic_WN->UKB_unsup.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Polysemic_WN->UKB_unsup.csv", pathWN_Dict, pSample, 10)


/home/josu/Desktop/vecmap-master_2/WN_Mapped_GN_avg_UKB.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master_2/GN_avg_UKB_Mapped_WN.txt model loaded!
-----------------------------------------------------------
0 out of 100
1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100


26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53 out of 100
54 out of 100
55 out of 100
56 out of 100
57 out of 100
58 out of 100
59 out of 100
60 out of 100
61 out of 100
62 out of 100
63 out of 100
64 out of 100
65 out of 100
66 out of 100
67 out of 100
68 out of 100
69 out of 100
70 out of 100
71 out of 100
72 out of 100
73 out of 100
74 out of 100
75 out of 100
76 out of 100
77 out of 100
78 out of 100
79 out of 100
80 out of 100
81 out of 100
82 out of 100
83 out of 100
84 out of 100
85 out of 100
86 out of 100
87 out of 100
88 out of 100
89 out of 100
90 out of 100
91 out of 100
92 out of 100
93 out of 100
94 out of 100
95 out of 100
96 out of 100
97 out

In [5]:
SYN_Embedding = load_embedding(path_unsup_WN_GN)
W_Embedding = load_embedding(path_unsup_GN_WN)
mSample = retrieve_synset_from_csv('/home/josu/Desktop/Monosemic_SYN-><-UKB.csv')
pSample = retrieve_synset_from_csv('/home/josu/Desktop/Polysemic_SYN-><-UKB.csv')
rSample = retrieve_synset_from_csv('/home/josu/Desktop/Random_SYN-><-UKB.csv')

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Random_WN->GN_unsup.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Monosemic_WN->GN_unsup.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Polysemic_WN->GN_unsup.csv", pathWN_Dict, pSample, 10)


/home/josu/Desktop/vecmap-master_2/WN_Mapped_GN_unsup.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master_2/GN_Mapped_WN_unsup.txt model loaded!
-----------------------------------------------------------
0 out of 100
1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53 o

In [5]:
#SYN_Embedding = load_embedding(path_unsup_WN_GN)
#W_Embedding = load_embedding(path_unsup_GN_WN)
mSample = retrieve_synset_from_csv('/home/josu/Desktop/Monosemic_SYN-><-UKB.csv')
pSample = retrieve_synset_from_csv('/home/josu/Desktop/Polysemic_SYN-><-UKB.csv')
rSample = retrieve_synset_from_csv('/home/josu/Desktop/Random_SYN-><-UKB.csv')

#precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Random_WN->GN_unsup.csv", pathWN_Dict, rSample, 10)
#precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Monosemic_WN->GN_unsup.csv", pathWN_Dict, mSample, 10)
#precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Polysemic_WN->GN_unsup.csv", pathWN_Dict, pSample, 10)

SYN_Embedding = load_embedding(path_WN_GN_UKB_5)
W_Embedding = load_embedding(path_GN_UKB_WN_5)

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Random_WN->GN_5.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Monosemic_WN->GN_5.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Polysemic_WN->GN_5.csv", pathWN_Dict, pSample, 10)

SYN_Embedding = load_embedding(path_WN_GN_UKB_50)
W_Embedding = load_embedding(path_GN_UKB_WN_50)

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Random_WN->GN_50.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Monosemic_WN->GN_50.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Polysemic_WN->GN_50.csv", pathWN_Dict, pSample, 10)

/home/josu/Desktop/vecmap-master_2/WN_Mapped_GN_UKB_5.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master_2/GN_UKB_Mapped_WN_5.txt model loaded!
-----------------------------------------------------------
0 out of 100
1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53 o

28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53 out of 100
54 out of 100
55 out of 100
56 out of 100
57 out of 100
58 out of 100
59 out of 100
60 out of 100
61 out of 100
62 out of 100
63 out of 100
64 out of 100
65 out of 100
66 out of 100
67 out of 100
68 out of 100
69 out of 100
70 out of 100
71 out of 100
72 out of 100
73 out of 100
74 out of 100
75 out of 100
76 out of 100
77 out of 100
78 out of 100
79 out of 100
80 out of 100
81 out of 100
82 out of 100
83 out of 100
84 out of 100
85 out of 100
86 out of 100
87 out of 100
88 out of 100
89 out of 100
90 out of 100
91 out of 100
92 out of 100
93 out of 100
94 out of 100
95 out of 100
96 out of 100
97 out of 100
98 out of 100
99 out

In [5]:
SYN_Embedding = load_embedding('/media/josu/Transcend/TFG/Embedding/AVG/WN_Mapped_GN_unsup_2.txt')
W_Embedding = load_embedding('/media/josu/Transcend/TFG/Embedding/AVG/GN_Mapped_WN_unsup_2.txt')
mSample = retrieve_synset_from_csv(['/home/josu/Desktop/Monosemic_SYN-><-UKB.csv'])
pSample = retrieve_synset_from_csv(['/home/josu/Desktop/Polysemic_SYN-><-UKB.csv'])
rSample = retrieve_synset_from_csv(['/home/josu/Desktop/Random_SYN-><-UKB.csv'])

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Random_WN->GN_unsup.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Monosemic_WN->GN_unsup.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Polysemic_WN->GN_unsup.csv", pathWN_Dict, pSample, 10)

SYN_Embedding = load_embedding('/media/josu/Transcend/TFG/Embedding/AVG/WN_Mapped_GN_avg_UKB_unsup.txt')
W_Embedding = load_embedding('/media/josu/Transcend/TFG/Embedding/AVG/GN_avg_UKB_Mapped_WN_unsup.txt')

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Random_WN->GN_UKB_unsup.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Monosemic_WN->GN_UKB_unsup.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Polysemic_WN->GN_UKB_unsup.csv", pathWN_Dict, pSample, 10)

SYN_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/SYN_Mapped_UKB_unsup.txt')
W_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/UKB_Mapped_SYN_unsup.txt')
mpSample = retrieve_synset_from_csv(['/home/josu/Desktop/Monosemic_SYN-><-UKB.csv', '/home/josu/Desktop/Polysemic_SYN-><-UKB.csv'])

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/MonoPoly_WN->UKB_unsup.csv", pathWN_Dict, mpSample, 10)



/media/josu/Transcend/TFG/Embedding/AVG/WN_Mapped_GN_unsup_2.txt model loaded!
-----------------------------------------------------------
/media/josu/Transcend/TFG/Embedding/AVG/GN_Mapped_WN_unsup_2.txt model loaded!
-----------------------------------------------------------
0 out of 100
1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 o

21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53 out of 100
54 out of 100
55 out of 100
56 out of 100
57 out of 100
58 out of 100
59 out of 100
60 out of 100
61 out of 100
62 out of 100
63 out of 100
64 out of 100
65 out of 100
66 out of 100
67 out of 100
68 out of 100
69 out of 100
70 out of 100
71 out of 100
72 out of 100
73 out of 100
74 out of 100
75 out of 100
76 out of 100
77 out of 100
78 out of 100
79 out of 100
80 out of 100
81 out of 100
82 out of 100
83 out of 100
84 out of 100
85 out of 100
86 out of 100
87 out of 100
88 out of 100
89 out of 100
90 out of 100
91 out of 100
92 out

In [6]:
SYN_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/WN_Mapped_ESP_unsup.txt')
W_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/ESP_Mapped_WN_unsup.txt')
mSample = retrieve_synset_from_csv(['/home/josu/Desktop/Monosemic_SYN-><-UKB.csv'])
pSample = retrieve_synset_from_csv(['/home/josu/Desktop/Polysemic_SYN-><-UKB.csv'])
rSample = retrieve_synset_from_csv(['/home/josu/Desktop/Random_SYN-><-UKB.csv'])

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Random_WN->ESP_unsup.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Monosemic_WN->ESP_unsup.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Polysemic_WN->ESP_unsup.csv", pathWN_Dict, pSample, 10)



/home/josu/Desktop/vecmap-master_2/WN_Mapped_ESP_unsup.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master_2/ESP_Mapped_WN_unsup.txt model loaded!
-----------------------------------------------------------
0 out of 100
1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53

FileNotFoundError: [Errno 2] No such file or directory: '/home/josu/Desktop/vecmap-master_2/UKB_Mapped_WN_5_Mono.txt'

In [7]:
SYN_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/WN_Mapped_UKB_5_Mono.txt')
W_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/UKB_Mapped_WN_5_mono.txt')


precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Random_WN->UKB_5_mono.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Monosemic_WN->UKB_5_mono.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Polysemic_WN->UKB_5_mono.csv", pathWN_Dict, pSample, 10)

/home/josu/Desktop/vecmap-master_2/WN_Mapped_UKB_5_Mono.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master_2/UKB_Mapped_WN_5_mono.txt model loaded!
-----------------------------------------------------------
0 out of 100
1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100


In [8]:
SYN_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/WN_Mapped_ESP.txt')
W_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/ESP_Mapped_WN.txt')
mSample = retrieve_synset_from_csv(['/home/josu/Desktop/Monosemic_SYN-><-UKB.csv'])
pSample = retrieve_synset_from_csv(['/home/josu/Desktop/Polysemic_SYN-><-UKB.csv'])
rSample = retrieve_synset_from_csv(['/home/josu/Desktop/Random_SYN-><-UKB.csv'])
pathDictBi = '/home/josu/Desktop/dict_syn_es.txt'


precission_words_per_synset_csv_bi(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Random_WN->ESP.csv", pathWN_Dict, pathDictBi, rSample, 10)
precission_words_per_synset_csv_bi(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Monosemic_WN->ESP.csv", pathWN_Dict, pathDictBi, mSample, 10)
precission_words_per_synset_csv_bi(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Polysemic_WN->ESP.csv", pathWN_Dict, pathDictBi, pSample, 10)

/home/josu/Desktop/vecmap-master_2/WN_Mapped_ESP.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master_2/ESP_Mapped_WN.txt model loaded!
-----------------------------------------------------------
0 out of 100
11463746-n not found, will not be translated
11463746-n
1 out of 100
03191029-n not found, will not be translated
03191029-n
2 out of 100
13151975-n not found, will not be translated
13151975-n
3 out of 100
10330439-n not found, will not be translated
10330439-n
4 out of 100
03469493-n not found, will not be translated
03469493-n
5 out of 100
09289709-n not found, will not be translated
09289709-n
6 out of 100
02199502-n not found, will not be translated
02199502-n
7 out of 100
13612427-n not found, will not be translated
13612427-n
8 out of 100
07377082-n not found, will not be translated
07377082-n
9 out of 100
14831338-n not found, will not be translated
14831338-n
10 out of 100
10773126-n not found, will not be translat

15 out of 100
14555688-n not found, will not be translated
14555688-n
16 out of 100
13890226-n not found, will not be translated
13890226-n
17 out of 100
10277352-n not found, will not be translated
10277352-n
18 out of 100
06530412-n not found, will not be translated
06530412-n
19 out of 100
13184394-n not found, will not be translated
13184394-n
20 out of 100
09252766-n not found, will not be translated
09252766-n
21 out of 100
01734347-a not found, will not be translated
01734347-a
22 out of 100
13332395-n not found, will not be translated
13332395-n
23 out of 100
08353734-n not found, will not be translated
08353734-n
24 out of 100
13973769-n not found, will not be translated
13973769-n
25 out of 100
15236015-n not found, will not be translated
15236015-n
26 out of 100
01850742-a not found, will not be translated
01850742-a
27 out of 100
14781631-n not found, will not be translated
14781631-n
28 out of 100
00819756-n not found, will not be translated
00819756-n
29 out of 100
103579

34 out of 100
00234675-n not found, will not be translated
00234675-n
35 out of 100
 fantástico 01676517-a
36 out of 100
07297376-n not found, will not be translated
07297376-n
37 out of 100
01368597-v not found, will not be translated
01368597-v
38 out of 100
04039381-n not found, will not be translated
04039381-n
39 out of 100
 nick 03823540-n
40 out of 100
01962178-v not found, will not be translated
01962178-v
41 out of 100
13937554-n not found, will not be translated
13937554-n
42 out of 100
 ruido 04823031-n
43 out of 100
13449319-n not found, will not be translated
13449319-n
44 out of 100
00963671-a not found, will not be translated
00963671-a
45 out of 100
01772172-v not found, will not be translated
01772172-v
46 out of 100
01366809-v not found, will not be translated
01366809-v
47 out of 100
02462580-v not found, will not be translated
02462580-v
48 out of 100
04883804-n not found, will not be translated
04883804-n
49 out of 100
 cuarto 13847124-n
50 out of 100
 cuidado 0197

In [5]:
W1_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/UKB_Mapped_GN_unsup.txt')
W2_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/GN_Mapped_UKB_unsup.txt')

/home/josu/Desktop/vecmap-master_2/UKB_Mapped_GN_unsup.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master_2/GN_Mapped_UKB_unsup.txt model loaded!
-----------------------------------------------------------
0 out of 100


KeyError: '11463746-n'

In [14]:
path_WN_Dict_mono = '/home/josu/Desktop/vecmap-master/dict_monosemico2.txt'
path_WN_Dict_poly = '/home/josu/Downloads/dict_polisemico.txt'
mSample = random_sample_word(5, 'NULL', path_WN_Dict_mono)
pSample = random_sample_word(5, 'NULL', path_WN_Dict_poly)
rSample = random_sample_word(5, W1_Embedding, 'NULL')
print(len(mSample))
print(len(rSample))
print(len(pSample))


precission_words_per_word_csv(W1_Embedding, W2_Embedding, "/home/josu/Desktop/Resul/Random_UKB->GN.csv", rSample, 10)
precission_words_per_word_csv(W1_Embedding, W2_Embedding, "/home/josu/Desktop/Resul/Mono_UKB->GN.csv", mSample, 10)
precission_words_per_word_csv(W1_Embedding, W2_Embedding, "/home/josu/Desktop/Resul/Poli_UKB->GN.csv", pSample, 10)

5
5
5
0 out of 5
1 out of 5
2 out of 5
3 out of 5
4 out of 5
5 out of 5
Results in /home/josu/Desktop/Resul/Random_UKB->GN.csv
0 out of 5


KeyError: 'Ascaridia_galli'

In [16]:
mSample = random_sample_word(5, 'NULL', path_WN_Dict_mono)
pSample = random_sample_word(5, 'NULL', path_WN_Dict_poly)

precission_words_per_word_csv(W1_Embedding, W2_Embedding, "/home/josu/Desktop/Resul/Mono_UKB->GN.csv", mSample, 10)
precission_words_per_word_csv(W1_Embedding, W2_Embedding, "/home/josu/Desktop/Resul/Poli_UKB->GN.csv", pSample, 10)

0 out of 5
1 out of 5
2 out of 5
3 out of 5
4 out of 5
5 out of 5
Results in /home/josu/Desktop/Resul/Mono_UKB->GN.csv
0 out of 5
1 out of 5
2 out of 5
3 out of 5
4 out of 5


KeyError: 'Jacobs'

In [5]:
SYN_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/WN_Mapped_GN.txt')
W_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/GN_Mapped_WN.txt')
mSample = retrieve_synset_from_csv(['/home/josu/Desktop/Monosemic_SYN-><-UKB.csv'])
pSample = retrieve_synset_from_csv(['/home/josu/Desktop/Polysemic_SYN-><-UKB.csv'])
rSample = retrieve_synset_from_csv(['/home/josu/Desktop/Random_SYN-><-UKB.csv'])

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Random_WN->GN.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Monosemic_WN->GN.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Polysemic_WN->GN.csv", pathWN_Dict, pSample, 10)


/home/josu/Desktop/vecmap-master_2/WN_Mapped_GN.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master_2/GN_Mapped_WN.txt model loaded!
-----------------------------------------------------------
0 out of 100
1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53 out of 100
54

In [None]:
SYN_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/WN_Mapped_FT.txt')
W_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/FT_Mapped_WN.txt')
mSample = retrieve_synset_from_csv(['/home/josu/Desktop/Monosemic_SYN-><-UKB.csv'])
pSample = retrieve_synset_from_csv(['/home/josu/Desktop/Polysemic_SYN-><-UKB.csv'])
rSample = retrieve_synset_from_csv(['/home/josu/Desktop/Random_SYN-><-UKB.csv'])

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Random_WN->FT.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Monosemic_WN->FT.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Polysemic_WN->FT.csv", pathWN_Dict, pSample, 10)



/home/josu/Desktop/vecmap-master_2/WN_Mapped_FT.txt model loaded!
-----------------------------------------------------------
Error in talk
Error in τ
Error in —machine
Error in –
Error in html
Error in ã¥â¼âã¥â¥âã¥â
Error in gimnã
Error in ãâ´ãâ»ãâãâ¸ãâãâºãâ»ãâãâãâµãâ½ãâ¸ãâãâ¾ãâ±ãâ
Error in ãâ´ãâ»ãâãâ¸ãâãâºãâ»ãâãâãâµãâ½ãâ¸ãâãâ¾ãâ±ãâ
Error in ãâãâãâ¸ãâ½ãâµãâ¾ãâ±ãâ
Error in keikyū
Error in —
Error in talk
Error in —
Error in –
Error in –
Error in vandalism
Error in –
Error in wikipedia,
Error in wikipedia
Error in sources
Error in —
Error in —
Error in com/news/ã¤â¸âã¦âµâ·ã¦ââ¬ã¥â®â¶ã¥â
Error in the
Error in see
Error in —
Error in å
Error in com/up/ù
Error in #
Error in ^
Error in –
Error in nguyá»
Error in doctrine
Error in ð
Error in book
Error in —
Error in ,
Error in ‒
Error in miã
Error in the
Error in jesus
Error in –
Error in —
Error in –
Error in ryã
Error in /ãâãâ
Error in w
Error in i
Error in php/despre_plutrocaã
Er

In [5]:
SYN_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/WN_Mapped_FT_UKB.txt')
W_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/FT_UKB_Mapped_WN.txt')
mSample = retrieve_synset_from_csv(['/home/josu/Desktop/Monosemic_SYN-><-UKB.csv'])
pSample = retrieve_synset_from_csv(['/home/josu/Desktop/Polysemic_SYN-><-UKB.csv'])
rSample = retrieve_synset_from_csv(['/home/josu/Desktop/Random_SYN-><-UKB.csv'])

precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Random_WN->FT+UKB.csv", pathWN_Dict, rSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Monosemic_WN->FT+UKB.csv", pathWN_Dict, mSample, 10)
precission_words_per_synset_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Polysemic_WN->FT+UKB.csv", pathWN_Dict, pSample, 10)

/home/josu/Desktop/vecmap-master_2/WN_Mapped_FT_UKB.txt model loaded!
-----------------------------------------------------------
/home/josu/Desktop/vecmap-master_2/FT_UKB_Mapped_WN.txt model loaded!
-----------------------------------------------------------
0 out of 100
1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53 out o

In [None]:
SYN_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/WN_Mapped_FT_UKB.txt')
W_Embedding = load_embedding('/home/josu/Desktop/vecmap-master_2/FT_UKB_Mapped_WN.txt')
rSample = retrieve_synset_from_csv(['/home/josu/Desktop/Random_SYN-><-UKB.csv'])

In [31]:
calculate_precission_csv(SYN_Embedding, W_Embedding, "/home/josu/Desktop/Resul/Random_WN->FT+UKB_P.csv", pathWN_Dict, 0.60, rSample, 10)

0 out of 100
1 out of 100
2 out of 100
3 out of 100
4 out of 100
5 out of 100
6 out of 100
7 out of 100
8 out of 100
9 out of 100
10 out of 100
11 out of 100
12 out of 100
13 out of 100
14 out of 100
15 out of 100
16 out of 100
17 out of 100
18 out of 100
19 out of 100
20 out of 100
21 out of 100
22 out of 100
23 out of 100
24 out of 100
25 out of 100
26 out of 100
27 out of 100
28 out of 100
29 out of 100
30 out of 100
31 out of 100
32 out of 100
33 out of 100
34 out of 100
35 out of 100
36 out of 100
37 out of 100
38 out of 100
39 out of 100
40 out of 100
41 out of 100
42 out of 100
43 out of 100
44 out of 100
45 out of 100
46 out of 100
47 out of 100
48 out of 100
49 out of 100
50 out of 100
51 out of 100
52 out of 100
53 out of 100
54 out of 100
55 out of 100
56 out of 100
57 out of 100
58 out of 100
59 out of 100
60 out of 100
61 out of 100
62 out of 100
63 out of 100
64 out of 100
65 out of 100
66 out of 100
67 out of 100
68 out of 100
69 out of 100
70 out of 100
71 out of 100
72