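## Encoding/decoding check

Each vocabulary word is encoded on a single qubit, the Pauli X, Y and Z expectation values are read back, and the nearest vocabulary vector is looked up to confirm that the original word is recovered.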

In [1]:
import os

import pennylane as qml
from pennylane import numpy as np


from wordsToNumbers import Corpus
from wordsToNumbers import fibonacci_vocabulary

from wordsToQubits import put_word_on_sphere

### Corpus

In [2]:
def song_as_corpus(path, limit=1):
    """Read a lyrics file and return its leading lines as one corpus string.

    Args:
        path: Path of the text file to read.
        limit: Fraction of the file's lines to keep, counted from the top.
            Values outside [0, 1] are clamped, so ``limit > 1`` yields the
            whole file (instead of raising ``IndexError``) and a negative
            value yields no lines.

    Returns:
        A string that starts with a single space, followed by every kept
        line prefixed with a newline and a space. Lines keep whatever
        trailing newline they had in the file.
    """
    with open(path) as f:
        lines = f.readlines()

    # Clamp the count so an out-of-range fraction can never index past the file.
    n_keep = max(0, min(len(lines), int(len(lines) * limit)))

    # A single join avoids the quadratic cost of repeated string concatenation.
    return " " + "".join("\n " + line for line in lines[:n_keep])

In [3]:
# Folder that holds one lyrics text file per song.
country_mixt_path = 'CountryMixt'

songs_files = os.listdir(country_mixt_path)

# Collect the text of every visible file, then stitch the pieces together once.
song_texts = []
for song_file in songs_files:
    if song_file[0] == ".":
        # Names starting with a dot are skipped.
        continue
    path = country_mixt_path + "/" + song_file
    print(path)
    song_texts.append(song_as_corpus(path, limit=1) + " ")

corpus_text = "".join(song_texts)
        
        

CountryMixt/original.txt
CountryMixt/InMyTimeOfDying.txt
CountryMixt/BigDreamsAndFadedJeans.txt
CountryMixt/ShesNoGood.txt
CountryMixt/ManOfConstantSorrow.txt
CountryMixt/PettyPeggy.txt
CountryMixt/FeelingFunnyInMyMindLord.txt
CountryMixt/TalkinNewYork.txt
CountryMixt/BabyLetMeFollowYouDown.txt


In [4]:
# Build the Corpus from the concatenated song texts and print its summary
# (per the recorded output: word count, distinct-word count, and their ratio).
corpus= Corpus(corpus_text)
print(corpus.prop())

nr. words:1648 
nr. distinct words: 499 
len.text/len.vocab:3.302605210420842


### Encoding

In [5]:
# Map every vocabulary word to a 3-component vector (word -> [x, y, z], as the
# displayed dict shows).
# NOTE(review): presumably points of a Fibonacci lattice on the unit sphere --
# confirm in wordsToNumbers.fibonacci_vocabulary.
parameterize_vovabulary = fibonacci_vocabulary(corpus.vocabulary)
# Bare expression so the notebook displays the resulting mapping.
parameterize_vovabulary 

{'light': [0.0, 1.0, 0.0],
 'sky': [-0.06601819570659982, 0.9959839357429718, 0.060478075180885736],
 'nobody': [0.011058487466867974, 0.9919678714859438, -0.12600576095794697],
 'stage': [0.09416311661652535, 0.9879518072289156, 0.12281911114391847],
 'hillbilly': [-0.17579398395351573, 0.9839357429718876, -0.03109548050940358],
 'm': [0.16823859232153715, 0.9799196787148594, -0.10701962119646269],
 'meet': [-0.05664619191328753, 0.9759036144578314, 0.21072101041392016],
 'blowed': [-0.10851849104812623, 0.9718875502008032, -0.2089457510104454],
 'jeans': [0.23618801444269322, 0.9678714859437751, 0.08625548406105461],
 'great': [-0.24626953754128653, 0.963855421686747, 0.1016564851067754],
 'lions': [0.11891007354997837, 0.9598393574297188, -0.25410392035000795],
 'see': [0.08797206350487134, 0.9558232931726908, 0.2804688008873844],
 'lord': [-0.2653576107072834, 0.9518072289156626, -0.1537801593951876],
 'abound': [0.3114537853452374, 0.9477911646586346, -0.0684722410116087],
 'backi

In [6]:
# Single-wire simulator: one qubit is enough to hold one encoded word.
dev = qml.device("default.qubit", wires=1)


# encode the word
@qml.qnode(dev)
def w_encode(word_params, obs='z'):
    """Encode a word on qubit 0 and measure one Pauli expectation value.

    Args:
        word_params: Word parameters, forwarded unchanged to
            ``put_word_on_sphere``.
        obs: Pauli observable to measure on wire 0: 'x', 'y' or 'z'.

    Returns:
        The ``qml.expval`` measurement of the selected Pauli operator.

    Raises:
        ValueError: If ``obs`` is not one of 'x', 'y', 'z'. Previously the
            function fell through and returned ``None`` from the QNode.
    """
    pauli_by_axis = {'x': qml.PauliX, 'y': qml.PauliY, 'z': qml.PauliZ}

    # Reject an unsupported observable before any gate is queued.
    if obs not in pauli_by_axis:
        raise ValueError(
            "obs must be one of 'x', 'y' or 'z', got {!r}".format(obs)
        )

    put_word_on_sphere(word_params, qubit=0)

    # measure
    return qml.expval(pauli_by_axis[obs](0))
    


In [7]:
def get_word_prediction(pred_vector, parameterize_vovabulary):
    """Return the vocabulary word whose vector lies closest to ``pred_vector``.

    Closeness is the Euclidean distance over the first three components.
    When several words are equally close, the first one in the vocabulary's
    iteration order wins.

    Args:
        pred_vector: Indexable with at least three numeric components.
        parameterize_vovabulary: Mapping from word to an indexable with at
            least three numeric components.

    Returns:
        The closest word converted with ``str``, or ``"-"`` when no word
        could be selected (for example, an empty vocabulary).
    """
    # Initialise the variable that is actually returned, so an empty
    # vocabulary yields the "-" sentinel instead of UnboundLocalError.
    next_word = "-"
    # Start from infinity so that any finite distance is accepted.
    dist = float("inf")

    for w, w_param in parameterize_vovabulary.items():
        new_d = np.sqrt(
            (w_param[0] - pred_vector[0]) ** 2
            + (w_param[1] - pred_vector[1]) ** 2
            + (w_param[2] - pred_vector[2]) ** 2
        )
        # Strict comparison keeps the earliest word on ties.
        if new_d < dist:
            next_word = str(w)
            dist = new_d

    return next_word

In [8]:
#parameterize_vovabulary['in']= [0.7723651124128641, 0.0, 0.635178819803891]
# Round-trip a single word: encode it, then read back <X>, <Y>, <Z>.
word = parameterize_vovabulary['are']
w_decode_param = [w_encode(word, obs=axis) for axis in ('x', 'y', 'z')]

print("pred:", w_decode_param)
print("original:", word)

pred: [tensor(0.39835802, requires_grad=True), tensor(0.91566265, requires_grad=True), tensor(0.05359851, requires_grad=True)]
original: [0.398358017676368, 0.9156626506024097, 0.05359850785911683]


In [9]:
# Look up the vocabulary word nearest to the decoded vector; the bare
# expression lets the notebook display the result.
get_word_prediction(w_decode_param,parameterize_vovabulary)

'are'

In [10]:
# Round-trip every vocabulary word and tally hits and misses.
errors = []
corect = 0
for w, w_params in parameterize_vovabulary.items():
    # Decode the encoded word from its three Pauli expectation values.
    pred_vector = [w_encode(w_params, obs=axis) for axis in ('x', 'y', 'z')]
    extracted_word = get_word_prediction(pred_vector, parameterize_vovabulary)
    if extracted_word == w:
        corect += 1
    else:
        errors.append(w)


In [11]:
# Summary of the round-trip check: misses, hits, and the hit ratio.
n_errors = len(errors)
n_checked = corect + n_errors

print("nr errors:", n_errors)
print("corecte:", corect)
print("%:", corect / n_checked)

nr errors: 0
corecte: 499
%: 1.0


In [12]:
# List the words that did not survive the round trip, under a header line.
print("errors", errors, sep="\n")

errors
[]
