## Encoding / decoding round-trip check

In [1]:
import os

import pennylane as qml
from pennylane import numpy as np

import sys
sys.path.append("..")
from wordsToNumbers import Corpus
from wordsToNumbers import number_vocabulary

from wordsToQubits import put_word_on_qubits

from utils import get_corpus_from_directory, working_window, get_word_from_sphere

### Corpus

In [2]:
# Directory that holds the corpus text files. The original author's checkout
# stays the default so existing runs are unchanged; set QOUNTRY_CORPUS_PATH to
# point the notebook at a corpus on another machine.
corpus_path = os.environ.get(
    "QOUNTRY_CORPUS_PATH",
    '/Users/voicutu/Documents/GitHub/Qountry/CountryMixt',
)

# NOTE(review): `limit=1` is forwarded to the project helper; presumably it caps
# how many files are read from the directory -- confirm in utils.
corpus_tex = get_corpus_from_directory(corpus_path, limit=1)

# Wrap the raw text in the project's Corpus object and print its summary.
corpus = Corpus(corpus_tex)
print(corpus.prop())

nr. words:1648 
nr. distinct words: 499 
len.text/len.vocab:3.302605210420842


### Encoding

In [3]:
# Integer encoding of the first 34 vocabulary entries; the displayed result maps
# each word to an integer code.
# NOTE(review): the binary vocabulary in the next cell slices 33 words, not 34 --
# confirm the difference is intended.
parameterize_vovabulary_1 = number_vocabulary(corpus.vocabulary[:34])
parameterize_vovabulary_1

{'buried': 0,
 'buildings': 1,
 'pastures': 2,
 'lions': 3,
 'drives': 4,
 'at': 5,
 'ended': 6,
 'i': 7,
 'suits': 8,
 'downs': 9,
 'said': 10,
 'same': 11,
 'easy': 12,
 'put': 13,
 'union': 14,
 'n': 15,
 'winter': 16,
 'follow': 17,
 'very': 18,
 'satisfied': 19,
 'singers': 20,
 'texas': 21,
 'golden': 22,
 'darned': 23,
 'cold': 24,
 'finally': 25,
 'once': 26,
 'mama': 27,
 'two': 28,
 'sweet': 29,
 'from': 30,
 'jukebox': 31,
 'died': 32,
 'lonesome': 33}

In [4]:
# Binary encoding of the first 33 vocabulary entries: with binar=True the
# displayed result maps each word to a fixed-width string of '0'/'1' characters.
parameterize_vovabulary_2 = number_vocabulary(corpus.vocabulary[:33], binar=True)
parameterize_vovabulary_2

{'buried': '000000',
 'buildings': '000001',
 'pastures': '000010',
 'lions': '000011',
 'drives': '000100',
 'at': '000101',
 'ended': '000110',
 'i': '000111',
 'suits': '001000',
 'downs': '001001',
 'said': '001010',
 'same': '001011',
 'easy': '001100',
 'put': '001101',
 'union': '001110',
 'n': '001111',
 'winter': '010000',
 'follow': '010001',
 'very': '010010',
 'satisfied': '010011',
 'singers': '010100',
 'texas': '010101',
 'golden': '010110',
 'darned': '010111',
 'cold': '011000',
 'finally': '011001',
 'once': '011010',
 'mama': '011011',
 'two': '011100',
 'sweet': '011101',
 'from': '011110',
 'jukebox': '011111',
 'died': '100000'}

In [5]:
# Size of the word register. Six wires give 2**6 = 64 basis states, matching the
# 6-character codes of the binary vocabulary used in this notebook.
N_QUBITS = 6

dev = qml.device("default.qubit", wires=list(range(N_QUBITS)))


# encode the word
@qml.qnode(dev)
def w_encode(word_params):
    """Encode one word on the register and return the measurement distribution.

    Args:
        word_params: Encoding of the word, handed unchanged to
            ``put_word_on_qubits``. In this notebook it is a bit string taken
            from the binary vocabulary.

    Returns:
        The probabilities of all ``2**N_QUBITS`` computational basis states,
        as produced by ``qml.probs`` over every wire.
    """
    # NOTE(review): put_word_on_qubits is a project helper; presumably it
    # prepares the basis state named by `word_params` -- confirm in wordsToQubits.
    # A fresh list is built for each call so no wire list is shared.
    put_word_on_qubits(word_params, qubits=list(range(N_QUBITS)))

    # measure
    return qml.probs(wires=list(range(N_QUBITS)))

In [6]:
import math

def get_word_from_prob(prob, parameterize_vovabulary, binar=False):
    """Decode a probability vector back into a vocabulary word.

    The index of the largest probability is taken as the predicted code and
    looked up among the values of ``parameterize_vovabulary``.

    Args:
        prob: Indexable sequence of probabilities, one per basis state.
        parameterize_vovabulary: Mapping ``word -> code``. Codes are compared
            through ``str()``, so integer codes and bit-string codes both work.
        binar: When true, the winning index is rendered as a zero-padded binary
            string before the lookup. The width is the number of bits needed
            to index ``len(prob)`` states.

    Returns:
        The first word whose code matches the prediction, or ``"_"`` when no
        code matches, ``prob`` is empty, or no probability is strictly positive.
    """
    placeholder = "_"

    # First index holding the strictly largest positive probability. It stays
    # None when nothing is positive, so there is never a float index to format.
    best_index = None
    best_prob = 0.0
    for index, value in enumerate(prob):
        if value > best_prob:
            best_prob = value
            best_index = index

    if best_index is None:
        return placeholder

    if binar:
        # Exact ceil(log2(len(prob))) without floating-point rounding.
        nr_qubits = (len(prob) - 1).bit_length()
        target = format(best_index, "b").zfill(nr_qubits)
    else:
        target = str(best_index)

    # Iteration order is kept so the first matching word wins.
    for word, code in parameterize_vovabulary.items():
        if str(code) == target:
            return word
    return placeholder

In [8]:
# Single-word check: fetch the word's bit-string code, run it through the
# encoding circuit and print the resulting basis-state probabilities.
word = 'cold'
word_params = parameterize_vovabulary_2[word]
w_decode_param = w_encode(word_params)
print("pred:", w_decode_param)
print("original:", word)

pred: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
original: cold


In [9]:
# Decode the measured distribution against the integer-coded vocabulary.
get_word_from_prob(w_decode_param, parameterize_vovabulary_1, binar=False)

prob 1.0
i 24
24
0
24
1
24
2
24
3
24
4
24
5
24
6
24
7
24
8
24
9
24
10
24
11
24
12
24
13
24
14
24
15
24
16
24
17
24
18
24
19
24
20
24
21
24
22
24
23
24
24


'cold'

In [11]:
# Decode the same distribution against the bit-string vocabulary.
get_word_from_prob(w_decode_param, parameterize_vovabulary_2, binar=True)

prob 1.0
i 24
011000
000000
011000
000001
011000
000010
011000
000011
011000
000100
011000
000101
011000
000110
011000
000111
011000
001000
011000
001001
011000
001010
011000
001011
011000
001100
011000
001101
011000
001110
011000
001111
011000
010000
011000
010001
011000
010010
011000
010011
011000
010100
011000
010101
011000
010110
011000
010111
011000
011000


'cold'

In [13]:
# Round-trip every word of the binary vocabulary: encode its bit string on the
# circuit, decode the distribution with the integer vocabulary, and record
# whether the original word comes back.
errors = []
corect = 0
for w, code in parameterize_vovabulary_2.items():
    pred_vector = w_encode(code)
    extracted_word = get_word_from_prob(pred_vector, parameterize_vovabulary_1)
    if extracted_word == w:
        corect += 1
    else:
        errors.append(w)

prob 1.0
i 0
0
0
prob 1.0
i 1
1
0
1
1
prob 1.0
i 2
2
0
2
1
2
2
prob 1.0
i 3
3
0
3
1
3
2
3
3
prob 1.0
i 4
4
0
4
1
4
2
4
3
4
4
prob 1.0
i 5
5
0
5
1
5
2
5
3
5
4
5
5
prob 1.0
i 6
6
0
6
1
6
2
6
3
6
4
6
5
6
6
prob 1.0
i 7
7
0
7
1
7
2
7
3
7
4
7
5
7
6
7
7
prob 1.0
i 8
8
0
8
1
8
2
8
3
8
4
8
5
8
6
8
7
8
8
prob 1.0
i 9
9
0
9
1
9
2
9
3
9
4
9
5
9
6
9
7
9
8
9
9
prob 1.0
i 10
10
0
10
1
10
2
10
3
10
4
10
5
10
6
10
7
10
8
10
9
10
10
prob 1.0
i 11
11
0
11
1
11
2
11
3
11
4
11
5
11
6
11
7
11
8
11
9
11
10
11
11
prob 1.0
i 12
12
0
12
1
12
2
12
3
12
4
12
5
12
6
12
7
12
8
12
9
12
10
12
11
12
12
prob 1.0
i 13
13
0
13
1
13
2
13
3
13
4
13
5
13
6
13
7
13
8
13
9
13
10
13
11
13
12
13
13
prob 1.0
i 14
14
0
14
1
14
2
14
3
14
4
14
5
14
6
14
7
14
8
14
9
14
10
14
11
14
12
14
13
14
14
prob 1.0
i 15
15
0
15
1
15
2
15
3
15
4
15
5
15
6
15
7
15
8
15
9
15
10
15
11
15
12
15
13
15
14
15
15
prob 1.0
i 16
16
0
16
1
16
2
16
3
16
4
16
5
16
6
16
7
16
8
16
9
16
10
16
11
16
12
16
13
16
14
16
15
16
16
prob 1.0
i 17
17
0
17
1
17
2
17
3


In [14]:
# Summary of the round-trip check: failures, successes and success ratio.
n_checked = corect + len(errors)
print("nr errors:", len(errors))
print("corecte:", corect)
print("%:", corect / n_checked)

nr errors: 0
corecte: 33
%: 1.0


In [15]:
# List the words that failed to round-trip (empty when every word decoded).
print("errors", errors, sep="\n")

errors
[]
