## Encoder generator: 002

In [1]:
import os

import random
import numpy


import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import AdamOptimizer, GradientDescentOptimizer

import torch

import sys
sys.path.append("..")

from wordsToNumbers import Corpus
from wordsToNumbers import fibonacci_vocabulary

from wordsToQubits import put_word_on_sphere

from utils import get_corpus_from_directory, working_window, get_word_from_sphere

from qencode.initialize import setAux
from qencode.encoders import e2_classic
from qencode.training_circuits import swap_t
from qencode.qubits_arrangement import QubitsArrangement

from training.cost.swap_cost import sw_cost, sw_fidelity

In [2]:
np.random.seed(73)

## Corpus

In [3]:
"""
corpus_path="C:/Users/tomut/Documents/GitHub/Qountry/CountryMixt/"

corpus_tex = get_corpus_from_directory(corpus_path, limit=1)

corpus= Corpus(corpus_tex)
print(corpus.prop())
"""
corpus_text = "Same old dive, same old end of the work week drink Bartender knows my name,"#but I don't mind She kicks 'em up strong, serves me up right And here I go again I'm drinkin' one, I'm drinkin' two I got my heartache medication, a strong dedication To gettin' over you, turnin' me loose On that hardwood jukebox lost in neon time My heartache medication, well it suits me fine And I'm drinkin' enough to take you off my mind I got my heartache medication"
corpus= Corpus(corpus_text)
print(corpus.prop())

nr. words:15 
nr. distinct words: 13 
len.text/len.vocab:1.1538461538461537


In [4]:
parameterize_vovabulary = fibonacci_vocabulary(corpus.vocabulary)

## Training set 

In [5]:
history_lenghth = 3

In [6]:
x,y = working_window(history_lenghth, splited_text=corpus.split_text)

In [7]:
print("len training set:", len(x))

len training set: 11


## Working principles

## Just a simple encoder

### BIG encoder

In [8]:
shots = 2500
nr_trash=2
nr_latent=2

spec_big = QubitsArrangement(nr_trash, nr_latent, nr_swap=1, nr_ent=0)
print("Qubits:", spec_big.qubits)

#set up the device 
dev = qml.device("default.qubit", wires=spec_big.num_qubits)

Qubits: [0, 1, 2, 3, 4, 5, 6]


In [9]:
parameterize_vovabulary = fibonacci_vocabulary(corpus.vocabulary)

In [10]:
# circuit initializer
def circuit_initializer(words, qubits):
    
    for i in range(len(words)):
        print("i words",words)
        put_word_on_sphere(words[i], qubit=qubits[i])


@qml.qnode(dev)
def encoder_e2(init_params, encoder_params,spec=spec_big, reinit_state=None):
    #initilaization
    circuit_initializer(init_params,qubits= [*spec.latent_qubits, *spec.trash_qubits])
   
    
    #encoder
    for params in encoder_params:
        e2_classic(params, [*spec.latent_qubits, *spec_big.trash_qubits])

    #swap test 
    swap_t(spec)

    return [qml.probs(i) for i in spec.swap_qubits]

#### Training parameters

In [11]:
epochs = 50

batch_size = 2
num_samples = 0.8 # proportion of the data used for training 

learning_rate = 0.0003
beta1 = 0.9
beta2 = 0.999
opt = AdamOptimizer(learning_rate, beta1=beta1, beta2=beta2)

In [12]:
training_data = []

for i in range(int(len(x)*num_samples)):
    w_l = [ parameterize_vovabulary[w] for w in x[i]]
    w_l.append(parameterize_vovabulary[y[i]])
    training_data.append(w_l)
training_data = torch.tensor(training_data)

test_data = [ ]
for i in range(int(len(x)*num_samples),len(x)):
    w_l=[parameterize_vovabulary[w] for w in x[i]]
    w_l.append(parameterize_vovabulary[y[i]])
    test_data.append(w_l)
test_data = torch.tensor(test_data)

print("data example:",training_data[0])

data example: tensor([[-0.8005, -0.5000,  0.3304],
        [ 0.0000,  1.0000,  0.0000],
        [-0.4545, -0.1667, -0.8750],
        [-0.8005, -0.5000,  0.3304]])


In [13]:
def iterate_batches(X, batch_size):
   
    X1 = [x  for x in X]
    
    
    random.shuffle(X1)

    batch_list = []
    batch = []
    for x in X:
        if len(batch) < batch_size:
            batch.append(x)

        else:
            batch_list.append(batch)
            batch = []
    if len(batch) != 0:
        batch_list.append(batch)
    return batch_list

In [14]:
batch_list=iterate_batches(X=training_data, batch_size=2)
batch_list

[[tensor([[-0.8005, -0.5000,  0.3304],
          [ 0.0000,  1.0000,  0.0000],
          [-0.4545, -0.1667, -0.8750],
          [-0.8005, -0.5000,  0.3304]]),
  tensor([[ 0.0000,  1.0000,  0.0000],
          [-0.4545, -0.1667, -0.8750],
          [-0.8005, -0.5000,  0.3304],
          [ 0.0000,  1.0000,  0.0000]])],
 [tensor([[-0.8005, -0.5000,  0.3304],
          [ 0.0000,  1.0000,  0.0000],
          [-0.0000, -1.0000, -0.0000],
          [-0.9284,  0.3333, -0.1642]]),
  tensor([[ 0.0000,  1.0000,  0.0000],
          [-0.0000, -1.0000, -0.0000],
          [-0.9284,  0.3333, -0.1642],
          [ 0.8856, -0.3333,  0.3234]])],
 [tensor([[-0.9284,  0.3333, -0.1642],
          [ 0.8856, -0.3333,  0.3234],
          [-0.4076,  0.8333,  0.3734],
          [ 0.5269,  0.5000,  0.6873]]),
  tensor([[ 0.8856, -0.3333,  0.3234],
          [-0.4076,  0.8333,  0.3734],
          [ 0.5269,  0.5000,  0.6873],
          [ 0.3159, -0.6667, -0.6751]])]]

###  training

In [15]:
# initialize random encoder parameters
nr_encod_qubits = len(spec_big.trash_qubits) + len(spec_big.latent_qubits)
nr_par_encoder =  15 * int(nr_encod_qubits*(nr_encod_qubits-1)/2)
encoder_params = np.random.uniform(size=(1, nr_par_encoder), requires_grad=True)

#print(qml.draw(encoder_e2)(init_params=training_data[0], encoder_params=encoder_params, spec=spec_big))

In [16]:
def cost(encoder_params, X):
    return sw_cost(encoder_params, input_data=X, circuit=encoder_e2, reinit_state=None)

def fidelity(encoder_params, X):
    print("x",X)
    return sw_fidelity(encoder_params, input_data=X, circuit=encoder_e2, reinit_state=None)
  
    
loss_hist=[]
fid_hist=[]

loss_hist_test=[]
fid_hist_test=[]

for epoch in range(epochs):
    batches = iterate_batches(X=training_data, batch_size=batch_size)
    for xbatch in batches:
        encoder_params = opt.step(cost, encoder_params, X=xbatch)

        
    if epoch%5 == 0:
        
        loss_training = cost(encoder_params, [training_data] )
        fidel = fidelity(encoder_params, training_data )
        
        loss_hist.append(loss_training)
        fid_hist.append(fidel)
        print("Epoch:{} | Loss:{} | Fidelity:{}".format(epoch, loss_training, fidel))

        loss_test = cost(encoder_params, test_data )
        fidel = fidelity(encoder_params, test_data )
        loss_hist_test.append(loss_test)
        fid_hist_test.append(fidel)
        print("Test-Epoch:{} | Loss:{} | Fidelity:{}".format(epoch, loss_test, fidel))
        
        
        
        
        """
        experiment_parameters={"experiment":"Encoder_002","params":encoder_params}
        f=open("Encoder_002_/params"+str(epoch)+".txt","w")
        f.write(str(experiment_parameters))
        f.close()
        """



RuntimeError: Boolean value of Tensor with more than one value is ambiguous