In [1]:
import os

import numpy as np

In [2]:
#Gaussian Boson Sampling
#GBS Experiment 2
#Dataset size is 12 x 12
# 3 biclusters of size 4 x 4 each

In [3]:
#Fix the global NumPy seed so that every random draw below is reproducible
np.random.seed(4)

#Each bicluster is a 4 x 4 block whose entries are drawn (with replacement)
#from the "high" values 0.9, 0.8 and 0.7
high_values = [0.9,0.8,0.7]

#Draw the three blocks one after another (same order of random calls as a plain loop)
#and collect them into a single array of shape 3 x 4 x 4
biclusters = np.array([np.random.choice(high_values, [4,4], replace=True) for _ in range(3)])

In [4]:
# Display the three generated 4 x 4 biclusters (a single array of shape 3 x 4 x 4)
biclusters

array([[[0.7, 0.7, 0.8, 0.8],
        [0.9, 0.9, 0.7, 0.8],
        [0.7, 0.9, 0.8, 0.7],
        [0.8, 0.9, 0.9, 0.7]],

       [[0.9, 0.7, 0.7, 0.7],
        [0.9, 0.9, 0.8, 0.8],
        [0.8, 0.7, 0.8, 0.7],
        [0.9, 0.7, 0.7, 0.8]],

       [[0.7, 0.9, 0.8, 0.7],
        [0.7, 0.9, 0.9, 0.7],
        [0.8, 0.8, 0.9, 0.7],
        [0.8, 0.9, 0.7, 0.7]]])

In [5]:
#Background: a 12 x 12 dataset filled with the "low" values 0, 0.1 and 0.2
D = np.random.choice([0,0.1,0.2],[12,12],replace=True)

#Plant the biclusters on the diagonal:
#bicluster k overwrites rows 4k..4k+3 and columns 4k..4k+3
for k in range(3):
    block = slice(4*k, 4*k + 4)
    D[block, block] = biclusters[k]

# looking at D
print(D)

[[0.7 0.7 0.8 0.8 0.1 0.1 0.1 0.  0.  0.2 0.2 0.2]
 [0.9 0.9 0.7 0.8 0.2 0.1 0.2 0.1 0.2 0.1 0.  0.1]
 [0.7 0.9 0.8 0.7 0.1 0.  0.2 0.1 0.2 0.  0.2 0. ]
 [0.8 0.9 0.9 0.7 0.1 0.2 0.1 0.1 0.1 0.  0.2 0.2]
 [0.  0.2 0.2 0.  0.9 0.7 0.7 0.7 0.  0.2 0.2 0.1]
 [0.1 0.  0.2 0.1 0.9 0.9 0.8 0.8 0.2 0.2 0.  0. ]
 [0.  0.  0.2 0.1 0.8 0.7 0.8 0.7 0.1 0.  0.2 0.1]
 [0.2 0.2 0.  0.  0.9 0.7 0.7 0.8 0.1 0.2 0.2 0.2]
 [0.1 0.  0.  0.  0.1 0.  0.1 0.1 0.7 0.9 0.8 0.7]
 [0.2 0.1 0.1 0.  0.1 0.2 0.  0.2 0.7 0.9 0.9 0.7]
 [0.  0.  0.  0.  0.2 0.2 0.2 0.1 0.8 0.8 0.9 0.7]
 [0.  0.  0.2 0.  0.2 0.2 0.2 0.1 0.8 0.9 0.7 0.7]]


In [6]:
#Before shuffling, record which indices of the unshuffled dataset each bicluster occupies,
#so we can keep track of where the biclusters end up.
#The blocks sit on the diagonal, so the same index list serves for both rows and columns:
#bicluster k -> [4k, 4k+1, 4k+2, 4k+3]
biclusters_old_idx = [list(range(4*k, 4*k + 4)) for k in range(3)]

In [7]:
#Draw the two permutations: columns first, then rows (this order fixes how the random stream is consumed)
col_idx = np.random.permutation(12)
row_idx = np.random.permutation(12)

#Locate the columns and rows of each bicluster after the shuffle.
#Every entry of biclusters_col_idx[k] / biclusters_row_idx[k] is a pair
#[idx position in new dataset, idx position in old dataset], listed by increasing new position.
biclusters_col_idx = []
biclusters_row_idx = []

for k in range(0,3):
    members = biclusters_old_idx[k]
    biclusters_col_idx.append(
        [[new_pos, old_pos] for new_pos, old_pos in enumerate(col_idx) if old_pos in members]
    )
    biclusters_row_idx.append(
        [[new_pos, old_pos] for new_pos, old_pos in enumerate(row_idx) if old_pos in members]
    )

print(col_idx)
print(row_idx)
for i in range(0,3):
    print("Bicluster #:",i)
    print(f"Bicluster is in cols: {biclusters_col_idx[i]}, rows: {biclusters_row_idx[i]}")

[ 4  0 10  6  7  8  1  9  5 11  3  2]
[11 10  0  3  1  7  2  6  9  4  5  8]
Bicluster #: 0
Bicluster is in cols: [[1, 0], [6, 1], [10, 3], [11, 2]], rows: [[2, 0], [3, 3], [4, 1], [6, 2]]
Bicluster #: 1
Bicluster is in cols: [[0, 4], [3, 6], [4, 7], [8, 5]], rows: [[5, 7], [7, 6], [9, 4], [10, 5]]
Bicluster #: 2
Bicluster is in cols: [[2, 10], [5, 8], [7, 9], [9, 11]], rows: [[0, 11], [1, 10], [8, 9], [11, 8]]


In [8]:
#Sort each bicluster's [new idx, old idx] column pairs by the old idx (second column).
#Convert the nested list to an array once, before the loop: the conversion does not
#depend on i, so repeating it on every iteration was redundant work.
biclusters_col_idx = np.array(biclusters_col_idx)
for i in range(0,3):
    #argsort of the old-idx column gives the order of the pairs; fancy indexing builds a
    #reordered copy, which is then written back into slot i
    biclusters_col_idx[i] = biclusters_col_idx[i][biclusters_col_idx[i][:, 1].argsort()]
    print(biclusters_col_idx[i])
    print("---")

[[ 1  0]
 [ 6  1]
 [11  2]
 [10  3]]
---
[[0 4]
 [8 5]
 [3 6]
 [4 7]]
---
[[ 5  8]
 [ 7  9]
 [ 2 10]
 [ 9 11]]
---


In [9]:
#Sort each bicluster's [new idx, old idx] row pairs by the old idx (second column).
#Convert the nested list to an array once, before the loop: the conversion does not
#depend on i, so repeating it on every iteration was redundant work.
biclusters_row_idx = np.array(biclusters_row_idx)
for i in range(0,3):
    #argsort of the old-idx column gives the order of the pairs; fancy indexing builds a
    #reordered copy, which is then written back into slot i
    biclusters_row_idx[i] = biclusters_row_idx[i][biclusters_row_idx[i][:, 1].argsort()]
    print(biclusters_row_idx[i])
    print("---")

[[2 0]
 [4 1]
 [6 2]
 [3 3]]
---
[[ 9  4]
 [10  5]
 [ 7  6]
 [ 5  7]]
---
[[11  8]
 [ 8  9]
 [ 1 10]
 [ 0 11]]
---


In [10]:
#Now for the actual shuffling, step 1: permute the columns only.
#Column j of new_D is column col_idx[j] of D; np.take builds a new array, so D itself is left untouched.
new_D = np.take(D, col_idx, axis=1)
new_D

array([[0.1, 0.7, 0.2, 0.1, 0. , 0. , 0.7, 0.2, 0.1, 0.2, 0.8, 0.8],
       [0.2, 0.9, 0. , 0.2, 0.1, 0.2, 0.9, 0.1, 0.1, 0.1, 0.8, 0.7],
       [0.1, 0.7, 0.2, 0.2, 0.1, 0.2, 0.9, 0. , 0. , 0. , 0.7, 0.8],
       [0.1, 0.8, 0.2, 0.1, 0.1, 0.1, 0.9, 0. , 0.2, 0.2, 0.7, 0.9],
       [0.9, 0. , 0.2, 0.7, 0.7, 0. , 0.2, 0.2, 0.7, 0.1, 0. , 0.2],
       [0.9, 0.1, 0. , 0.8, 0.8, 0.2, 0. , 0.2, 0.9, 0. , 0.1, 0.2],
       [0.8, 0. , 0.2, 0.8, 0.7, 0.1, 0. , 0. , 0.7, 0.1, 0.1, 0.2],
       [0.9, 0.2, 0.2, 0.7, 0.8, 0.1, 0.2, 0.2, 0.7, 0.2, 0. , 0. ],
       [0.1, 0.1, 0.8, 0.1, 0.1, 0.7, 0. , 0.9, 0. , 0.7, 0. , 0. ],
       [0.1, 0.2, 0.9, 0. , 0.2, 0.7, 0.1, 0.9, 0.2, 0.7, 0. , 0.1],
       [0.2, 0. , 0.9, 0.2, 0.1, 0.8, 0. , 0.8, 0.2, 0.7, 0. , 0. ],
       [0.2, 0. , 0.7, 0.2, 0.1, 0.8, 0. , 0.9, 0.2, 0.7, 0. , 0.2]])

In [11]:
#Shuffle the rows as well, giving the fully shuffled dataset:
#new_D[a, b] == D[row_idx[a], col_idx[b]]
#It is built directly from D rather than from the previous value of new_D, so re-running
#this cell cannot apply the row permutation a second time (which would silently
#desynchronise new_D from row_idx and the bicluster index tables).
new_D = D[np.ix_(row_idx, col_idx)]
new_D

array([[0.2, 0. , 0.7, 0.2, 0.1, 0.8, 0. , 0.9, 0.2, 0.7, 0. , 0.2],
       [0.2, 0. , 0.9, 0.2, 0.1, 0.8, 0. , 0.8, 0.2, 0.7, 0. , 0. ],
       [0.1, 0.7, 0.2, 0.1, 0. , 0. , 0.7, 0.2, 0.1, 0.2, 0.8, 0.8],
       [0.1, 0.8, 0.2, 0.1, 0.1, 0.1, 0.9, 0. , 0.2, 0.2, 0.7, 0.9],
       [0.2, 0.9, 0. , 0.2, 0.1, 0.2, 0.9, 0.1, 0.1, 0.1, 0.8, 0.7],
       [0.9, 0.2, 0.2, 0.7, 0.8, 0.1, 0.2, 0.2, 0.7, 0.2, 0. , 0. ],
       [0.1, 0.7, 0.2, 0.2, 0.1, 0.2, 0.9, 0. , 0. , 0. , 0.7, 0.8],
       [0.8, 0. , 0.2, 0.8, 0.7, 0.1, 0. , 0. , 0.7, 0.1, 0.1, 0.2],
       [0.1, 0.2, 0.9, 0. , 0.2, 0.7, 0.1, 0.9, 0.2, 0.7, 0. , 0.1],
       [0.9, 0. , 0.2, 0.7, 0.7, 0. , 0.2, 0.2, 0.7, 0.1, 0. , 0.2],
       [0.9, 0.1, 0. , 0.8, 0.8, 0.2, 0. , 0.2, 0.9, 0. , 0.1, 0.2],
       [0.1, 0.1, 0.8, 0.1, 0.1, 0.7, 0. , 0.9, 0. , 0.7, 0. , 0. ]])

In [12]:
#Now save it to the npz
#The arrays are passed positionally, so np.savez stores them under these keys:
#arr_0 : new_D
#arr_1 : row_idx
#arr_2 : col_idx
#arr_3 : biclusters_row_idx
#arr_4 : biclusters_col_idx
#np.savez does not create missing parent directories, so make sure ./problems exists first
os.makedirs('./problems', exist_ok=True)
np.savez('./problems/gbs_exp2_part1.npz',new_D,row_idx,col_idx,biclusters_row_idx,biclusters_col_idx)