# Algoritmo de transposición de conjunto de datos generalizado

En este notebook se ha desarrollado un algoritmo que permite hacer transformaciones a datasets de *n* dimensiones. Con el siguiente ejemplo se va a entender cómo funciona y cuáles son las motivaciones de su desarrollo.

## Ejemplo

In [1]:
import numpy as np

# Build the initial 4x4 data set: the values 1..16 laid out row by row

src = np.arange(1, 17).reshape(4, 4)

# Hand-written target: what the data should look like when it is regrouped
# into sub-blocks of shape (2, 2)

dest = np.array(
    [
        [1, 2, 5, 6],
        [3, 4, 7, 8],
        [9, 10, 13, 14],
        [11, 12, 15, 16],
    ]
)

# Show both arrays, each preceded by its title

for title, matrix in (
    ("\nLos datos originales son:\n", src),
    ("\nLos datos modificados son:\n", dest),
):
    print(title)
    print(matrix)


Los datos originales son:

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]
 [13 14 15 16]]

Los datos modificados son:

[[ 1  2  5  6]
 [ 3  4  7  8]
 [ 9 10 13 14]
 [11 12 15 16]]


Como se observa, la idea de la transformación es colocar próximos (en sentido lineal) los datos que están próximos teniendo en cuenta las dimensiones.

Por ejemplo, si consideramos la matriz como un vector, el número 2 tiene como elementos más próximos al 1 y al 3, pero si la consideramos como matriz tiene también al 5, al 6 y al 7. Sin embargo, al considerar la matriz como vector, estos elementos están alejados del 2.

Después de la transformación, vemos que el 5 y el 6, que antes estaban lejos del 2, ahora están más próximos. Cuanto más grandes son los tamaños de las dimensiones, más claro se ve este resultado.

## Desarrollo del algoritmo

### Algoritmo final en Python

In [11]:
def t_dataP(data, sub_shp, inverse=False):
    """Regroup *data* so that every sub-block is stored contiguously (pure Python).

    The array is cut, along every axis, into blocks of ``sub_shp`` elements.
    Only the part of each axis that is a whole multiple of the block extent is
    regrouped; the remaining trailing elements ("leftovers") stay at their
    original flat position.  Blocks are emitted in row-major block order and
    the elements inside each block in row-major order; the resulting sequence
    fills the non-leftover flat positions in increasing order.

    Parameters
    ----------
    data : numpy.ndarray
        Input array of any number of dimensions.
    sub_shp : sequence of int
        Block extent for each axis; the first ``data.ndim`` entries are used.
    inverse : bool, optional
        When false (default) apply the regrouping.  When true apply the
        inverse permutation, so that
        ``t_dataP(t_dataP(x, s).reshape(x.shape), s, inverse=True)``
        gives back ``x.flatten()``.

    Returns
    -------
    numpy.ndarray
        One-dimensional array with ``data.size`` elements and ``data.dtype``.

    Raises
    ------
    ZeroDivisionError
        If a used entry of ``sub_shp`` is 0.
    """
    # Every per-axis table is stored fastest-varying axis first, i.e. in the
    # reverse order of ``data.shape``.
    sho = list(data.shape)[::-1]                       # original extents
    ndim = len(sho)
    sub = [sub_shp[i] for i in range(ndim)][::-1]      # block extents
    shp = [size - size % step for size, step in zip(sho, sub)]  # trimmed extents
    blocks_per_axis = [size // step for size, step in zip(shp, sub)]

    # Cumulative products: entry ``k`` is the product of the first ``k`` extents.
    sho_stride = [1]
    shp_stride = [1]
    sub_stride = [1]
    for axis in range(ndim):
        sho_stride.append(sho_stride[-1] * sho[axis])
        shp_stride.append(shp_stride[-1] * shp[axis])
        sub_stride.append(sub_stride[-1] * sub[axis])

    trimmed_total = shp_stride[ndim]
    block_size = sub_stride[ndim]

    # Dividing the block-ordered counter by block_div[axis] (and reducing it
    # modulo blocks_per_axis[axis]) yields the block index along that axis.
    block_div = [shp_stride[axis] * (block_size // sub_stride[axis])
                 for axis in range(ndim)]

    flat = data.flatten()
    dest = np.zeros(data.size, dtype=data.dtype)

    K = 0      # elements of the trimmed region handled so far (block order)
    cont = 0   # leftover elements skipped so far

    while K < trimmed_total:
        L = K + cont   # flat position in the original layout

        # A position is a leftover when any of its coordinates lies beyond
        # the trimmed extent of that axis; such elements are copied verbatim.
        if any((L % sho_stride[axis + 1]) // sho_stride[axis] >= shp[axis]
               for axis in range(ndim)):
            dest[L] = flat[L]
            cont += 1
            continue

        # Decode K into (block index, offset inside the block) per axis and
        # turn the resulting coordinates into a flat index of the original.
        J = 0
        for axis in range(ndim):
            inside = (K // sub_stride[axis]) % sub[axis]
            block = (K // block_div[axis]) % blocks_per_axis[axis]
            J += (inside + block * sub[axis]) * sho_stride[axis]

        if inverse:
            dest[J] = flat[L]
        else:
            dest[L] = flat[J]

        K += 1

    # Everything after the last regrouped element is a leftover: copy it as is.
    tail = K + cont
    dest[tail:] = flat[tail:]

    return dest

### Algoritmo final con *numpy*

In [12]:
def t_dataN(data, sub_shp, inverse=False):
    """Regroup *data* so that every sub-block is stored contiguously (NumPy).

    Along each axis only the leading part whose length is a whole multiple of
    the block extent is regrouped; trailing elements keep their value and
    position.  Inside that trimmed region the blocks are laid out one after
    another (row-major block order, row-major inside each block) and the
    resulting sequence is written back over the region in row-major order.

    Parameters
    ----------
    data : numpy.ndarray
        Input array of any number of dimensions.
    sub_shp : sequence of int
        Block extent for each axis; the first ``data.ndim`` entries are used.
    inverse : bool, optional
        When false (default) apply the regrouping; when true undo it.

    Returns
    -------
    numpy.ndarray
        New array with the shape and dtype of ``data``.

    Raises
    ------
    ZeroDivisionError
        If a used entry of ``sub_shp`` is 0.
    """
    ndim = data.ndim
    sub = [sub_shp[axis] for axis in range(ndim)]
    n_blocks = [size // step for size, step in zip(data.shape, sub)]
    trimmed_shape = [count * step for count, step in zip(n_blocks, sub)]

    # Start from a copy so that leftover elements are already in place.
    resN = np.copy(data)
    if ndim == 0:
        return resN

    # Leading region of every axis that is covered by whole blocks.
    region = tuple(slice(0, extent) for extent in trimmed_shape)

    if inverse:
        # Stored order is (block indices..., in-block offsets...); interleave
        # them back into (block0, offset0, block1, offset1, ...).
        grouped = data[region].reshape(n_blocks + sub)
        order = [axis
                 for pair in zip(range(ndim), range(ndim, 2 * ndim))
                 for axis in pair]
    else:
        # Split every axis into (block index, in-block offset) and move all
        # block indices in front of all in-block offsets.
        split_shape = [extent for pair in zip(n_blocks, sub) for extent in pair]
        grouped = data[region].reshape(split_shape)
        order = list(range(0, 2 * ndim, 2)) + list(range(1, 2 * ndim, 2))

    # reshape() copies the transposed view, so the dtype of ``data`` is kept.
    resN[region] = grouped.transpose(order).reshape(trimmed_shape)

    return resN

### Algoritmo final en *C*

In [13]:
def t_dataC(src, dest, sub_shp, inverse=False):
    """Run the compiled ``transform_data_general`` routine on *src*, writing into *dest*.

    Parameters
    ----------
    src : numpy.ndarray
        Input array; its item size, shape and number of dimensions are passed
        to the C routine.
    dest : numpy.ndarray
        Output buffer handed to the C routine.
        NOTE(review): presumably it must have the same size and dtype as
        ``src`` -- confirm against the C source.
    sub_shp : sequence of int
        Block extent per axis, forwarded unchanged.
    inverse : bool, optional
        Forwarded to the C routine as 0 or 1.  It used to be forced to 0 no
        matter what the caller passed.

    Returns
    -------
    Whatever ``lib.transform_data_general`` returns.
    NOTE(review): presumably a status code -- confirm against the C source.
    """
    typesize = src.dtype.itemsize
    shape = src.shape
    dimension = len(shape)

    # Expose the NumPy buffers to the C library without copying.
    src2 = ffi.from_buffer(src)
    dest2 = ffi.from_buffer(dest)

    return lib.transform_data_general(src2, dest2, typesize, sub_shp, shape,
                                      dimension, int(bool(inverse)))
    

## Test del algoritmo

### Definición de los parámetros del conjunto de datos

In [28]:
# Extent of each of the eight axes of the test data set
a = 4
b = 5
c = 7
d = 1
e = 11
f = 12
g = 5
h = 16

# Block extent used along each of those axes
sub = [
    2, 2, 2, 1,
    3, 5, 2, 8,
]

### Resultado en *numpy*

In [29]:
# Consecutive int32 values arranged in the configured eight-axis shape
data = np.arange(
    a * b * c * d * e * f * g * h,
    dtype=np.int32,
).reshape((a, b, c, d, e, f, g, h))

# Regroup with the NumPy implementation and view the result in the data shape
resN = t_dataN(data, sub, inverse=False)
resN = resN.reshape(data.shape)

print(resN)

[[[[[[[[      0       1       2 ...      21      22      23]
       [     80      81      82 ...     101     102     103]
       [    160     161     162 ...     181     182     183]
       [    240     241     242 ...     261     262     263]
       [     64      65      66 ...      77      78      79]]

      [[    320     321     322 ...     341     342     343]
       [    960     961     962 ...     981     982     983]
       [   1040    1041    1042 ...    1061    1062    1063]
       [   1120    1121    1122 ...    1141    1142    1143]
       [    144     145     146 ...     157     158     159]]

      [[   1200    1201    1202 ...    1221    1222    1223]
       [   1280    1281    1282 ...    1301    1302    1303]
       [   1920    1921    1922 ...    1941    1942    1943]
       [   2000    2001    2002 ...    2021    2022    2023]
       [    224     225     226 ...     237     238     239]]

      ...

      [[  74960   74961   74962 ...   74981   74982   74983]
       

### Resultado del algoritmo (*C*)

In [32]:
from tdg import ffi, lib

# Consecutive int32 values arranged in the configured eight-axis shape
data = np.arange(
    a * b * c * d * e * f * g * h,
    dtype=np.int32,
).reshape((a, b, c, d, e, f, g, h))

# Zero-filled output buffer with the same shape and dtype as the input
resC = np.zeros(data.shape, dtype=data.dtype)

# The C implementation writes its result into resC
t_dataC(data, resC, sub, inverse=False)

print(resC)

[[[[[[[[      0       1       2 ...      21      22      23]
       [     80      81      82 ...     101     102     103]
       [    160     161     162 ...     181     182     183]
       [    240     241     242 ...     261     262     263]
       [     64      65      66 ...      77      78      79]]

      [[    320     321     322 ...     341     342     343]
       [    960     961     962 ...     981     982     983]
       [   1040    1041    1042 ...    1061    1062    1063]
       [   1120    1121    1122 ...    1141    1142    1143]
       [    144     145     146 ...     157     158     159]]

      [[   1200    1201    1202 ...    1221    1222    1223]
       [   1280    1281    1282 ...    1301    1302    1303]
       [   1920    1921    1922 ...    1941    1942    1943]
       [   2000    2001    2002 ...    2021    2022    2023]
       [    224     225     226 ...     237     238     239]]

      ...

      [[  74960   74961   74962 ...   74981   74982   74983]
       

### Resultado del algoritmo (*Python*)

In [33]:
# Consecutive int32 values arranged in the configured eight-axis shape
data = np.arange(
    a * b * c * d * e * f * g * h,
    dtype=np.int32,
).reshape((a, b, c, d, e, f, g, h))

# The pure-Python implementation returns a flat array; give it the data shape
resP = t_dataP(data, sub, inverse=False)
resP = resP.reshape(data.shape)

print(resP)

[[[[[[[[      0       1       2 ...      21      22      23]
       [     80      81      82 ...     101     102     103]
       [    160     161     162 ...     181     182     183]
       [    240     241     242 ...     261     262     263]
       [     64      65      66 ...      77      78      79]]

      [[    320     321     322 ...     341     342     343]
       [    960     961     962 ...     981     982     983]
       [   1040    1041    1042 ...    1061    1062    1063]
       [   1120    1121    1122 ...    1141    1142    1143]
       [    144     145     146 ...     157     158     159]]

      [[   1200    1201    1202 ...    1221    1222    1223]
       [   1280    1281    1282 ...    1301    1302    1303]
       [   1920    1921    1922 ...    1941    1942    1943]
       [   2000    2001    2002 ...    2021    2022    2023]
       [    224     225     226 ...     237     238     239]]

      ...

      [[  74960   74961   74962 ...   74981   74982   74983]
       

### Comprobando que los resultados son iguales

In [34]:
# Pairwise comparison of the three implementations.  Each entry holds the two
# results to compare and the message printed when they differ.  The last entry
# now really compares the NumPy and C results; it used to repeat the
# Python-vs-C check under the Numpy-vs-C message.
comparisons = (
    (resP, resC, "La matices generadas por los algoritmos de Python y C no son iguales."),
    (resN, resP, "La matices generadas por los algoritmos de Numpy y Python no son iguales."),
    (resN, resC, "La matices generadas por los algoritmos de Numpy y C no son iguales."),
)

for left, right, message in comparisons:
    try:
        np.testing.assert_array_equal(left, right)
    except AssertionError:
        # assert_array_equal reports a mismatch through AssertionError; any
        # other exception is a real error and is allowed to propagate.
        print(message)