<a href="https://colab.research.google.com/github/davidwhogg/mnist-minus-minus/blob/main/notebooks/mnist_minus_minus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST-minus-minus

A handwritten-digit reading task, now with more chaos!

## Authors
- **David W Hogg** (NYU) (Flatiron)
- **Soledad Villar** (JHU)

## To-Do / Bugs:
- Need to keep and report group-element labels, not just content labels.
- Need to package with pip or zenodo or somesuch.

## Notes
- Some content copied from <https://github.com/wxs/keras-mnist-tutorial/blob/master/MNIST%20in%20Keras.ipynb>.
- Some content copied from <https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/fashion_mnist.ipynb>.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.datasets import fashion_mnist
# One seeded generator, passed to every data-set builder in this notebook so reruns draw the same samples.
rng = np.random.default_rng(17) # KEY

In [None]:
# Load the stock MNIST digits (already divided into train and test parts) and report the array shapes.
(X0_train, y0_train), (X0_test, y0_test) = mnist.load_data()
for _label, _array in (
    ("X0_train shape", X0_train),
    ("y0_train shape", y0_train),
    ("X0_test shape", X0_test),
    ("y0_test shape", y0_test),
):
    print(_label, _array.shape)

In [None]:
# Load the stock Fashion-MNIST images (already divided into train and test parts) and report the array shapes.
(fX0_train, fy0_train), (fX0_test, fy0_test) = fashion_mnist.load_data()
for _label, _array in (
    ("fX0_train shape", fX0_train),
    ("fy0_train shape", fy0_train),
    ("fX0_test shape", fX0_test),
    ("fy0_test shape", fy0_test),
):
    print(_label, _array.shape)

In [None]:
# Preview the first 36 untouched MNIST digits on a 6-by-6 grid, each titled with its class.
plt.rcParams['figure.figsize'] = (14,16) # Make the figures a bit bigger
for panel in range(1, 37):
    plt.subplot(6, 6, panel)  # subplot slots are numbered from 1
    plt.imshow(X0_train[panel - 1], cmap='gray_r', interpolation='none')
    plt.title("MNIST class {}".format(y0_train[panel - 1]))

In [None]:
# Preview the first 36 untouched Fashion-MNIST images on a 6-by-6 grid, each titled with its class.
plt.rcParams['figure.figsize'] = (14,16) # Make the figures a bit bigger
for panel in range(1, 37):
    plt.subplot(6, 6, panel)  # subplot slots are numbered from 1
    plt.imshow(fX0_train[panel - 1], cmap='gray_r', interpolation='none')
    plt.title("Fashion-MNIST class {}".format(fy0_train[panel - 1]))

In [None]:
def transform(Xs, rng):
    """Return a copy of ``Xs`` with every image randomly flipped and rotated.

    For each image, in order, one uniform draw decides (with probability
    one half) whether to flip along axis 0, then one integer draw in
    {0, 1, 2, 3} picks the number of quarter turns passed to ``np.rot90``.
    The input array is left untouched.

    Parameters
    ----------
    Xs : array of shape (n, n1, n2)
        Stack of images. An odd number of quarter turns swaps the two image
        axes, so the images need to be square for the write-back to succeed.
    rng : numpy.random.Generator
        Source of the random draws.

    Returns
    -------
    array with the shape and dtype of ``Xs``.
    """
    _n, _n1, _n2 = Xs.shape  # unpacking fails unless Xs is three-dimensional
    out = Xs.copy()
    for idx in range(len(Xs)):
        img = Xs[idx]
        # Draw order matters for reproducibility: flip decision first, turns second.
        mirrored = rng.random() < 0.5
        quarter_turns = rng.integers(0, 4)
        if mirrored:
            img = np.flip(img, axis=0)
        out[idx] = np.rot90(img, quarter_turns)
    return out

def minusify_1(X, y, N, rng):
    """Draw ``N`` images with replacement and randomly flip/rotate each one.

    Parameters
    ----------
    X : array of shape (n, n1, n1)
        Stack of square source images.
    y : array of shape (n,)
        Class label of each source image.
    N : int
        Number of samples to generate.
    rng : numpy.random.Generator
        Source of the random draws (index selection, then the transforms).

    Returns
    -------
    Xm : array of shape (N, n1, n1)
        The selected images after ``transform``.
    ym : array of shape (N,)
        Labels of the selected images, taken directly from ``y``.
    """
    (n, n1, n2) = X.shape
    assert n1 == n2
    assert y.shape == (n, )
    inds = rng.integers(0, n, N)  # sampling with replacement
    Xm = transform(X[inds], rng)
    return Xm, y[inds]

def minusify_4(X, y, N, rng):
    """Build ``N`` two-by-two mosaics of source images with a four-digit label.

    Four independent index draws (with replacement) choose the tiles. The
    label is ``sum(10**k * y[draw k])``; the tiles are laid out so that draw 3
    sits top-left, draw 2 top-right, draw 1 bottom-left and draw 0
    bottom-right. Each mosaic is then passed through ``transform``.

    Parameters
    ----------
    X : array of shape (n, n1, n1)
        Stack of square source images.
    y : array of shape (n,)
        Class label of each source image.
    N : int
        Number of mosaics to generate.
    rng : numpy.random.Generator
        Source of the random draws.

    Returns
    -------
    Xm : array of shape (N, 2 * n1, 2 * n1)
    ym : integer array of shape (N,)
    """
    count, height, width = X.shape
    assert height == width
    assert y.shape == (count, )
    picks = [rng.integers(0, count, N) for _ in range(4)]
    ym = np.zeros(N).astype(int)
    for power, pick in enumerate(picks):
        ym = ym + 10**power * y[pick].astype(int)
    # Most significant digit goes top-left, least significant bottom-right.
    upper = np.concatenate((X[picks[3]], X[picks[2]]), axis=2)
    lower = np.concatenate((X[picks[1]], X[picks[0]]), axis=2)
    mosaic = np.concatenate((upper, lower), axis=1)
    Xm = transform(mosaic, rng)
    return Xm, ym

def minusify_9(X, y, N, rng):
    """Build ``N`` three-by-three mosaics labelled by their central tile.

    A single (N, 3, 3) draw of indices (with replacement) picks the nine
    tiles of every mosaic; the label is the class of the tile at index
    [1, 1]. Each mosaic is then passed through ``transform``.

    Parameters
    ----------
    X : array of shape (n, n1, n1)
        Stack of square source images.
    y : array of shape (n,)
        Class label of each source image.
    N : int
        Number of mosaics to generate.
    rng : numpy.random.Generator
        Source of the random draws.

    Returns
    -------
    Xm : array of shape (N, 3 * n1, 3 * n1)
    ym : integer array of shape (N,)
    """
    count, height, width = X.shape
    assert height == width
    assert y.shape == (count, )
    picks = rng.integers(0, count, size=(N, 3, 3))
    ym = y[picks[:, 1, 1]].astype(int)
    bands = []
    for j in range(3):
        # tiles sharing the second index j are joined side by side into one band
        band = np.concatenate([X[picks[:, i, j]] for i in range(3)], axis=2)
        bands.append(band)
    mosaic = np.concatenate(bands, axis=1)  # bands are stacked top to bottom
    Xm = transform(mosaic, rng)
    return Xm, ym

In [None]:
# Single-image variant built from Fashion-MNIST: 60,000 training and 10,000 test draws (train is drawn first).
X_trainf, y_trainf = minusify_1(fX0_train, fy0_train, N=60_000, rng=rng)
X_testf, y_testf = minusify_1(fX0_test, fy0_test, N=10_000, rng=rng)
for _label, _array in (
    ("X_trainf shape", X_trainf),
    ("y_trainf shape", y_trainf),
    ("X_testf shape", X_testf),
    ("y_testf shape", y_testf),
):
    print(_label, _array.shape)

In [None]:
# Preview the first 36 flipped/rotated Fashion-MNIST samples on a 6-by-6 grid.
for panel in range(1, 37):
    plt.subplot(6, 6, panel)  # subplot slots are numbered from 1
    plt.imshow(X_trainf[panel - 1], cmap='gray_r', interpolation='none')
    plt.title("MNIST-f class {:01d}".format(y_trainf[panel - 1]))

In [None]:
# Four-digit mosaic variant built from MNIST: 60,000 training and 10,000 test draws (train is drawn first).
X_train4, y_train4 = minusify_4(X0_train, y0_train, N=60_000, rng=rng)
X_test4, y_test4 = minusify_4(X0_test, y0_test, N=10_000, rng=rng)
for _label, _array in (
    ("X_train4 shape", X_train4),
    ("y_train4 shape", y_train4),
    ("X_test4 shape", X_test4),
    ("y_test4 shape", y_test4),
):
    print(_label, _array.shape)

In [None]:
# Preview the first 36 four-digit mosaics on a 6-by-6 grid; labels are zero-padded to four digits.
for panel in range(1, 37):
    plt.subplot(6, 6, panel)  # subplot slots are numbered from 1
    plt.imshow(X_train4[panel - 1], cmap='gray_r', interpolation='none')
    plt.title("MNIST-4 class {:04d}".format(y_train4[panel - 1]))

In [None]:
# Nine-tile mosaic variant built from MNIST: 60,000 training and 10,000 test draws (train is drawn first).
X_train9, y_train9 = minusify_9(X0_train, y0_train, N=60_000, rng=rng)
X_test9, y_test9 = minusify_9(X0_test, y0_test, N=10_000, rng=rng)
for _label, _array in (
    ("X_train9 shape", X_train9),
    ("y_train9 shape", y_train9),
    ("X_test9 shape", X_test9),
    ("y_test9 shape", y_test9),
):
    print(_label, _array.shape)

In [None]:
# Preview the first 36 nine-tile mosaics on a 6-by-6 grid, each titled with its label.
for panel in range(1, 37):
    plt.subplot(6, 6, panel)  # subplot slots are numbered from 1
    plt.imshow(X_train9[panel - 1], cmap='gray_r', interpolation='none')
    plt.title("MNIST-9 class {:01d}".format(y_train9[panel - 1]))

In [None]:
def infinity_transform(Xs, n1, n2, rng):
    """Resample every image through its own random unit-determinant 2x2 map.

    For each image a Gaussian random matrix is drawn repeatedly until the
    ratio of its singular values is at least 0.3, then rescaled so that the
    product of its singular values is 1. Output sample positions (a grid
    with half-pixel spacing, centred on the origin) are mapped through the
    matrix, looked up in the input by flooring to the nearest pixel, and
    averaged in 2x2 groups to give the output pixels.

    Parameters
    ----------
    Xs : array of shape (n, in_n1, in_n2)
        Stack of input images.
    n1, n2 : int
        Size of each output image.
    rng : numpy.random.Generator
        Source of the random matrices.

    Returns
    -------
    Xm : float array of shape (n, n1, n2)
        The resampled images.
    Mm : float array of shape (n, 2, 2)
        The matrix that was applied to each image.

    Notes
    -----
    NOTE(review): ``np.meshgrid`` with its default indexing yields arrays of
    shape (2 * n2, 2 * n1), so the ``reshape(n1, 2, n2, 2)`` below appears
    to line up with the grid only when ``n1 == n2`` -- confirm before using
    non-square outputs.
    """
    (n, in_n1, in_n2) = Xs.shape
    Xm = np.zeros((n, n1, n2))
    # Output positions at half-pixel spacing, i.e. four samples per output pixel.
    xyout = np.array(np.meshgrid(np.arange(0.25, n1, 0.5) - 0.5 * n1, np.arange(0.25, n2, 0.5) - 0.5 * n2))
    Mm = np.zeros((n, 2, 2))
    for i, X in enumerate(Xs):
        # Rejection-sample until the matrix is not too anisotropic.
        eigratio = 0.0
        while eigratio < 0.3:
            M = rng.normal(size=(2, 2))
            _, s, _ = np.linalg.svd(M)
            eigratio = s[1] / s[0]
        M /= np.sqrt(np.prod(s))  # normalise to |det M| == 1
        xyin = np.tensordot(M, xyout, (1, 0))
        # HACK: positions that fall outside the input are clamped to the border pixel.
        yin = np.floor(np.clip(xyin[0] + 0.5 * in_n1, 0., in_n1 - 0.5)).astype(int)
        # The column index is bounded by the input width in_n2 (it used in_n1 before).
        xin = np.floor(np.clip(xyin[1] + 0.5 * in_n2, 0., in_n2 - 0.5)).astype(int)
        samples = X[yin, xin]
        # Average each 2x2 group of samples into one output pixel.
        Xm[i] = samples.reshape(n1, 2, n2, 2).mean(-1).mean(1)
        Mm[i] = M
    return Xm, Mm

def minusify_infinity(X, y, N, rng):
    """Build ``N`` five-by-five mosaics and warp each with a random linear map.

    A single (N, 5, 5) draw of indices (with replacement) picks the tiles of
    every mosaic; the label is the class of the central tile. The mosaics
    are handed to ``infinity_transform``, which returns images three tiles
    wide and three tiles high together with the matrix applied to each.

    Parameters
    ----------
    X : array of shape (n, n1, n1)
        Stack of square source images.
    y : array of shape (n,)
        Class label of each source image.
    N : int
        Number of samples to generate.
    rng : numpy.random.Generator
        Source of the random draws.

    Returns
    -------
    Xm : array of shape (N, 3 * n1, 3 * n1)
    ym : integer array of shape (N,)
    Mm : array of shape (N, 2, 2)
    """
    count, height, width = X.shape
    tiles = 5  # the mosaic is tiles-by-tiles source images
    assert height == width
    assert y.shape == (count, )
    picks = rng.integers(0, count, size=(N, tiles, tiles))
    centre = tiles // 2
    ym = y[picks[:, centre, centre]].astype(int)
    bands = [
        np.concatenate([X[picks[:, i, j]] for i in range(tiles)], axis=2)
        for j in range(tiles)
    ]
    mosaic = np.concatenate(bands, axis=1)
    Xm, Mm = infinity_transform(mosaic, 3 * height, 3 * width, rng)
    return Xm, ym, Mm

In [None]:
# Linearly-warped mosaic variant built from MNIST: 1,000 training and 1,000 test draws (train is drawn first).
X_trainInf, y_trainInf, M_trainInf = minusify_infinity(X0_train, y0_train, N=1_000, rng=rng)
X_testInf, y_testInf, M_testInf = minusify_infinity(X0_test, y0_test, N=1_000, rng=rng)
for _label, _array in (
    ("X_trainInf shape", X_trainInf),
    ("y_trainInf shape", y_trainInf),
    ("X_testInf shape", X_testInf),
    ("y_testInf shape", y_testInf),
):
    print(_label, _array.shape)

In [None]:
# Preview the first 36 linearly-warped mosaics on a 6-by-6 grid, each titled with its label.
for panel in range(1, 37):
    plt.subplot(6, 6, panel)  # subplot slots are numbered from 1
    plt.imshow(X_trainInf[panel - 1], cmap='gray_r', interpolation='none')
    plt.title("MNIST-Inf class {:01d}".format(y_trainInf[panel - 1]))