## Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import random

from sklearn.neighbors import KNeighborsClassifier, DistanceMetric
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA

import sys
import time
import math

%config InlineBackend.figure_format='svg'
%matplotlib inline

In [None]:
# Extra
import scipy.sparse

## Import Data

In [None]:
def readMNIST(filename):
    """Read a comma-separated file and split it into features and labels.

    The parsed table is transposed first; after the transpose the last
    column is returned as the labels and all remaining columns as the
    features.

    Returns:
        (feature, label) as two freshly allocated NumPy arrays.
    """
    table = np.genfromtxt(filename, delimiter=",").T
    return np.array(table[:, :-1]), np.array(table[:, -1])

In [None]:
# Announce the load without a trailing newline so 'done.' lands on the same line.
print('Loading MNIST data... ', end='')
MNIST_train_feature, MNIST_train_label = readMNIST('./MNIST/train.csv')
MNIST_test_feature, MNIST_test_label = readMNIST('./MNIST/test.csv')
print('done.')

In [None]:
# Pair every feature row with its label so each sample travels as one
# (feature, label) tuple. No filtering by digit happens in this cell.
print('Split MNIST into specific outputs... ', end='')
MNIST_train_list = [*zip(MNIST_train_feature, MNIST_train_label)]
MNIST_test_list = [*zip(MNIST_test_feature, MNIST_test_label)]
print('done.')

In [None]:
def shuffle(featurelist, labellist):
    """Shuffle features and labels together, keeping each pair aligned.

    Returns:
        (features, labels) as NumPy arrays in the new, common random order.
    """
    pairs = list(zip(featurelist, labellist))
    # Shuffling the positions instead of the pairs draws the same random
    # numbers and therefore yields the same permutation.
    order = list(range(len(pairs)))
    random.shuffle(order)
    shuffled_features, shuffled_labels = zip(*(pairs[k] for k in order))
    return np.array(shuffled_features), np.array(shuffled_labels)

def mergeAndShuffle(list1, list2):
    """Concatenate two sequences of (feature, label) pairs and shuffle them.

    The pairs of ``list1`` come first, then those of ``list2``; the combined
    features and labels are then handed to ``shuffle``.

    Returns:
        Whatever ``shuffle`` returns for the combined features and labels.
    """
    featurelist, labellist = [], []
    for source in (list1, list2):
        for sample_feature, sample_label in source:
            featurelist.append(sample_feature)
            labellist.append(sample_label)
    return shuffle(featurelist, labellist)

# Ising Model De-noise

## Choose Dataset

In [None]:
# Pool the test and train samples and shuffle them into one feature array
# and one label array.
feature, label = mergeAndShuffle(MNIST_test_list, MNIST_train_list)

In [None]:
# Keep only the first BATCH_SIZE samples of the shuffled data.
BATCH_SIZE = 10
feature, label = feature[:BATCH_SIZE], label[:BATCH_SIZE]

### Convert to binary matrix

In [None]:
# Binarize: a pixel becomes True when it is brighter than the mean over the
# whole batch.
feature = np.greater(feature, feature.mean())

In [None]:
def plotGrayScale(im):
    """Draw ``im`` as a 28 x 28 grayscale picture in a new figure.

    ``im`` is reshaped to (28, 28), so it must hold exactly 784 values.
    """
    fig, ax = plt.subplots()
    ax.imshow(im.reshape(28, 28), cmap='gray', aspect='auto', interpolation='nearest')

In [None]:
#plotGrayScale(feature[105])

### Add Noise

In [None]:
# Boolean mask with the same shape as feature: an entry is True when its
# uniform [0, 1) draw exceeds 0.9, i.e. for roughly 10% of the pixels.
flip = np.random.random(feature.shape) > .9

In [None]:
# Invert exactly the pixels selected by flip: (flip and not feature) or
# (feature and not flip) is the exclusive-or of the two masks.
feature_noise = np.logical_xor(feature, flip)

In [None]:
#plotGrayScale(feature[16])

## Gibbs Sampler

### Initialize Variables

In [None]:
def addr(r, c, dim):
    """Return the flat row-major index of grid cell (r, c).

    ``dim[1]`` is used as the number of columns per row; ``dim[0]`` is not read.
    """
    n_cols = dim[1]
    return c + n_cols * r

def Graph(dim):
    """Build the 4-neighbour adjacency matrix of a ``dim[0]`` x ``dim[1]`` grid.

    Cells are numbered row-major (index = r * dim[1] + c, the same layout
    ``addr`` uses). Entry ``W[i, j]`` is 1 when cells i and j are direct
    vertical or horizontal neighbours and 0 otherwise, so W is symmetric
    with a zero diagonal.

    Args:
        dim: (rows, cols) of the grid.

    Returns:
        Integer NumPy array of shape (rows * cols, rows * cols).
    """
    row = dim[0]
    col = dim[1]
    length = row * col
    W = np.zeros((length, length), dtype=int)
    for i in range(length):
        # Row-major layout: the row is the quotient and the column the
        # remainder of the flat index by the row width.
        r, c = divmod(i, col)
        if r + 1 < row:
            W[i, i + col] = 1  # cell below
        if r - 1 >= 0:
            W[i, i - col] = 1  # cell above (row 0 is a valid neighbour)
        if c + 1 < col:
            W[i, i + 1] = 1    # cell to the right
        if c - 1 >= 0:
            W[i, i - 1] = 1    # cell to the left (column 0 is a valid neighbour)
    return W

def B(X, c):
    """Map a 0/1 (or boolean) array to the values -c / +c.

    Args:
        X: array-like of 0/1 or boolean entries.
        c: scale applied after the 0/1 -> -1/+1 mapping.

    Returns:
        Float NumPy array with the shape of ``X`` holding ``(2 * X - 1) * c``.
    """
    X = np.asarray(X)
    # The original subtracted np.ones(x.shape) with an undefined lowercase x.
    return (2 * X - np.ones(X.shape)) * c

In [None]:
# Adjacency matrix for the 28 x 28 pixel grid (one row/column per pixel).
W = Graph((28, 28))
# Weight handed to updateX/computeH, where it multiplies the pixel's own
# current value.
b = 5 
# Number of updateX passes applied to each image.
iterations = 200

### Update Equations

In [None]:
def np_sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated with NumPy.

    Works on scalars and, elementwise, on arrays.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def updateX(X, W, b):
    """Resample every pixel of one image at random and return the new state.

    The value from ``computeH`` is evaluated for each pixel on the incoming
    ``X``; a pixel becomes +1 with probability ``np_sigmoid(2 * H)`` and -1
    otherwise. All pixels are drawn in one pass from the same ``X``.

    Returns:
        Integer array of +1 / -1 with the shape of ``X``.
    """
    n_pixels = X.shape[0]
    fields = np.array([computeH(X, k, W, b) for k in range(n_pixels)])
    prob_up = np_sigmoid(2 * fields)
    draws = np.random.random(X.shape)
    return np.where(draws <= prob_up, 1, -1)

def computeH(X, i, W, b):
    """Local field acting on pixel ``i`` of the image ``X``.

    H_i = 0.5 * (sum of X[j] over the neighbours j of i) + b * X[i]

    Args:
        X: 1-D array of pixel states (expected to be -1 / +1 spins).
        i: index of the pixel.
        W: 0/1 adjacency matrix; ``W[i, j] == 1`` marks j as a neighbour of i.
        b: weight of the pixel's own current value.

    Returns:
        The scalar field H_i; a positive value favours +1.
    """
    xi = X[i]
    # W[i, :] is a 0/1 indicator row, so the dot product adds up exactly the
    # neighbours' states. The previous code iterated over the *values* of that
    # row and used them as indices, which only ever read X[0] and X[1]; it
    # also multiplied the sum by xi, which flips the sign of the neighbour
    # term for -1 pixels.
    # NOTE(review): the data term uses the current state X[i]; an
    # observation-based field (cf. B) may have been intended - confirm.
    return 0.5 * np.dot(W[i, :], X) + b * xi

### Run Gibbs Sampling

In [None]:
# updateX works on and returns -1 / +1 spins. Writing those into a boolean
# copy of feature_noise would turn every pixel True (both -1 and +1 are
# truthy), so sample in an integer spin buffer and convert back afterwards.
spins = np.where(feature_noise, 1, -1)
for i in range(spins.shape[0]):
    for j in range(iterations):
        spins[i, :] = updateX(spins[i, :], W, b)
# Back to the boolean representation used by feature and feature_noise.
feature_cleaned = spins > 0

### Analysis

In [None]:
def accuracy(X, X_n, X_c):
    """Average the per-image scores of ``imageMatch`` over a batch.

    Args:
        X: original images, one image per entry along the first axis.
        X_n: noisy images, same shape as ``X``.
        X_c: cleaned images, same shape as ``X``.

    Returns:
        Tuple (total, unperturbed, perturbed): each is the mean over the
        images of the corresponding ``imageMatch`` score.
    """
    N = X.shape[0]
    # Index only the first axis so that flattened (N, 784) batches and
    # (N, 28, 28) batches both work.
    total = np.array([imageMatch(X[i], X_n[i], X_c[i]) for i in range(N)])
    # total has one row per image and one column per score, so the average
    # over images is taken along axis 0.
    sums = np.sum(total, axis=0) / N
    return sums[0], sums[1], sums[2]

def imageMatch(X, X_n, X_c):
    """Score how well a cleaned image matches the original.

    Args:
        X: original image.
        X_n: noisy image, same shape as ``X``.
        X_c: cleaned image, same shape as ``X``.

    Returns:
        Tuple (total, unperturbed, perturbed):
          total       - fraction of all pixels where ``X_c`` equals ``X``;
          unperturbed - that fraction among pixels the noise left unchanged;
          perturbed   - that fraction among pixels the noise changed.
        A group with no pixels yields ``nan`` for its score.
    """
    X = np.asarray(X)
    X_n = np.asarray(X_n)
    X_c = np.asarray(X_c)

    untouched = (X == X_n)   # pixels the noise left unchanged
    restored = (X == X_c)    # pixels where the cleaned image is correct
    touched = np.logical_not(untouched)

    total = np.mean(restored)
    n_untouched = np.sum(untouched)
    # Count the changed pixels directly rather than via X.shape[0], which is
    # only the pixel count for 1-D images.
    n_touched = np.sum(touched)

    nan = float('nan')
    unperturbed = np.sum(np.logical_and(restored, untouched)) / n_untouched if n_untouched else nan
    perturbed = np.sum(np.logical_and(restored, touched)) / n_touched if n_touched else nan
    return total, unperturbed, perturbed

In [None]:
# Show sample 5 as original, noisy and cleaned image. The cleaned images
# are stored in feature_cleaned (feature_clean was never defined).
plotGrayScale(feature[5, :])
plotGrayScale(feature_noise[5, :])
plotGrayScale(feature_cleaned[5, :])

In [None]:
# Print the three scores from accuracy(): overall, unperturbed-pixel and
# perturbed-pixel match of the cleaned images against the originals.
print (accuracy(feature, feature_noise, feature_cleaned))

## Mean Field Update

### Update Equations

In [None]:
def np_sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated with NumPy.

    Works on scalars and, elementwise, on arrays.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def updateX(X, W, b):
    """Deterministic update of one image: threshold tanh of each pixel's field.

    The value from ``computeH`` is evaluated for each pixel on the incoming
    ``X``; a pixel becomes +1 when ``tanh(H) >= 0`` and -1 otherwise.

    Returns:
        Integer array of +1 / -1 with one entry per pixel.
    """
    n_pixels = X.shape[0]
    fields = np.array([computeH(X, k, W, b) for k in range(n_pixels)])
    mean_spin = np.tanh(fields)
    return np.where(mean_spin >= 0, 1, -1)

def computeH(X, i, W, b):
    """Local field acting on pixel ``i`` of the image ``X``.

    H_i = 0.5 * (sum of X[j] over the neighbours j of i) + b * X[i]

    Args:
        X: 1-D array of pixel states (expected to be -1 / +1 spins).
        i: index of the pixel.
        W: 0/1 adjacency matrix; ``W[i, j] == 1`` marks j as a neighbour of i.
        b: weight of the pixel's own current value.

    Returns:
        The scalar field H_i; a positive value favours +1.
    """
    xi = X[i]
    # W[i, :] is a 0/1 indicator row, so the dot product adds up exactly the
    # neighbours' states. The previous code iterated over the *values* of that
    # row and used them as indices, which only ever read X[0] and X[1]; it
    # also multiplied the sum by xi, which flips the sign of the neighbour
    # term for -1 pixels.
    # NOTE(review): the data term uses the current state X[i]; an
    # observation-based field (cf. B) may have been intended - confirm.
    return 0.5 * np.dot(W[i, :], X) + b * xi

### Run Mean Field Update

In [None]:
# updateX works on and returns -1 / +1 spins. Writing those into a boolean
# copy of feature_noise would turn every pixel True (both -1 and +1 are
# truthy), so iterate in an integer spin buffer and convert back afterwards.
spins = np.where(feature_noise, 1, -1)
for i in range(spins.shape[0]):
    for j in range(iterations):
        spins[i, :] = updateX(spins[i, :], W, b)
# Back to the boolean representation used by feature and feature_noise.
feature_cleaned = spins > 0

### Analysis

In [None]:
# Show sample 5 as original, noisy and cleaned image. The cleaned images
# are stored in feature_cleaned (feature_clean was never defined).
plotGrayScale(feature[5, :])
plotGrayScale(feature_noise[5, :])
plotGrayScale(feature_cleaned[5, :])

In [None]:
# Print the three scores from accuracy(): overall, unperturbed-pixel and
# perturbed-pixel match of the cleaned images against the originals.
print (accuracy(feature, feature_noise, feature_cleaned))

# Resources

In [None]:
# Assignment: 
#   

# Data Located at:
#   http://yann.lecun.com/exdb/mnist/
#   http://cis.jhu.edu/~sachin/digit/digit.html


# Bugs:
# Description: 
#   solution: