In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import math as mt

from tensorflow.keras.datasets import mnist
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten
from skimage.transform import resize

# Load The MNIST Data (200 datapoints for each category [10] in the training set)

In [2]:
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

selected_train_indices = []
selected_test_indices = []

for digit in range(10):
    indices = np.where(train_labels == digit)[0]
    selected_indices = np.random.choice(indices, size=200, replace=False)
    selected_train_indices.extend(selected_indices)

remaining_indices = set(range(len(train_labels))) - set(selected_train_indices)
remaining_indices = list(remaining_indices)
selected_train_indices.extend(np.random.choice(remaining_indices, size=2000 - 2000, replace=False))
selected_test_indices = np.random.choice(range(len(test_labels)), size=2000, replace=False)

selected_train_images = train_images[selected_train_indices]
selected_train_labels = train_labels[selected_train_indices]
selected_test_images = test_images[selected_test_indices]
selected_test_labels = test_labels[selected_test_indices]

print("Selected training images shape:", selected_train_images.shape)
print("Selected training labels shape:", selected_train_labels.shape)
print("Selected testing images shape:", selected_test_images.shape)
print("Selected testing labels shape:", selected_test_labels.shape)

Selected training images shape: (2000, 28, 28)
Selected training labels shape: (2000,)
Selected testing images shape: (2000, 28, 28)
Selected testing labels shape: (2000,)


In [3]:
selected_train_images_resized = np.array([resize(image, (32, 32), anti_aliasing=True) for image in selected_train_images])
selected_test_images_resized = np.array([resize(image, (32, 32), anti_aliasing=True) for image in selected_test_images])

selected_train_images_resized = np.expand_dims(selected_train_images_resized, axis=-1)
selected_test_images_resized = np.expand_dims(selected_test_images_resized, axis=-1)

selected_train_images_resized = np.repeat(selected_train_images_resized, 3, axis=-1)
selected_test_images_resized = np.repeat(selected_test_images_resized, 3, axis=-1)

print("Resized training images shape:", selected_train_images_resized.shape)
print("Resized testing images shape:", selected_test_images_resized.shape)

Resized training images shape: (2000, 32, 32, 3)
Resized testing images shape: (2000, 32, 32, 3)


In [4]:
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

x = Flatten()(base_model.output)

vgg_features_model = Model(inputs=base_model.input, outputs=x)

train_features = vgg_features_model.predict(selected_train_images_resized)
test_features = vgg_features_model.predict(selected_test_images_resized)

print("Train features shape:", train_features.shape)
print("Test features shape:", test_features.shape)

Train features shape: (2000, 512)
Test features shape: (2000, 512)


# Shuffle The Data

In [5]:
num_samples = len(train_features)
shuffled_indices = np.random.permutation(num_samples)

shuffled_images = train_features[shuffled_indices]
shuffled_labels = selected_train_labels[shuffled_indices]

print("Shuffled training images shape:", shuffled_images.shape)
print("Shuffled training labels shape:", shuffled_labels.shape)

Shuffled training images shape: (2000, 512)
Shuffled training labels shape: (2000,)


# Adding Noise

In [6]:
def add_label_noise(y_train, noise_rate):
    num_samples = len(y_train)
    num_noise_samples = int(noise_rate * num_samples)
    noise_indices = np.random.choice(num_samples, num_noise_samples, replace=False)
    noisy_labels = np.random.randint(0, 10, size=num_noise_samples)
    y_train_noisy = np.copy(y_train)
    y_train_noisy[noise_indices] = noisy_labels
    return y_train_noisy

Y_noise20 = add_label_noise(shuffled_labels, 0.2)

# Encoding Of Noise Labels

In [7]:
max_label = max(Y_noise20)
num_labels = max_label + 1

encoded_list = []
for label in Y_noise20:
    encoded_label = [-1] * num_labels
    encoded_label[label] = 1
    encoded_list.append(encoded_label)

Encoded_Noise_Labels = np.array(encoded_list)

In [8]:
m_label = max(shuffled_labels)
n_labels = m_label + 1

e_list = []
for label in shuffled_labels:
    e_label = [-1] * n_labels
    e_label[label] = 1
    e_list.append(e_label)

Encoded_True_Labels = np.array(e_list)

In [9]:
def calculate_Z(mu, lambda1, J, M3):
    tao = 1 / (mu + 2 * lambda1)
    t_hat = tao * (mu * J - M3)

    U, Sigma, Vt = np.linalg.svd(t_hat)
    min_dim = min(U.shape[1], Vt.shape[0])
    Sigma_mod = np.diag(np.maximum(Sigma[:min_dim] - tao, 0))

    Z = U[:, :min_dim] @ Sigma_mod @ Vt[:min_dim, :]
    return Z

def update_E(Y, B, M1, lambda3, mu):
    print(np.shape(B),np.shape(Y),np.shape((M1/mu)))
    M_tilda = Y - B +(M1/mu)
    n = lambda3/mu
    E = []
    for i in range(np.shape(M_tilda)[0]):
      if np.linalg.norm(M_tilda[i,:])>n:
        E.append((np.linalg.norm(M_tilda[i,:])-n)* 1/np.linalg.norm(M_tilda[i,:])*M_tilda[i,:])
      else:
        E.append([0 for i in range(np.shape(M_tilda)[1])])
    return np.array(E)

def update_J(X,Lap_mat,mu,Z_new,lambda2,M2,M3,B):
  A = 2*lambda2*np.matmul(np.matmul(X.T,Lap_mat), X) + miu*(np.matmul(X.T,X)) + miu*np.identity(np.shape(np.matmul(X.T,X))[0])
  B = np.matmul(X.T,M2) + M3 + mu*np.matmul(X.T,B)  + mu*Z
  return np.matmul(np.linalg.inv(A),B)

def update_B(mu,Y,E,X,J,M1,M2):

  B_hat = (1/(2*mu))*(mu*(Y-E+np.matmul(X,J))+M1-M2)
  B = np.zeros(np.shape(B_hat))

  for i in range(np.shape(B_hat)[0]):
    for j in range(np.shape(B_hat)[1]):
      if B_hat[i,j]>1:
        B[i,j]=1
      elif B_hat[i,j]<1 and B_hat[i,j]>-1:
        B[i,j]= B_hat[i,j]
      else:
        B[i,j]= -1
  return B

In [10]:
kernel_width = 1

In [11]:
adj_mat_w =[[] for i in range(shuffled_images.shape[0])]

for i in range(shuffled_images.shape[0]):
  for j in range(shuffled_images.shape[0]):
    adj_mat_w[i].append(mt.exp(-np.linalg.norm(shuffled_images[i,:]-shuffled_images[j,:])**2/(2*(kernel_width)**2)))

G = nx.from_numpy_array(np.array(adj_mat_w))

Deg_mat = [[] for i in range(shuffled_images.shape[0])]

for i in range(shuffled_images.shape[0]):
  for j in range(shuffled_images.shape[0]):
    if i==j:
      Deg_mat[i].append(G.degree[i])
    else:
      Deg_mat[i].append(0)
lap_mat = np.array(Deg_mat) - adj_mat_w

In [12]:
from re import M
Z = np.zeros((shuffled_images.shape[1],Encoded_Noise_Labels.shape[1]))
J= Z

E = np.zeros((Encoded_Noise_Labels.shape[0],Encoded_Noise_Labels.shape[1]))

B = np.zeros((Encoded_Noise_Labels.shape[0],Encoded_Noise_Labels.shape[1]))
M1 = np.zeros(np.shape(Encoded_Noise_Labels))

M2 = np.zeros(np.shape(Encoded_Noise_Labels))

M3 = np.zeros(np.shape(Z))

miu = 10**-3
miu_max = 10**6

p = 1.2

eps = 10**-6
iter_max = 1000

iter = 0

(2000, 10)


In [13]:
# Hyper Parameter
lambda1 = 10000
lambda2 = 1000
lambda3 = 1

In [14]:
while  iter < iter_max:
 Z = calculate_Z(miu,lambda1,J,M3)
 E = update_E(Encoded_Noise_Labels,B,M1,lambda3,miu)
 J = update_J(shuffled_images,lap_mat,miu,Z,lambda2,M2,M3,B)
 B = update_B(miu,Encoded_Noise_Labels,E,shuffled_images,J,M1,M2)

 M1 = M1 + miu*(Encoded_Noise_Labels - B - E)
 M2 = M2 + miu*(B - np.matmul(shuffled_images,J))
 M3 = M3 + miu*(Z-J)
 miu = min(p*miu,miu_max)
 iter = iter + 1

(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10) (2000, 10) (2000, 10)
(2000, 10)

In [15]:
Y_pred_ = np.matmul(shuffled_images,Z)
Y_pred = np.where(Y_pred_ < 0, -1, 1)

In [16]:
def accuracy_score(Y_true, Y_pred):
    correct = 0
    total = len(Y_true)
    for true, pred in zip(Y_true, Y_pred):
        if np.array_equal(true, pred):
            correct += 1
    return correct / total

accuracy = accuracy_score(Encoded_True_Labels, Y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8445


In [17]:
Encoded_Noise_Labels

array([[-1, -1,  1, ..., -1, -1, -1],
       [-1, -1,  1, ..., -1, -1, -1],
       [-1, -1, -1, ..., -1, -1, -1],
       ...,
       [-1, -1, -1, ..., -1, -1,  1],
       [-1, -1, -1, ..., -1, -1, -1],
       [-1, -1, -1, ...,  1, -1, -1]])