In [41]:
from torchvision.datasets import MNIST
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [42]:
mnist_data_train = MNIST('/', train=True, download=True)
mnist_data_test = MNIST('/', train=False, download=True)

In [43]:
# Putting data into correct variables
X_train, Y_train = mnist_data_train.data, mnist_data_train.targets
X_test, Y_test = mnist_data_test.data, mnist_data_test.targets

In [44]:
# crop the image by 5 pixels from all the sides
X_train = X_train[:, 5:-5, 5:-5]
X_test = X_test[:, 5:-5, 5:-5]

In [45]:
# Flatten the images into vectors
X_train, X_test = np.reshape(X_train, (X_train.shape[0], 18*18)),np.reshape(X_test, (X_test.shape[0], 18*18))

In [46]:
class LinearRegression():
  # Constructor of this class
  def __init__(self):
    self.coeff = list()
  # This function is used to find the coefficients for the line of best fit
  def fit(self, A, Y):
    # Add Bias
    print(A)
    A = np.concatenate((np.ones((len(A), 1)), A), axis=1)
    # Find Pseudo Inverse
    pseudo_inv = np.matmul(np.linalg.inv(np.matmul(np.transpose(A), A)), np.transpose(A))
    # Finally get the coefficients
    self.coeff = np.matmul(pseudo_inv, np.reshape(Y, (-1, 1)))
    # This function uses the found coefficients to deliver predictions
  def predict(self, A):
    A = np.concatenate((np.ones((len(A), 1)), A), axis=1)
    # print("Model Coefficients:", self.coeff)
    return np.squeeze(np.matmul(A, self.coeff))


In [47]:
# def fit_curve(X_train, Y_train, X_test, Y_test, degree):
#   if degree == 1:
#     X_new_train = np.reshape(X_train, (-1, 1))
#     X_new_test = np.reshape(X_test, (-1, 1))
#   else:
#     X_new_train = np.transpose(np.asarray([X_train**i for i in range(1, degree+1)]))
#     X_new_test = np.transpose(np.asarray([X_test**i for i in range(1, degree+1)]))
#   model = LinearRegression()
#   model.fit(X_new_train, Y_train)
#   preds = model.predict(X_new_test)
#   preds = np.asarray([1 if i>0 else -1 for i in preds])
#   print("Accuracy:", accuracy_score(Y_test, preds), "\n\n")

In [50]:
def fit_curve(X_train, Y_train, X_test, Y_test, degree):
    if degree == 1:
        X_new_train = X_train  # Already a 2D array
        X_new_test = X_test    # Already a 2D array
    else:
        # Create polynomial features but keep it as a 2D array
        X_new_train = np.column_stack([X_train**i for i in range(1, degree+1)])
        X_new_test = np.column_stack([X_test**i for i in range(1, degree+1)])

    model = LinearRegression()
    model.fit(X_new_train, Y_train)
    preds = model.predict(X_new_test)
    preds = np.asarray([1 if i>0 else -1 for i in preds])
    return accuracy_score(Y_test, preds)

In [53]:
# Create 9 datasets with Y =1 corresponds to +ve class and Y = -1 corresponds to negative class
# Create separate datasets where Y=+1 for '1' and Y=-1 for each other digit
accuracy_scores = {}
for digit in range(10):
    if digit == 1:
        continue  # Skip 1 as it is always positive

    # print(f"Creating dataset for 1 vs {digit}")

    # Define binary classification labels
    Y_train_binary = np.where(Y_train == 1, 1, np.where(Y_train == digit, -1, 0))
    Y_test_binary = np.where(Y_test == 1, 1, np.where(Y_test == digit, -1, 0))

    # Filter dataset to include only 1 and the current digit
    train_mask = (Y_train_binary != 0)
    test_mask = (Y_test_binary != 0)

    X_train_filtered, Y_train_filtered = X_train[train_mask], Y_train_binary[train_mask]
    X_test_filtered, Y_test_filtered = X_test[test_mask], Y_test_binary[test_mask]

    accuracy = fit_curve(X_train_filtered, Y_train_filtered, X_test_filtered, Y_test_filtered, 1)
    accuracy_scores[f"1 vs {digit}"] = accuracy

tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]], dtype=torch.uint8)
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0, 

In [54]:
accuracy_scores

{'1 vs 0': 0.9952718676122931,
 '1 vs 2': 0.9806183664051684,
 '1 vs 3': 0.9916083916083916,
 '1 vs 4': 0.995276334435522,
 '1 vs 5': 0.9911198815984213,
 '1 vs 6': 0.9952221691352127,
 '1 vs 7': 0.9889042995839112,
 '1 vs 8': 0.9781887150308203,
 '1 vs 9': 0.9944029850746269}

In [55]:
from torchvision.datasets import MNIST
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Download the MNIST dataset
mnist_data_train = MNIST('./', train=True, download=True)
mnist_data_test = MNIST('./', train=False, download=True)

# Extract data and labels
X_train, Y_train = mnist_data_train.data.numpy(), mnist_data_train.targets.numpy()
X_test, Y_test = mnist_data_test.data.numpy(), mnist_data_test.targets.numpy()

# Crop images by 5 pixels from all sides
X_train = X_train[:, 5:-5, 5:-5]
X_test = X_test[:, 5:-5, 5:-5]

# Vectorize the data (flatten the images)
X_train = X_train.reshape(X_train.shape[0], -1)  # Result: (60000, 18*18)
X_test = X_test.reshape(X_test.shape[0], -1)     # Result: (10000, 18*18)

class LinearRegressionLS:
    def __init__(self):
        self.coef = None

    def fit(self, X, y):
        # Add bias term
        X_bias = np.column_stack([np.ones(X.shape[0]), X])

        # Calculate coefficients using the normal equation: w = (X^T X)^(-1) X^T y
        XTX = np.dot(X_bias.T, X_bias)
        XTy = np.dot(X_bias.T, y)
        self.coef = np.linalg.solve(XTX, XTy)

    def predict(self, X):
        # Add bias term
        X_bias = np.column_stack([np.ones(X.shape[0]), X])
        # Make predictions
        return np.dot(X_bias, self.coef)

# Function to run a binary classification experiment
def run_experiment(digit1, digit2):
    print(f"Running experiment for 1 vs. {digit2}")

    # Create binary labels: +1 for digit1, -1 for digit2
    train_mask = np.logical_or(Y_train == digit1, Y_train == digit2)
    test_mask = np.logical_or(Y_test == digit1, Y_test == digit2)

    # Create binary labels
    y_train_binary = np.where(Y_train == digit1, 1, -1)[train_mask]
    y_test_binary = np.where(Y_test == digit1, 1, -1)[test_mask]

    # Filter the training and test data
    x_train_filtered = X_train[train_mask]
    x_test_filtered = X_test[test_mask]

    # Train the model
    model = LinearRegressionLS()
    model.fit(x_train_filtered, y_train_binary)

    # Predict
    y_pred_raw = model.predict(x_test_filtered)
    y_pred = np.where(y_pred_raw > 0, 1, -1)

    # Calculate accuracy and error
    accuracy = accuracy_score(y_test_binary, y_pred)
    error_rate = 1 - accuracy

    # Calculate confusion matrix
    cm = confusion_matrix(y_test_binary, y_pred)

    print(f"1 vs. {digit2} - Accuracy: {accuracy:.4f}, Error rate: {error_rate:.4f}")
    print(f"Confusion Matrix:\n{cm}\n")

    return error_rate, cm

# Run experiments for 1 vs. each other digit
results = {}
digit1 = 1

for digit2 in range(10):
    if digit2 != digit1:
        error_rate, cm = run_experiment(digit1, digit2)
        results[digit2] = error_rate

# Find the digit with the highest misclassification error
most_similar_digit = max(results, key=results.get)
highest_error = results[most_similar_digit]

print(f"\nResults summary:")
for digit, error in sorted(results.items()):
    print(f"1 vs. {digit}: Misclassification Error = {error:.4f}")

print(f"\nThe digit most similar to digit 1 is {most_similar_digit} with a misclassification error of {highest_error:.4f}")

# Map to multiple choice options
options = {
    0: "(a) 1 vs. 0",
    7: "(b) 1 vs. 7",
    2: "(c) 1 vs. 2",
    9: "(d) 1 vs. 9"
}

print(f"The answer is: {options.get(most_similar_digit, 'Not in provided options')}")

# Visualize the results
plt.figure(figsize=(10, 6))
digits = list(results.keys())
errors = [results[d] for d in digits]
bars = plt.bar(digits, errors)

# Highlight the highest error
max_index = errors.index(max(errors))
bars[max_index].set_color('red')

plt.xlabel('Digit')
plt.ylabel('Misclassification Error')
plt.title('Misclassification Error for Binary Classification of Digit 1 vs. Other Digits')
plt.xticks(digits)
plt.savefig('misclassification_error.png')
plt.close()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:01<00:00, 6.07MB/s]


Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 160kB/s]


Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:01<00:00, 1.30MB/s]


Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 5.16MB/s]


Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw

Running experiment for 1 vs. 0
1 vs. 0 - Accuracy: 0.9953, Error rate: 0.0047
Confusion Matrix:
[[ 977    3]
 [   7 1128]]

Running experiment for 1 vs. 2
1 vs. 2 - Accuracy: 0.9806, Error rate: 0.0194
Confusion Matrix:
[[1000   32]
 [  10 1125]]

Running experiment for 1 vs. 3
1 vs. 3 - Accuracy: 0.9916, Error rate: 0.0084
Confusion Matrix:
[[1004    6]
 [  12 1123]]

Running experiment for 1 vs. 4
1 vs. 4 - Accuracy: 0.9953, Error rate: 0.0047
Confusion Matrix:
[[ 977    5]
 [   5 1130]]

Running experiment for 1 vs. 5
1 vs. 5 - Accuracy: 0.9911, Error rate: 0.0089
Confusion Matrix:
[[ 882   10]
 [   8 1127]]

Running experiment for 1 vs. 6
1 vs. 6 - Accuracy: 0.9952, Error rate: 0.0048
Confusion Matrix:
[[ 951    7]
 [   3 1132]]

Running experiment for 1 vs. 7
1 vs. 7 - Accuracy: 0.9889, Error rate: 0.0111
Confusion Matrix:
[[1007   21]
 [   3 1132]]

Running experiment for 1 vs. 8
1 vs. 8 - Accuracy: 0.9782, Error ra