# Unsupervised learning using PaCMAP
### This file tests combining all networks, allowing the gradients from the supervised step to flow all the way back through the previously trained networks.

In [27]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torchvision
import numpy as np
from sklearn.model_selection import train_test_split
from pacmap import PaCMAP
from tqdm import tqdm
import matplotlib.pyplot as plt

In [28]:
def load_and_preprocess_data():
    """Fetch MNIST through torchvision and return it as NumPy arrays.

    The data is stored under ``./data`` and requested with ``download=True``.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``. Images are float32
        arrays divided by 255.0; labels are the dataset targets converted
        to NumPy arrays.
    """
    def _to_arrays(dataset):
        # Images -> float32 scaled by 1/255, targets -> NumPy array.
        images = dataset.data.numpy().astype('float32') / 255.0
        return images, dataset.targets.numpy()

    x_train, y_train = _to_arrays(
        torchvision.datasets.MNIST(root='./data', train=True, download=True)
    )
    x_test, y_test = _to_arrays(
        torchvision.datasets.MNIST(root='./data', train=False, download=True)
    )
    return x_train, y_train, x_test, y_test

In [29]:
def split_data(x_train, y_train, labeled_ratio):
    """Partition the training set into a labeled part and an unlabeled rest.

    Args:
        x_train: Training samples.
        y_train: Labels aligned with ``x_train``; also used for stratification.
        labeled_ratio: Fraction of ``x_train`` to keep as labeled data.

    Returns:
        Tuple ``(x_labeled, x_unlabeled, y_labeled)``. The labels of the
        unlabeled part are discarded.
    """
    labeled_count = int(labeled_ratio * len(x_train))
    parts = train_test_split(
        x_train,
        y_train,
        train_size=labeled_count,
        stratify=y_train,
        random_state=42,  # fixed seed so the split is reproducible
    )
    # parts == [x_labeled, x_unlabeled, y_labeled, y_unlabeled]
    return parts[0], parts[1], parts[2]

In [30]:
def perform_pacmap(data, n_components, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0):
    """Embed ``data`` with PaCMAP after flattening every sample to a vector.

    Args:
        data: Array whose first axis indexes samples; remaining axes are
            flattened before fitting.
        n_components: Dimensionality of the embedding.
        n_neighbors, MN_ratio, FP_ratio: Forwarded unchanged to ``PaCMAP``.

    Returns:
        The result of ``PaCMAP.fit_transform`` on the flattened data.
    """
    flat = data.reshape(data.shape[0], -1)
    reducer = PaCMAP(
        n_components=n_components,
        n_neighbors=n_neighbors,
        MN_ratio=MN_ratio,
        FP_ratio=FP_ratio,
    )
    return reducer.fit_transform(flat)

In [31]:
class Net(nn.Module):
    """First-stage network: flatten, then 28*28 -> 256 -> 128 -> 64.

    ReLU follows the first two linear layers; the last layer is linear.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the 64-dimensional output for a batch ``x``."""
        hidden = self.relu(self.fc1(self.flatten(x)))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [32]:
class SecondNet(nn.Module):
    """Second-stage network; with the defaults 64 -> 32 -> 32 -> 16.

    Args:
        input_dim: Size of the input features.
        hidden_dim: Width of both hidden layers.
        output_dim: Size of the output.
    """

    def __init__(self, input_dim=64, hidden_dim=32, output_dim=16):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply two ReLU-activated linear layers followed by a linear output."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [33]:
class ThirdNet(nn.Module):
    """Third-stage classifier head; with the defaults 16 -> 16 -> 16 -> 10.

    Args:
        input_dim: Size of the input features.
        hidden_dim: Width of both hidden layers.
        num_classes: Number of output scores.
    """

    def __init__(self, input_dim=16, hidden_dim=16, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw (un-normalized) class scores for ``x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [34]:
class CombinedNet(nn.Module):
    """Chain three modules so they can be trained end to end.

    The sub-modules are registered as ``model1``, ``model2`` and ``model3``,
    so an optimizer built from ``parameters()`` updates all of them.
    """

    def __init__(self, model1, model2, model3):
        super().__init__()
        self.model1 = model1
        self.model2 = model2
        self.model3 = model3

    def forward(self, x):
        """Feed ``x`` through ``model1``, ``model2`` and ``model3`` in order."""
        for stage in (self.model1, self.model2, self.model3):
            x = stage(x)
        return x

In [35]:
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    """Run a plain training loop and report the mean batch loss per epoch.

    Args:
        model: Module to optimize (updated in place).
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer bound to the model's parameters.
        num_epochs: Number of passes over ``train_loader``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        List with one entry per epoch: the summed batch losses divided by
        the number of batches.
    """
    history = []
    for _ in tqdm(range(num_epochs)):
        model.train()
        running_loss = 0
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()
            running_loss += batch_loss.item()
        history.append(running_loss / len(train_loader))
    return history

In [36]:
def train_first_stage(x_unlabeled, x_labeled, device):
    """Fit the first network to reproduce a 64-dimensional PaCMAP embedding.

    Unlabeled and labeled samples are concatenated (unlabeled first),
    embedded with PaCMAP, and a ``Net`` is regressed onto that embedding
    with MSE loss for 10 epochs.

    Args:
        x_unlabeled: Unlabeled samples.
        x_labeled: Labeled samples (labels are not used here).
        device: Device to train on.

    Returns:
        Tuple ``(model1, x_train_nn)``: the trained network and the input
        tensor of all samples with an extra axis inserted at position 1.
    """
    all_samples = np.concatenate([x_unlabeled, x_labeled])
    embedding = perform_pacmap(all_samples, n_components=64)

    x_train_nn = torch.FloatTensor(all_samples).unsqueeze(1)
    loader = DataLoader(
        TensorDataset(x_train_nn, torch.FloatTensor(embedding)),
        batch_size=32,
        shuffle=True,
    )

    model1 = Net().to(device)
    train_model(
        model1,
        loader,
        nn.MSELoss(),
        optim.Adam(model1.parameters()),
        num_epochs=10,
        device=device,
    )
    return model1, x_train_nn

In [37]:
def train_second_stage(model1, x_train_nn, device):
    """Trains the second neural network.

    ``model1`` maps ``x_train_nn`` to its output features, PaCMAP reduces
    those features to 16 dimensions, and a ``SecondNet`` is regressed from
    the features onto the 16-dimensional embedding with MSE loss for
    10 epochs.

    Args:
        model1: Trained first-stage network. It is switched to eval mode.
        x_train_nn: Input tensor as returned by the first-stage trainer.
        device: Device used for the forward pass and for training.

    Returns:
        The trained ``SecondNet``.
    """
    # Inference only: without no_grad() autograd would record a graph for the
    # whole dataset just to have it thrown away by detach().
    model1.eval()
    with torch.no_grad():
        transformed_unlabeled = model1(x_train_nn.to(device)).cpu().numpy()

    x_transformed_16 = perform_pacmap(transformed_unlabeled, n_components=16)
    x_train_2 = torch.FloatTensor(transformed_unlabeled)
    y_train_2 = torch.FloatTensor(x_transformed_16)
    train_loader_2 = DataLoader(TensorDataset(x_train_2, y_train_2), batch_size=32, shuffle=True)

    model2 = SecondNet().to(device)
    criterion2 = nn.MSELoss()
    optimizer2 = optim.Adam(model2.parameters())
    train_model(model2, train_loader_2, criterion2, optimizer2, num_epochs=10, device=device)

    return model2

In [38]:
def train_third_stage(model1, model2, x_labeled, y_labeled, device):
    """Train the classifier head on features produced by the first two stages.

    Both upstream models are put in eval mode and applied without gradient
    tracking; a ``ThirdNet`` is then trained on their output with
    cross-entropy loss for 10 epochs.

    Args:
        model1: Trained first-stage network.
        model2: Trained second-stage network.
        x_labeled: Labeled samples.
        y_labeled: Integer class labels aligned with ``x_labeled``.
        device: Device to run on.

    Returns:
        The trained ``ThirdNet``.
    """
    images = torch.FloatTensor(x_labeled).unsqueeze(1)
    with torch.no_grad():
        model1.eval()
        model2.eval()
        features = model2(model1(images.to(device))).cpu().numpy()

    loader = DataLoader(
        TensorDataset(torch.FloatTensor(features), torch.LongTensor(y_labeled)),
        batch_size=32,
        shuffle=True,
    )

    model3 = ThirdNet().to(device)
    train_model(
        model3,
        loader,
        nn.CrossEntropyLoss(),
        optim.Adam(model3.parameters()),
        num_epochs=10,
        device=device,
    )
    return model3

In [39]:
def train_combined_model(model1, model2, model3, x_labeled, y_labeled, device, num_epochs=10):
    """Fine-tune all three networks end to end on the labeled data.

    The three models are wrapped in a ``CombinedNet`` and optimized jointly
    with Adam and cross-entropy loss, so gradients from the classification
    loss reach ``model1`` and ``model2`` as well.

    Args:
        model1: First-stage network.
        model2: Second-stage network.
        model3: Classifier head.
        x_labeled: Labeled samples.
        y_labeled: Integer class labels aligned with ``x_labeled``.
        device: Device to train on; the full labeled set is moved there
            up front.
        num_epochs: Number of training epochs.

    Returns:
        The trained ``CombinedNet``.
    """
    combined_model = CombinedNet(model1, model2, model3).to(device)

    x_labeled_tensor = torch.FloatTensor(x_labeled).unsqueeze(1).to(device)
    y_labeled_tensor = torch.LongTensor(y_labeled).to(device)
    train_loader = DataLoader(
        TensorDataset(x_labeled_tensor, y_labeled_tensor), batch_size=32, shuffle=True
    )

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(combined_model.parameters())

    # Reuse the shared loop instead of duplicating it; its per-batch
    # .to(device) is a no-op because the tensors already live on `device`.
    train_model(combined_model, train_loader, criterion, optimizer, num_epochs, device)

    return combined_model

In [40]:
def train_first_stage_early_stopping(x_unlabeled, device, val_frac=0.1):
    """Trains the first neural network with early stopping.

    ``x_unlabeled`` is embedded into 64 dimensions with PaCMAP and a ``Net``
    is regressed onto the embedding with MSE loss. The last ``val_frac`` of
    the samples is held out; training runs for at most 20 epochs and stops
    at the first epoch whose summed validation loss does not improve. The
    weights of the best epoch are restored before returning.

    Args:
        x_unlabeled: Samples to embed and train on.
        device: Device to train on.
        val_frac: Fraction of the samples (taken from the end) used for
            validation. If this yields no validation samples, all 20 epochs
            are run without early stopping.

    Returns:
        Tuple ``(model1, x_train_nn)``: the trained network and the full
        input tensor (training and validation part) with an extra axis
        inserted at position 1.
    """
    x_reduced = perform_pacmap(x_unlabeled, n_components=64)
    x_train_nn = torch.FloatTensor(x_unlabeled).unsqueeze(1)
    y_train_nn = torch.FloatTensor(x_reduced)

    # Split on a non-negative index: slicing with `[:-val_len]` would give an
    # EMPTY training set (and a full validation set) when val_len is 0.
    val_len = int(val_frac * len(x_reduced))
    n_train = max(len(x_train_nn) - val_len, 0)
    train_dataset = TensorDataset(x_train_nn[:n_train], y_train_nn[:n_train])
    val_dataset = TensorDataset(x_train_nn[n_train:], y_train_nn[n_train:])
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    has_val = len(val_dataset) > 0

    model1 = Net().to(device)
    criterion1 = nn.MSELoss()
    optimizer1 = optim.Adam(model1.parameters())
    best_loss = np.inf
    best_state = None

    for _ in tqdm(range(20)):
        model1.train()
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer1.zero_grad()
            loss = criterion1(model1(batch_x), batch_y)
            loss.backward()
            optimizer1.step()

        if not has_val:
            continue  # nothing to validate against: use the full epoch budget

        # Validation is inference only: eval mode, no autograd bookkeeping.
        model1.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                val_loss += criterion1(model1(batch_x), batch_y).item()

        if val_loss < best_loss:
            best_loss = val_loss
            # state_dict() hands out references to the live parameters, so the
            # snapshot must be cloned or later optimizer steps overwrite it.
            best_state = {name: tensor.clone() for name, tensor in model1.state_dict().items()}
        else:
            break

    # best_state stays None without validation data or if the loss was NaN.
    if best_state is not None:
        model1.load_state_dict(best_state)

    return model1, x_train_nn

In [41]:
def train_second_stage_early_stopping(model1, x_train_nn, device, val_frac=0.1):
    """Trains the second neural network with early stopping.

    ``model1`` maps ``x_train_nn`` to its output features, PaCMAP reduces
    them to 16 dimensions, and a ``SecondNet`` is regressed from the
    features onto that embedding with MSE loss. The last ``val_frac`` of the
    samples is held out; training runs for at most 20 epochs and stops at
    the first epoch whose summed validation loss does not improve. The
    weights of the best epoch are restored before returning.

    Args:
        model1: Trained first-stage network. It is switched to eval mode.
        x_train_nn: Input tensor as returned by the first-stage trainer.
        device: Device used for the forward pass and for training.
        val_frac: Fraction of the samples (taken from the end) used for
            validation. If this yields no validation samples, all 20 epochs
            are run without early stopping.

    Returns:
        The trained ``SecondNet``.
    """
    # Inference only: avoid recording an autograd graph for the whole dataset.
    model1.eval()
    with torch.no_grad():
        transformed_unlabeled = model1(x_train_nn.to(device)).cpu().numpy()

    x_transformed_16 = perform_pacmap(transformed_unlabeled, n_components=16)
    x_train_2 = torch.FloatTensor(transformed_unlabeled)
    y_train_2 = torch.FloatTensor(x_transformed_16)

    # Split on a non-negative index: slicing with `[:-val_len]` would give an
    # EMPTY training set (and a full validation set) when val_len is 0.
    val_len = int(val_frac * len(x_transformed_16))
    n_train = max(len(x_train_2) - val_len, 0)
    train_dataset_2 = TensorDataset(x_train_2[:n_train], y_train_2[:n_train])
    val_dataset_2 = TensorDataset(x_train_2[n_train:], y_train_2[n_train:])
    train_loader_2 = DataLoader(train_dataset_2, batch_size=32, shuffle=True)
    val_loader_2 = DataLoader(val_dataset_2, batch_size=32, shuffle=False)
    has_val = len(val_dataset_2) > 0

    model2 = SecondNet().to(device)
    criterion2 = nn.MSELoss()
    optimizer2 = optim.Adam(model2.parameters())
    best_loss = np.inf
    best_state = None

    for _ in tqdm(range(20)):
        model2.train()
        for batch_x, batch_y in train_loader_2:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer2.zero_grad()
            loss = criterion2(model2(batch_x), batch_y)
            loss.backward()
            optimizer2.step()

        if not has_val:
            continue  # nothing to validate against: use the full epoch budget

        # Validation is inference only: eval mode, no autograd bookkeeping.
        model2.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader_2:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                val_loss += criterion2(model2(batch_x), batch_y).item()

        if val_loss < best_loss:
            best_loss = val_loss
            # state_dict() hands out references to the live parameters, so the
            # snapshot must be cloned or later optimizer steps overwrite it.
            best_state = {name: tensor.clone() for name, tensor in model2.state_dict().items()}
        else:
            break

    # best_state stays None without validation data or if the loss was NaN.
    if best_state is not None:
        model2.load_state_dict(best_state)

    return model2

In [42]:
def train_third_stage_early_stopping(model1, model2, x_labeled, y_labeled, device, val_frac=0.1):
    """Trains the third neural network with early stopping.

    With 10 or fewer labeled samples this falls back to
    ``train_third_stage`` (no validation split). Otherwise the labeled
    samples are passed through ``model1`` and ``model2`` without gradient
    tracking and a ``ThirdNet`` is trained on the result with cross-entropy
    loss. The last ``val_frac`` of the samples is held out; training runs
    for at most 20 epochs and stops at the first epoch whose summed
    validation loss does not improve. The weights of the best epoch are
    restored before returning.

    Args:
        model1: Trained first-stage network. It is switched to eval mode.
        model2: Trained second-stage network. It is switched to eval mode.
        x_labeled: Labeled samples.
        y_labeled: Integer class labels aligned with ``x_labeled``.
        device: Device to run on.
        val_frac: Fraction of the samples (taken from the end) used for
            validation. If this yields no validation samples, all 20 epochs
            are run without early stopping.

    Returns:
        The trained ``ThirdNet``.
    """
    if len(x_labeled) <= 10:
        return train_third_stage(model1, model2, x_labeled, y_labeled, device)

    x_labeled_tensor = torch.FloatTensor(x_labeled).unsqueeze(1)
    with torch.no_grad():
        model1.eval()
        model2.eval()
        intermediate = model1(x_labeled_tensor.to(device))
        processed_labeled = model2(intermediate).cpu().numpy()

    x_train_3 = torch.FloatTensor(processed_labeled)
    y_train_3 = torch.LongTensor(y_labeled)

    # Split on a non-negative index: slicing with `[:-val_len]` would give an
    # EMPTY training set (and a full validation set) when val_len is 0.
    val_len = int(val_frac * len(x_labeled))
    n_train = max(len(x_train_3) - val_len, 0)
    train_dataset_3 = TensorDataset(x_train_3[:n_train], y_train_3[:n_train])
    val_dataset_3 = TensorDataset(x_train_3[n_train:], y_train_3[n_train:])
    train_loader_3 = DataLoader(train_dataset_3, batch_size=32, shuffle=True)
    val_loader_3 = DataLoader(val_dataset_3, batch_size=32, shuffle=False)
    has_val = len(val_dataset_3) > 0

    model3 = ThirdNet().to(device)
    criterion3 = nn.CrossEntropyLoss()
    optimizer3 = optim.Adam(model3.parameters())
    best_loss = np.inf
    best_state = None

    for _ in tqdm(range(20)):
        model3.train()
        for batch_x, batch_y in train_loader_3:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            optimizer3.zero_grad()
            loss = criterion3(model3(batch_x), batch_y)
            loss.backward()
            optimizer3.step()

        if not has_val:
            continue  # nothing to validate against: use the full epoch budget

        # Validation is inference only: eval mode, no autograd bookkeeping.
        model3.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader_3:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                val_loss += criterion3(model3(batch_x), batch_y).item()

        if val_loss < best_loss:
            best_loss = val_loss
            # state_dict() hands out references to the live parameters, so the
            # snapshot must be cloned or later optimizer steps overwrite it.
            best_state = {name: tensor.clone() for name, tensor in model3.state_dict().items()}
        else:
            break

    # best_state stays None without validation data or if the loss was NaN.
    if best_state is not None:
        model3.load_state_dict(best_state)

    return model3

In [43]:
def train_combined_model_early_stopping(model1, model2, model3, x_labeled, y_labeled, device, num_epochs=10, val_frac=0.1):
    """Fine-tune all three networks end to end with early stopping.

    With 10 or fewer labeled samples this falls back to
    ``train_combined_model`` (no validation split). Otherwise the models are
    wrapped in a ``CombinedNet`` and trained jointly with Adam and
    cross-entropy loss. The last ``val_frac`` of the labeled samples is held
    out; training runs for at most 20 epochs and stops at the first epoch
    whose summed validation loss does not improve. The weights of the best
    epoch are restored before returning.

    Args:
        model1: First-stage network.
        model2: Second-stage network.
        model3: Classifier head.
        x_labeled: Labeled samples.
        y_labeled: Integer class labels aligned with ``x_labeled``.
        device: Device to train on; the full labeled set is moved there
            up front.
        num_epochs: Only forwarded to the ``train_combined_model`` fallback;
            the early-stopping loop itself is capped at 20 epochs.
        val_frac: Fraction of the samples (taken from the end) used for
            validation. If this yields no validation samples, all 20 epochs
            are run without early stopping.

    Returns:
        The trained ``CombinedNet``.
    """
    if len(x_labeled) <= 10:
        return train_combined_model(model1, model2, model3, x_labeled, y_labeled, device, num_epochs)

    combined_model = CombinedNet(model1, model2, model3).to(device)

    x_labeled_tensor = torch.FloatTensor(x_labeled).unsqueeze(1).to(device)
    y_labeled_tensor = torch.LongTensor(y_labeled).to(device)

    # Split on a non-negative index: slicing with `[:-val_len]` would give an
    # EMPTY training set (and a full validation set) when val_len is 0.
    val_len = int(val_frac * len(x_labeled))
    n_train = max(len(x_labeled_tensor) - val_len, 0)
    train_dataset = TensorDataset(x_labeled_tensor[:n_train], y_labeled_tensor[:n_train])
    val_dataset = TensorDataset(x_labeled_tensor[n_train:], y_labeled_tensor[n_train:])
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    has_val = len(val_dataset) > 0

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(combined_model.parameters())
    best_loss = np.inf
    best_state = None

    for _ in tqdm(range(20)):
        combined_model.train()
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            loss = criterion(combined_model(batch_x), batch_y)
            loss.backward()
            optimizer.step()

        if not has_val:
            continue  # nothing to validate against: use the full epoch budget

        # Validation is inference only: eval mode, no autograd bookkeeping.
        combined_model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                val_loss += criterion(combined_model(batch_x), batch_y).item()

        if val_loss < best_loss:
            best_loss = val_loss
            # state_dict() hands out references to the live parameters, so the
            # snapshot must be cloned or later optimizer steps overwrite it.
            best_state = {name: tensor.clone() for name, tensor in combined_model.state_dict().items()}
        else:
            break

    # best_state stays None without validation data or if the loss was NaN.
    if best_state is not None:
        combined_model.load_state_dict(best_state)

    return combined_model

In [44]:
def process_and_classify(x_new, combined_model, device):
    """Predict a class index for every sample in ``x_new``.

    The model is switched to eval mode and applied without gradient
    tracking to the whole input at once.

    Args:
        x_new: Samples to classify; converted to a float tensor and given
            an extra axis at position 1 before the forward pass.
        combined_model: Module producing one score per class along dim 1.
        device: Device the input is moved to.

    Returns:
        NumPy array with the index of the highest score for each sample.
    """
    combined_model.eval()
    inputs = torch.FloatTensor(x_new).unsqueeze(1).to(device)
    with torch.no_grad():
        scores = combined_model(inputs)
        predicted = scores.max(1)[1]
    return predicted.cpu().numpy()

In [45]:
def evaluate_combined_model(combined_model, x_test, y_test, device):
    """Print and return the fraction of test samples classified correctly.

    Args:
        combined_model: Model passed on to ``process_and_classify``.
        x_test: Test samples.
        y_test: True labels aligned with ``x_test``.
        device: Device used for inference.

    Returns:
        Mean of the element-wise comparison between predictions and labels.
    """
    predictions = process_and_classify(x_test, combined_model, device)
    hits = predictions == y_test
    accuracy = np.mean(hits)
    print(f"Accuracy on the test set: {accuracy:.2f}")
    return accuracy

In [46]:
def train_and_evaluate(x_labeled, y_labeled, x_unlabeled, x_test, y_test):
    """Run the fixed-epoch pipeline and return the test accuracy.

    Steps: (1) fit the first network to a 64-d PaCMAP embedding of all
    training samples, (2) fit the second network to a 16-d PaCMAP embedding
    of the first network's output, (3) attach a freshly initialized
    ``ThirdNet`` and train all three jointly on the labeled data,
    (4) evaluate on the test set. CUDA is used when available.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Stages 1 and 2: PaCMAP-supervised pre-training.
    model1, x_train_nn = train_first_stage(x_unlabeled, x_labeled, device)
    model2 = train_second_stage(model1, x_train_nn, device)

    # Stage 3: untrained classifier head, then joint supervised training.
    head = ThirdNet().to(device)
    combined_model = train_combined_model(model1, model2, head, x_labeled, y_labeled, device)

    return evaluate_combined_model(combined_model, x_test, y_test, device)

In [47]:
def train_and_evaluate_early_stopping(x_labeled, y_labeled, x_unlabeled, x_test, y_test):
    """Run the early-stopping pipeline and return the test accuracy.

    Steps: (1) fit the first network to a 64-d PaCMAP embedding of the
    unlabeled samples, (2) fit the second network to a 16-d PaCMAP embedding
    of the first network's output, (3) attach a freshly initialized
    ``ThirdNet`` and train all three jointly on the labeled data,
    (4) evaluate on the test set. Every training step uses its
    early-stopping variant. CUDA is used when available.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Stages 1 and 2: PaCMAP-supervised pre-training with early stopping.
    model1, x_train_nn = train_first_stage_early_stopping(x_unlabeled, device)
    model2 = train_second_stage_early_stopping(model1, x_train_nn, device)

    # Stage 3: untrained classifier head, then joint supervised training.
    head = ThirdNet().to(device)
    combined_model = train_combined_model_early_stopping(model1, model2, head, x_labeled, y_labeled, device)

    return evaluate_combined_model(combined_model, x_test, y_test, device)

In [48]:
def train_and_evaluate_with_labeled_ratio(labeled_ratio):
    """Load MNIST, keep ``labeled_ratio`` of it labeled, and run the pipeline.

    Returns:
        The test accuracy reported by ``train_and_evaluate``.
    """
    x_train, y_train, x_test, y_test = load_and_preprocess_data()
    labeled_x, unlabeled_x, labeled_y = split_data(x_train, y_train, labeled_ratio)
    return train_and_evaluate(labeled_x, labeled_y, unlabeled_x, x_test, y_test)

In [49]:
def train_and_evaluate_with_labeled_ratio_early_stopping(labeled_ratio):
    """Load MNIST, keep ``labeled_ratio`` of it labeled, and run the early-stopping pipeline.

    Returns:
        The test accuracy reported by ``train_and_evaluate_early_stopping``.
    """
    x_train, y_train, x_test, y_test = load_and_preprocess_data()
    labeled_x, unlabeled_x, labeled_y = split_data(x_train, y_train, labeled_ratio)
    return train_and_evaluate_early_stopping(labeled_x, labeled_y, unlabeled_x, x_test, y_test)

In [24]:
# Sweep over labeled-data fractions: run the early-stopping pipeline once per
# ratio and collect the resulting test accuracies.
labeled_ratios = [0.5, 0.1, 0.05, 0.01, 0.001]
accuracies = {}  # labeled ratio -> test accuracy

for ratio in labeled_ratios:
    print(f"\nTraining with {ratio*100}% labeled data:")
    # Each call reloads MNIST and re-splits it for the given ratio.
    accuracy = train_and_evaluate_with_labeled_ratio_early_stopping(ratio)
    accuracies[ratio] = accuracy
# NOTE(review): labeled_ratios is re-assigned to the same list and
# accuracies_val is not used in the visible part of the notebook -- confirm
# whether another cell still depends on them.
labeled_ratios = [0.5, 0.1, 0.05, 0.01, 0.001]
accuracies_val = {}


Training with 50.0% labeled data:


 50%|█████     | 10/20 [01:01<01:01,  6.19s/it]
 95%|█████████▌| 19/20 [00:30<00:01,  1.63s/it]
 15%|█▌        | 3/20 [00:27<02:37,  9.25s/it]


Accuracy on the test set: 0.97

Training with 10.0% labeled data:


 35%|███▌      | 7/20 [01:29<02:45, 12.72s/it]
 90%|█████████ | 18/20 [00:56<00:06,  3.13s/it]
 15%|█▌        | 3/20 [00:05<00:32,  1.89s/it]


Accuracy on the test set: 0.96

Training with 5.0% labeled data:


 25%|██▌       | 5/20 [01:06<03:18, 13.21s/it]
 95%|█████████▌| 19/20 [01:07<00:03,  3.54s/it]
 20%|██        | 4/20 [00:03<00:14,  1.12it/s]


Accuracy on the test set: 0.95

Training with 1.0% labeled data:


 30%|███       | 6/20 [01:19<03:05, 13.28s/it]
 70%|███████   | 14/20 [00:48<00:20,  3.43s/it]
 25%|██▌       | 5/20 [00:00<00:02,  6.06it/s]


Accuracy on the test set: 0.94

Training with 0.1% labeled data:


 30%|███       | 6/20 [01:24<03:18, 14.17s/it]
 50%|█████     | 10/20 [00:34<00:34,  3.46s/it]
 65%|██████▌   | 13/20 [00:00<00:00, 53.72it/s]


Accuracy on the test set: 0.68


In [50]:
# Extreme low-label experiment: exactly one labeled image per digit class,
# everything else is treated as unlabeled.
x_train, y_train, x_test, y_test = load_and_preprocess_data()

# One sample per class
x_labeled_one_per_class = []
y_labeled_one_per_class = []
x_unlabeled_one_per_class = []
for i in range(10):
    indices = np.where(y_train == i)[0]  # positions of all training images with label i
    x_labeled_one_per_class.append(x_train[indices[0]])  # first occurrence becomes the labeled example
    y_labeled_one_per_class.append(i)
    # The remaining images of this class join the unlabeled pool.
    # NOTE(review): the pool therefore ends up grouped by class (0..9), while
    # the early-stopping stages hold out the END of their input as validation
    # data -- confirm this ordering is intended.
    x_unlabeled_one_per_class.extend(x_train[indices[1:]])

x_labeled_one_per_class = np.array(x_labeled_one_per_class)
y_labeled_one_per_class = np.array(y_labeled_one_per_class)
x_unlabeled_one_per_class = np.array(x_unlabeled_one_per_class)

print("\nTraining with one sample per class:")
# With only 10 labeled samples the combined early-stopping trainer takes its
# small-data fallback (fixed number of epochs, no validation split).
accuracy_one_per_class = train_and_evaluate_early_stopping(x_labeled_one_per_class, y_labeled_one_per_class, x_unlabeled_one_per_class, x_test, y_test)
# Stored next to the ratio-keyed results; relies on `accuracies` from an earlier cell.
accuracies['one_per_class'] = accuracy_one_per_class


Training with one sample per class:


 20%|██        | 4/20 [00:55<03:40, 13.76s/it]
 65%|██████▌   | 13/20 [00:42<00:22,  3.28s/it]
100%|██████████| 10/10 [00:00<00:00, 134.96it/s]

Accuracy on the test set: 0.28





In [51]:
# Print results
# Report every collected accuracy; the 'one_per_class' entry has its own wording.
for ratio, accuracy in accuracies.items():
    if ratio != 'one_per_class':
        print(f"Accuracy with {ratio*100}% labeled data: {accuracy}")
        continue
    print(f"Accuracy with one sample per class: {accuracy}")

Accuracy with 50.0% labeled data: 0.9684
Accuracy with 10.0% labeled data: 0.9619
Accuracy with 5.0% labeled data: 0.9506
Accuracy with 1.0% labeled data: 0.936
Accuracy with 0.1% labeled data: 0.677
Accuracy with one sample per class: 0.2825
