In [1]:
import pandas as pd
import numpy as np
from numpy.typing import NDArray
from sklearn.model_selection import train_test_split
import sys
import statsmodels.api as sm
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import patsy
import torch
from torch import nn
from tqdm import tqdm
import os
# Make the `package` modules imported below resolvable; the path is relative
# to the current working directory (presumably the notebook's folder -- TODO confirm).
sys.path.append('../../')

# Fix the NumPy and PyTorch RNG seeds so repeated runs of this cell are reproducible.
np.random.seed(0)
torch.manual_seed(0)

# Project-local imports; they must come after the sys.path tweak above.
from package.fim.fim import compute_fims_nn
from package.plot_utils.common import SINGLE_PLOT_FIGSIZE, DPI
from package.neural_network.fit import fit_nn, FitOption
from package.data.mnist import load_data

# Train/test loaders returned by the project's MNIST helper (default arguments).
train_loader, test_loader = load_data()

# Flattened 28x28 input and 10 output classes.
input_dim = 28*28
output_dim = 10

In [None]:
import sys
sys.path.append('/content/effective-dimension')

import shutil
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import numpy as np
import torch.optim as optim
import matplotlib.pyplot as plt
import os
import random

from package.effective_dimension.effective_dimension import EDType, EffectiveDimensionApprox
from package.fim.fim import compute_fims_nn
from package.neural_network.neural_network import ClassNetwork
from package.neural_network.util import get_dimension
# Seed PyTorch's global RNG.
# NOTE(review): the sweep loop further down re-seeds to 0 at the start of every
# run, so this value appears to have no effect on the results -- confirm.
torch.manual_seed(42)

# Project-defined locations for the dataset, saved weights and eigenvalue files.
from package.directories import data_dir, weights_dir, eigenvalues_dir

# Create the figure that the plt.plot / plt.show calls later in this cell draw on.
plt.figure(figsize=(8, 5), dpi=300)


def load_data(batch_size: int=64):
    """Build the MNIST train and test ``DataLoader`` objects.

    Both splits are stored under ``data_dir`` (downloaded when missing) and
    use ``transforms.ToTensor()`` as their only transform.

    Args:
        batch_size: Batch size used for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple; only the training loader
        shuffles its samples.
    """
    to_tensor = transforms.ToTensor()

    # Instantiate the train split first, then the test split.
    mnist_train, mnist_test = (
        datasets.MNIST(root=data_dir, train=is_train, download=True, transform=to_tensor)
        for is_train in (True, False)
    )

    return (
        DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True),
        DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle=False),
    )


# Build the MNIST loaders once; every dropout run below shares them.
train_loader, test_loader = load_data()

# Network shape handed to ClassNetwork: flattened 28x28 input, the listed
# hidden-layer sizes, and 10 output classes.
input_dim = 28*28
layer_sizes = [20, 20, 20, 20]
output_dim = 10

# Create the output trees idempotently. A bare os.mkdir raises FileExistsError
# as soon as the directory already exists (e.g. when this cell is re-run or an
# earlier cell has created it); makedirs(exist_ok=True) also creates any
# missing parent directories.
os.makedirs(os.path.join(weights_dir, 'temp'), exist_ok=True)
os.makedirs(os.path.join(eigenvalues_dir, 'temp'), exist_ok=True)

from tqdm import tqdm


def _mean_loss(model, loader, loss_fn, device):
    """Return the sample-weighted mean of ``loss_fn`` over all batches of ``loader``.

    Args:
        model: Network to evaluate; the caller chooses train/eval mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_fn: Criterion returning the mean loss of one batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sum of per-batch mean losses weighted by batch size, divided by the
        total number of samples.
    """
    loss_sum = 0.0
    sample_count = 0
    # Pure evaluation: skip autograd bookkeeping instead of calling zero_grad().
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = inputs.size(0)
            loss_sum += loss_fn(model(inputs), labels).item() * batch_size
            sample_count += batch_size
    return loss_sum / sample_count


# Settings shared by every dropout run (loop-invariant, so computed once).
save_epochs = [1, 5, 10, 15, 20]  # 1-based epochs at which a checkpoint is written
epochs = 20
device = "cuda" if torch.cuda.is_available() else "cpu"
criterion = nn.CrossEntropyLoss()
dim = get_dimension(input_dim, layer_sizes, output_dim)

for i, rate in enumerate([0, 0.1, 0.2, 0.3, 0.4, 0.5]):
    # Re-seed all RNGs so each dropout rate starts from the same random state.
    np.random.seed(0)
    torch.manual_seed(0)
    random.seed(0)

    print(device)

    # Move the model to its device before handing its parameters to the optimizer.
    network = ClassNetwork(input_dim, layer_sizes, output_dim, dropout_rate=rate)
    network = network.to(torch.device(device))
    optimizer = optim.Adam(network.parameters(), lr=0.001)

    run_dir = os.path.join(weights_dir, 'temp', f'{i}')
    eig_dir = os.path.join(eigenvalues_dir, 'temp', f'{i}')

    # Start every run from an empty checkpoint directory.
    if os.path.isdir(run_dir):
        shutil.rmtree(run_dir)
    os.makedirs(run_dir)
    os.makedirs(eig_dir, exist_ok=True)

    # ---- Training; checkpoints are written at the epochs in save_epochs ----
    network.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        total_samples = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(network(inputs), labels)
            loss.backward()
            optimizer.step()

            # criterion returns a batch mean; weight it by the batch size so the
            # epoch average is exact even when the last batch is smaller.
            batch_size = inputs.size(0)
            epoch_loss += loss.item() * batch_size
            total_samples += batch_size

        avg_loss = epoch_loss / total_samples
        print(f"Epoch {epoch}/{epochs}, Loss: {avg_loss:.4f}")

        if epoch in save_epochs:
            torch.save(network.state_dict(), os.path.join(run_dir, f'epoch_{epoch}.pth'))

    # The optimizer also references the parameters; drop both to release them.
    del network, optimizer

    # ---- Effective dimension of every saved checkpoint ----
    eff_dims = []
    # Iterating over the tqdm object advances the bar by itself, so no manual
    # pbar.update() is issued (it would count every step twice).
    with tqdm(save_epochs, desc="Computing EDs", unit="epoch") as pbar:
        for epoch in pbar:
            network = ClassNetwork(input_dim, layer_sizes, output_dim, dropout_rate=rate)
            network.load_state_dict(torch.load(os.path.join(run_dir, f'epoch_{epoch}.pth')))
            network.eval()

            # File tag kept as in the original (epoch + 1); the path read back
            # below is built from the same tag.
            eig_tag = f'temp_{epoch+1}'
            compute_fims_nn(
                input_dim,
                layer_sizes,
                output_dim,

                num_thetas=1,
                save_dir=eig_dir,
                filename=eig_tag,

                data_loader=train_loader,

                network=network,
                theta_min=-5e-3,
                theta_max=5e-3,

                verbose=False
            )

            eig_path = os.path.join(eig_dir, f'{eig_tag}_{dim}.h5')
            ef = EffectiveDimensionApprox(eig_path, eig_path)
            # 60000 is presumably the MNIST training-set size -- TODO confirm.
            eff_dims.append(
                ef.compute(60000, EDType.LOCAL, gamma=1, eps=1e-7, chunk_size=5, verbose=False)[0] / dim
            )

            # Summarise the final checkpoint. This runs after the append above so
            # that eff_dims[-1] belongs to this checkpoint, not the previous one.
            if epoch == save_epochs[-1]:
                network = network.to(torch.device(device))
                avg_loss_test = _mean_loss(network, test_loader, criterion, device)
                avg_loss_train = _mean_loss(network, train_loader, criterion, device)

                print(f'{i}, {dim}, Train {avg_loss_train}, Test {avg_loss_test}, Gen err: {abs(avg_loss_test - avg_loss_train)}, eff dim: {eff_dims[-1]}')

            del network

    plt.plot(save_epochs, eff_dims, label=f'Rate={rate}')

# NOTE(review): the x values are epoch numbers although the label says 'Iteration'.
plt.legend()
plt.ylabel('Normalized Effective Dimension')
plt.xlabel('Iteration')
plt.title('Normalized Effective Dimension of Neural Networks\nVarying Dropout Rate')
plt.show()

In [None]:
import sys
sys.path.append('/content/effective-dimension')

import shutil
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import numpy as np
import torch.optim as optim
import matplotlib.pyplot as plt
import os
import random

from package.effective_dimension.effective_dimension import EDType, EffectiveDimensionApprox
from package.fim.fim import compute_fims_nn
from package.neural_network.neural_network import ClassNetwork
from package.neural_network.util import get_dimension
# Seed PyTorch's global RNG.
# NOTE(review): the sweep loop further down re-seeds to 0 at the start of every
# run, so this value appears to have no effect on the results -- confirm.
torch.manual_seed(42)

# Project-defined locations for the dataset, saved weights and eigenvalue files.
from package.directories import data_dir, weights_dir, eigenvalues_dir

# Create the figure that the plt.plot / plt.show calls later in this cell draw on.
plt.figure(figsize=(8, 5), dpi=300)


def load_data(batch_size: int=64):
    """Build the MNIST train and test ``DataLoader`` objects.

    Both splits are stored under ``data_dir`` (downloaded when missing) and
    use ``transforms.ToTensor()`` as their only transform.

    Args:
        batch_size: Batch size used for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple; only the training loader
        shuffles its samples.
    """
    to_tensor = transforms.ToTensor()

    # Instantiate the train split first, then the test split.
    mnist_train, mnist_test = (
        datasets.MNIST(root=data_dir, train=is_train, download=True, transform=to_tensor)
        for is_train in (True, False)
    )

    return (
        DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True),
        DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle=False),
    )


# Build the MNIST loaders once; every dropout run below shares them.
train_loader, test_loader = load_data()

# Network shape handed to ClassNetwork: flattened 28x28 input, the listed
# hidden-layer sizes, and 10 output classes.
input_dim = 28*28
layer_sizes = [100, 100]
output_dim = 10

# Create the output trees idempotently. A bare os.mkdir raises FileExistsError
# as soon as the directory already exists (e.g. when this cell is re-run or an
# earlier cell has created it); makedirs(exist_ok=True) also creates any
# missing parent directories.
os.makedirs(os.path.join(weights_dir, 'temp'), exist_ok=True)
os.makedirs(os.path.join(eigenvalues_dir, 'temp'), exist_ok=True)

from tqdm import tqdm


def _mean_loss(model, loader, loss_fn, device):
    """Return the sample-weighted mean of ``loss_fn`` over all batches of ``loader``.

    Args:
        model: Network to evaluate; the caller chooses train/eval mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_fn: Criterion returning the mean loss of one batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sum of per-batch mean losses weighted by batch size, divided by the
        total number of samples.
    """
    loss_sum = 0.0
    sample_count = 0
    # Pure evaluation: skip autograd bookkeeping instead of calling zero_grad().
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = inputs.size(0)
            loss_sum += loss_fn(model(inputs), labels).item() * batch_size
            sample_count += batch_size
    return loss_sum / sample_count


# Settings shared by every dropout run (loop-invariant, so computed once).
save_epochs = [1, 5, 10, 15, 20]  # 1-based epochs at which a checkpoint is written
epochs = 20
device = "cuda" if torch.cuda.is_available() else "cpu"
criterion = nn.CrossEntropyLoss()
dim = get_dimension(input_dim, layer_sizes, output_dim)

for i, rate in enumerate([0, 0.1, 0.2, 0.3, 0.4, 0.5]):
    # Re-seed all RNGs so each dropout rate starts from the same random state.
    np.random.seed(0)
    torch.manual_seed(0)
    random.seed(0)

    print(device)

    # Move the model to its device before handing its parameters to the optimizer.
    network = ClassNetwork(input_dim, layer_sizes, output_dim, dropout_rate=rate)
    network = network.to(torch.device(device))
    optimizer = optim.Adam(network.parameters(), lr=0.001)

    run_dir = os.path.join(weights_dir, 'temp', f'{i}')
    eig_dir = os.path.join(eigenvalues_dir, 'temp', f'{i}')

    # Start every run from an empty checkpoint directory.
    if os.path.isdir(run_dir):
        shutil.rmtree(run_dir)
    os.makedirs(run_dir)
    os.makedirs(eig_dir, exist_ok=True)

    # ---- Training; checkpoints are written at the epochs in save_epochs ----
    network.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        total_samples = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(network(inputs), labels)
            loss.backward()
            optimizer.step()

            # criterion returns a batch mean; weight it by the batch size so the
            # epoch average is exact even when the last batch is smaller.
            batch_size = inputs.size(0)
            epoch_loss += loss.item() * batch_size
            total_samples += batch_size

        avg_loss = epoch_loss / total_samples
        print(f"Epoch {epoch}/{epochs}, Loss: {avg_loss:.4f}")

        if epoch in save_epochs:
            torch.save(network.state_dict(), os.path.join(run_dir, f'epoch_{epoch}.pth'))

    # The optimizer also references the parameters; drop both to release them.
    del network, optimizer

    # ---- Effective dimension of every saved checkpoint ----
    eff_dims = []
    # Iterating over the tqdm object advances the bar by itself, so no manual
    # pbar.update() is issued (it would count every step twice).
    with tqdm(save_epochs, desc="Computing EDs", unit="epoch") as pbar:
        for epoch in pbar:
            network = ClassNetwork(input_dim, layer_sizes, output_dim, dropout_rate=rate)
            network.load_state_dict(torch.load(os.path.join(run_dir, f'epoch_{epoch}.pth')))
            network.eval()

            # File tag kept as in the original (epoch + 1); the path read back
            # below is built from the same tag.
            eig_tag = f'temp_{epoch+1}'
            compute_fims_nn(
                input_dim,
                layer_sizes,
                output_dim,

                num_thetas=1,
                save_dir=eig_dir,
                filename=eig_tag,

                data_loader=train_loader,

                network=network,
                theta_min=-5e-3,
                theta_max=5e-3,

                verbose=False
            )

            eig_path = os.path.join(eig_dir, f'{eig_tag}_{dim}.h5')
            ef = EffectiveDimensionApprox(eig_path, eig_path)
            # 60000 is presumably the MNIST training-set size -- TODO confirm.
            eff_dims.append(
                ef.compute(60000, EDType.LOCAL, gamma=1, eps=1e-7, chunk_size=5, verbose=False)[0] / dim
            )

            # Summarise the final checkpoint. This runs after the append above so
            # that eff_dims[-1] belongs to this checkpoint, not the previous one.
            if epoch == save_epochs[-1]:
                network = network.to(torch.device(device))
                avg_loss_test = _mean_loss(network, test_loader, criterion, device)
                avg_loss_train = _mean_loss(network, train_loader, criterion, device)

                print(f'{i}, {dim}, Train {avg_loss_train}, Test {avg_loss_test}, Gen err: {abs(avg_loss_test - avg_loss_train)}, eff dim: {eff_dims[-1]}')

            del network

    plt.plot(save_epochs, eff_dims, label=f'Rate={rate}')

# NOTE(review): the x values are epoch numbers although the label says 'Iteration'.
plt.legend()
plt.ylabel('Normalized Effective Dimension')
plt.xlabel('Iteration')
plt.title('Normalized Effective Dimension of Neural Networks\nVarying Dropout Rate')
plt.show()

In [None]:
import sys
sys.path.append('/content/effective-dimension')

import shutil
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import numpy as np
import torch.optim as optim
import matplotlib.pyplot as plt
import os
import random

from package.effective_dimension.effective_dimension import EDType, EffectiveDimensionApprox
from package.fim.fim import compute_fims_nn
from package.neural_network.neural_network import ClassNetwork
from package.neural_network.util import get_dimension
# Seed PyTorch's global RNG.
# NOTE(review): the sweep loop further down re-seeds to 0 at the start of every
# run, so this value appears to have no effect on the results -- confirm.
torch.manual_seed(42)

# Project-defined locations for the dataset, saved weights and eigenvalue files.
from package.directories import data_dir, weights_dir, eigenvalues_dir

# Create the figure that the plt.plot / plt.show calls later in this cell draw on.
plt.figure(figsize=(8, 5), dpi=300)


def load_data(batch_size: int=64):
    """Build the MNIST train and test ``DataLoader`` objects.

    Both splits are stored under ``data_dir`` (downloaded when missing) and
    use ``transforms.ToTensor()`` as their only transform.

    Args:
        batch_size: Batch size used for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` tuple; only the training loader
        shuffles its samples.
    """
    to_tensor = transforms.ToTensor()

    # Instantiate the train split first, then the test split.
    mnist_train, mnist_test = (
        datasets.MNIST(root=data_dir, train=is_train, download=True, transform=to_tensor)
        for is_train in (True, False)
    )

    return (
        DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True),
        DataLoader(dataset=mnist_test, batch_size=batch_size, shuffle=False),
    )


# Build the MNIST loaders once; every dropout run below shares them.
train_loader, test_loader = load_data()

# Network shape handed to ClassNetwork: flattened 28x28 input, the listed
# hidden-layer sizes, and 10 output classes.
input_dim = 28*28
layer_sizes = [10]
output_dim = 10

# Create the output trees idempotently. A bare os.mkdir raises FileExistsError
# as soon as the directory already exists (e.g. when this cell is re-run or an
# earlier cell has created it); makedirs(exist_ok=True) also creates any
# missing parent directories.
os.makedirs(os.path.join(weights_dir, 'temp'), exist_ok=True)
os.makedirs(os.path.join(eigenvalues_dir, 'temp'), exist_ok=True)

from tqdm import tqdm


def _mean_loss(model, loader, loss_fn, device):
    """Return the sample-weighted mean of ``loss_fn`` over all batches of ``loader``.

    Args:
        model: Network to evaluate; the caller chooses train/eval mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_fn: Criterion returning the mean loss of one batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sum of per-batch mean losses weighted by batch size, divided by the
        total number of samples.
    """
    loss_sum = 0.0
    sample_count = 0
    # Pure evaluation: skip autograd bookkeeping instead of calling zero_grad().
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = inputs.size(0)
            loss_sum += loss_fn(model(inputs), labels).item() * batch_size
            sample_count += batch_size
    return loss_sum / sample_count


# Settings shared by every dropout run (loop-invariant, so computed once).
save_epochs = [1, 5, 10, 15, 20]  # 1-based epochs at which a checkpoint is written
epochs = 20
device = "cuda" if torch.cuda.is_available() else "cpu"
criterion = nn.CrossEntropyLoss()
dim = get_dimension(input_dim, layer_sizes, output_dim)

for i, rate in enumerate([0, 0.1, 0.2, 0.3, 0.4, 0.5]):
    # Re-seed all RNGs so each dropout rate starts from the same random state.
    np.random.seed(0)
    torch.manual_seed(0)
    random.seed(0)

    print(device)

    # Move the model to its device before handing its parameters to the optimizer.
    network = ClassNetwork(input_dim, layer_sizes, output_dim, dropout_rate=rate)
    network = network.to(torch.device(device))
    optimizer = optim.Adam(network.parameters(), lr=0.001)

    run_dir = os.path.join(weights_dir, 'temp', f'{i}')
    eig_dir = os.path.join(eigenvalues_dir, 'temp', f'{i}')

    # Start every run from an empty checkpoint directory.
    if os.path.isdir(run_dir):
        shutil.rmtree(run_dir)
    os.makedirs(run_dir)
    os.makedirs(eig_dir, exist_ok=True)

    # ---- Training; checkpoints are written at the epochs in save_epochs ----
    network.train()
    for epoch in range(1, epochs + 1):
        epoch_loss = 0.0
        total_samples = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            loss = criterion(network(inputs), labels)
            loss.backward()
            optimizer.step()

            # criterion returns a batch mean; weight it by the batch size so the
            # epoch average is exact even when the last batch is smaller.
            batch_size = inputs.size(0)
            epoch_loss += loss.item() * batch_size
            total_samples += batch_size

        avg_loss = epoch_loss / total_samples
        print(f"Epoch {epoch}/{epochs}, Loss: {avg_loss:.4f}")

        if epoch in save_epochs:
            torch.save(network.state_dict(), os.path.join(run_dir, f'epoch_{epoch}.pth'))

    # The optimizer also references the parameters; drop both to release them.
    del network, optimizer

    # ---- Effective dimension of every saved checkpoint ----
    eff_dims = []
    # Iterating over the tqdm object advances the bar by itself, so no manual
    # pbar.update() is issued (it would count every step twice).
    with tqdm(save_epochs, desc="Computing EDs", unit="epoch") as pbar:
        for epoch in pbar:
            network = ClassNetwork(input_dim, layer_sizes, output_dim, dropout_rate=rate)
            network.load_state_dict(torch.load(os.path.join(run_dir, f'epoch_{epoch}.pth')))
            network.eval()

            # File tag kept as in the original (epoch + 1); the path read back
            # below is built from the same tag.
            eig_tag = f'temp_{epoch+1}'
            compute_fims_nn(
                input_dim,
                layer_sizes,
                output_dim,

                num_thetas=1,
                save_dir=eig_dir,
                filename=eig_tag,

                data_loader=train_loader,

                network=network,
                theta_min=-5e-3,
                theta_max=5e-3,

                verbose=False
            )

            eig_path = os.path.join(eig_dir, f'{eig_tag}_{dim}.h5')
            ef = EffectiveDimensionApprox(eig_path, eig_path)
            # 60000 is presumably the MNIST training-set size -- TODO confirm.
            eff_dims.append(
                ef.compute(60000, EDType.LOCAL, gamma=1, eps=1e-7, chunk_size=5, verbose=False)[0] / dim
            )

            # Summarise the final checkpoint. This runs after the append above so
            # that eff_dims[-1] belongs to this checkpoint, not the previous one.
            if epoch == save_epochs[-1]:
                network = network.to(torch.device(device))
                avg_loss_test = _mean_loss(network, test_loader, criterion, device)
                avg_loss_train = _mean_loss(network, train_loader, criterion, device)

                print(f'{i}, {dim}, Train {avg_loss_train}, Test {avg_loss_test}, Gen err: {abs(avg_loss_test - avg_loss_train)}, eff dim: {eff_dims[-1]}')

            del network

    plt.plot(save_epochs, eff_dims, label=f'Rate={rate}')

# NOTE(review): the x values are epoch numbers although the label says 'Iteration'.
plt.legend()
plt.ylabel('Normalized Effective Dimension')
plt.xlabel('Iteration')
plt.title('Normalized Effective Dimension of Neural Networks\nVarying Dropout Rate')
plt.show()