In [6]:
from time import time
import torch
import numpy as np
import torch.nn.functional as F
from typing import List
from collections import OrderedDict
from torch import nn
from torch import asin
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data.dataloader import default_collate
from torchvision import datasets
from torchvision.transforms import ToTensor
import layers

import importlib
# Reload the already-imported `layers` module and then re-bind the class names from the
# reloaded module (presumably so edits to layers.py take effect without restarting the kernel).
importlib.reload(layers)
from layers import LinearArcsine, RandomFeatureMap, ArcsinNN, RepresentArcsineNN, ApproxArcsineNN
import matplotlib.pyplot as plt
import math

In [7]:
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [8]:
# Random input batch: a 20 x 784 tensor of standard-normal samples, moved to `device`.
# No seed is set in the visible cells, so the values differ from run to run.
X = torch.randn(20,784).to(device)

In [15]:
# Network on 784-dimensional inputs with three hidden widths of 1024 and 10 outputs.
model = ArcsinNN(
    in_features=784,
    out_features=10,
    hidden_features=[1024] * 3,
    bias=True,
).to(device)
# Wrap it with RepresentArcsineNN using a fixed seed, keeping the first layer
# (ignore_first_layer=False) and project_dim=500.
approx_factoring_scratch = RepresentArcsineNN(
    model,
    seed=20220904,
    ignore_first_layer=False,
    project_dim=500,
).to(device)
# Forward pass on the random batch; the result is displayed as the cell output.
approx_factoring_scratch(X)

tensor([[-0.0193,  0.0607,  0.1705,  0.0276,  0.0770,  0.1142, -0.1347, -0.0080,
         -0.0188,  0.0393],
        [ 0.0146,  0.0361,  0.0222,  0.0258,  0.0262,  0.0227, -0.0133, -0.0590,
          0.0122,  0.0920],
        [-0.0618, -0.0546,  0.0774, -0.0041, -0.0546,  0.0093,  0.0008, -0.0218,
          0.0288, -0.0034],
        [-0.0102,  0.1099, -0.0425,  0.0238,  0.0185, -0.0003, -0.0247, -0.0334,
         -0.0026,  0.0023],
        [-0.0467,  0.0672,  0.1156,  0.0317,  0.0590,  0.0959, -0.0233,  0.0441,
          0.0401,  0.0361],
        [-0.0091, -0.0338,  0.0148,  0.0977, -0.0476, -0.0313, -0.0150,  0.0166,
          0.0621,  0.0810],
        [ 0.0230,  0.0577,  0.0270,  0.0013,  0.0385,  0.0475,  0.0389, -0.0956,
          0.0497,  0.0098],
        [-0.0619,  0.0193, -0.0154,  0.0009,  0.0222, -0.0362, -0.0589, -0.0882,
          0.0404,  0.0248],
        [ 0.0503,  0.0725, -0.0069,  0.0426,  0.0293,  0.1456,  0.0064, -0.1217,
         -0.0243,  0.0983],
        [-0.0683,  

In [4]:
# Show the wrapper's module structure followed by its RandomFeatureMaps attribute.
# NOTE(review): the recorded output lists two bias-free Linear layers and 784x2000 / 2000x2000
# feature maps, which does not match the three 1024-wide hidden layers, bias=True and
# project_dim=500 configured in the cell above. It looks like output from an earlier run
# (execution count 4); re-run to confirm.
print(approx_factoring_scratch, approx_factoring_scratch.RandomFeatureMaps)

RepresentArcsineNN(
  (Flatten): Flatten(start_dim=1, end_dim=-1)
  (Linears): ModuleList(
    (0): Linear(in_features=784, out_features=1024, bias=False)
    (1): Linear(in_features=1024, out_features=1024, bias=False)
  )
  (Output): Linear(in_features=1024, out_features=10, bias=True)
) {784: RandomFeatureMap(
  (weights): ParameterList(  (0): Parameter containing: [torch.cuda.FloatTensor of size 784x2000 (GPU 0)])
), 2000: RandomFeatureMap(
  (weights): ParameterList(  (0): Parameter containing: [torch.cuda.FloatTensor of size 2000x2000 (GPU 0)])
)}


In [15]:
# Single-hidden-layer network (100 -> 200 -> 10, with bias) used by the comparison cells below.
# It is defined explicitly here so that "Restart & Run All" does not depend on a model left
# over from an earlier kernel session: the recorded repr, the 100-feature inputs and the
# RandomFeatureMaps[100] lookup further down all require in_features=100, not the 784-input
# model built earlier. It stays on the CPU, matching the CPU inputs and feature maps below.
model = ArcsinNN(in_features=100, out_features=10, hidden_features=[200], bias=True)
print(model)

ArcsinNN(
  (Flatten): Flatten(start_dim=1, end_dim=-1)
  (Layers): Sequential(
    (LinearArcsine0): LinearArcsine(in_features=100, out_features=200, bias=True)
    (Output): Linear(in_features=200, out_features=10, bias=True)
  )
)


In [16]:
# Wrap the same `model` in both approximation classes, each with project_dim=5000,
# so that their outputs can be compared in the following cells.
model_approximated = ApproxArcsineNN(model,project_dim=5000)
model_approximated_with_composition = RepresentArcsineNN(model,project_dim=5000)

In [17]:
# First line: module structure of both wrappers; second line: their RandomFeatureMaps
# containers, printed side by side to compare how each one keys its feature maps.
print(model_approximated, "\n", model_approximated_with_composition)
print(model_approximated.RandomFeatureMaps, "\n", model_approximated_with_composition.RandomFeatureMaps)

ApproxArcsineNN(
  (Flatten): Flatten(start_dim=1, end_dim=-1)
  (Linears): ModuleList(
    (0): Linear(in_features=100, out_features=200, bias=True)
  )
  (Output): Linear(in_features=200, out_features=10, bias=True)
) 
 RepresentArcsineNN(
  (Flatten): Flatten(start_dim=1, end_dim=-1)
  (Linears): ModuleList(
    (0): Linear(in_features=100, out_features=200, bias=True)
  )
  (Output): Linear(in_features=200, out_features=10, bias=True)
)
{100: RandomFeatureMap(
  (weights): ParameterList(  (0): Parameter containing: [torch.FloatTensor of size 101x5000])
)} 
 {0: RandomFeatureMap(
  (weights): ParameterList(  (0): Parameter containing: [torch.FloatTensor of size 101x5000])
)}


In [18]:
# Make both wrappers use the very same RandomFeatureMap object. ApproxArcsineNN stores its map
# under key 100 while RepresentArcsineNN stores it under key 0 (see the printed dicts above),
# so entry 0 is copied into slot 100.
# Presumably this removes the difference in random projections so the outputs are comparable.
model_approximated.RandomFeatureMaps[100] = model_approximated_with_composition.RandomFeatureMaps[0]

In [19]:
# CPU batch of 50 standard-normal samples with 100 features (rebinds the earlier X).
X = torch.randn((50,100))

In [20]:
# Relative L2 difference between the two wrappers on the same batch, normalised by the
# RepresentArcsineNN output. Each model is evaluated exactly once (the original ran the
# reference forward pass twice), and autograd is disabled because only the number is needed.
with torch.no_grad():
    direct_out = model_approximated(X)
    composed_out = model_approximated_with_composition(X)
    relative_error = torch.norm(direct_out - composed_out) / torch.norm(composed_out)
# Bare last expression so the notebook displays the value.
relative_error

tensor(3.6115e-07, grad_fn=<DivBackward0>)

In [29]:
# Deeper network: 100 inputs, hidden widths 100/200/300, 10 outputs, with bias, on `device`.
model = ArcsinNN(
    in_features=100,
    hidden_features=[100, 200, 300],
    out_features=10,
    bias=True,
).to(device)

In [30]:
# Build both wrappers around the deeper model using their default project_dim.
# NOTE(review): only the RepresentArcsineNN wrapper is moved with .to(device); the recorded
# output shows ApproxArcsineNN's feature maps already on cuda, presumably inherited from
# `model`. Confirm whether an explicit .to(device) is needed here for consistency.
model_approximated = ApproxArcsineNN(model)
model_approximated_with_composition = RepresentArcsineNN(model, ignore_first_layer=False).to(device)
# Print the first wrapper together with its feature maps, then the second wrapper's structure.
print(model_approximated, "\n", model_approximated.RandomFeatureMaps)
print(model_approximated_with_composition)

ApproxArcsineNN(
  (Flatten): Flatten(start_dim=1, end_dim=-1)
  (Linears): ModuleList(
    (0): Linear(in_features=100, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=200, bias=True)
    (2): Linear(in_features=200, out_features=300, bias=True)
  )
  (Output): Linear(in_features=300, out_features=10, bias=True)
) 
 {200: RandomFeatureMap(
  (weights): ParameterList(  (0): Parameter containing: [torch.cuda.FloatTensor of size 201x201 (GPU 0)])
), 100: RandomFeatureMap(
  (weights): ParameterList(  (0): Parameter containing: [torch.cuda.FloatTensor of size 101x101 (GPU 0)])
)}
RepresentArcsineNN(
  (Flatten): Flatten(start_dim=1, end_dim=-1)
  (Linears): ModuleList(
    (0): Linear(in_features=100, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=200, bias=True)
    (2): Linear(in_features=200, out_features=300, bias=True)
  )
  (Output): Linear(in_features=300, out_features=10, bias=True)
)


In [31]:
# Fresh batch of 50 samples with 100 features, moved to the active device.
X = torch.randn((50, 100)).to(device)
# Run the batch through each wrapper in turn and print one output shape per line.
print(
    *(net(X).shape for net in (model_approximated, model_approximated_with_composition)),
    sep="\n",
)

torch.Size([50, 10])
torch.Size([50, 10])


In [3]:
# Same layer widths as the deeper model, but `bias` is left at its default and there is
# no .to(device) call on the model.
model = ArcsinNN(
    in_features=100,
    out_features=10,
    hidden_features=[100, 200, 300],
)

In [4]:
# Wrap the model with every RepresentArcsineNN option left at its default
# (seed, ignore_first_layer and project_dim are not passed; their defaults are not visible here).
model_approximated_with_composition = RepresentArcsineNN(model)

In [5]:
# Forward a CPU batch of 50 x 100 through the default-configured wrapper and show the output shape.
# NOTE(review): in the recorded run this raised
#   RuntimeError: mat1 and mat2 shapes cannot be multiplied (101x100 and 101x101)
# The earlier cell that uses the same hidden widths but passes bias=True explicitly and
# ignore_first_layer=False runs cleanly, so the default ignore_first_layer path in `layers`
# is the likely culprit. TODO confirm and fix in layers.py rather than in this notebook.
X = torch.randn(50,100)
model_approximated_with_composition(X).shape

RuntimeError: mat1 and mat2 shapes cannot be multiplied (101x100 and 101x101)