In [9]:
import torch
import torchvision
import numpy as np

import warnings
warnings.filterwarnings('ignore') 

In [10]:
# Select the default pretrained weight set for EfficientNet-B0 ...
efnet_weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
# ... and build the network initialised with those weights.
efnet_model = torchvision.models.efficientnet_b0(weights=efnet_weights)

In [12]:
# Print the module tree of the loaded network (layer names, types and hyperparameters).
print(efnet_model)

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [13]:
# Show the preprocessing transform that ships with the selected weights.
print(efnet_weights.transforms())

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)


# 1 - a
kernel_size is (3, 3).

# 1 - b

This is a regularization technique. Randomly setting elements to zero helps prevent the model from overfitting the data.


In [14]:
### Read flattened, processed data
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

fash_mnist = pd.read_csv("https://remiller1450.github.io/data/fashion_mnist_train.csv")

## Train-test split (fixed random_state so the split is the same on every run)
train_fash, test_fash = train_test_split(fash_mnist, test_size=0.1, random_state=5)

### Separate the label column (outcome)
train_y = train_fash['y']
train_X = train_fash.drop(['y'], axis=1)
test_y = test_fash['y']
test_X = test_fash.drop(['y'], axis=1)

### Convert to numpy array then reshape to N by 28 by 28
## -1 lets numpy infer the number of training images, so this keeps working
## if the dataset size or `test_size` changes (previously hard-coded to 900).
mnist_unflattened = train_X.to_numpy()
mnist_unflattened = mnist_unflattened.reshape(-1, 28, 28)

## Convert to tensor and add a channel axis: (N, 28, 28) -> (N, 1, 28, 28)
mnist_tensor = torch.from_numpy(mnist_unflattened)
mnist_tensor = torch.unsqueeze(mnist_tensor, dim=1)

## Transform to proper input shape (duplicate the single color channel to produce 3 channels)
new_mnist_tensors = mnist_tensor.expand(-1, 3, -1, -1)

## Store in DataLoader
## Go through .to_numpy() so the labels are read positionally; after the split
## `train_y` carries a shuffled, non-contiguous pandas index.
y_tensor = torch.Tensor(train_y.to_numpy())
train_loader = DataLoader(
    TensorDataset(
        new_mnist_tensors.type(torch.FloatTensor),  # images as float32
        y_tensor.type(torch.LongTensor),            # integer class labels
    ),
    batch_size=100,
)


In [None]:
## Loop through each parameter and set `requires_grad` to false
for param in efnet_model.features.parameters():
    param.requires_grad = False
## Replace the "classifier" layer with one for our application
efnet_model.classifier = torch.nn.Sequential( 
    torch.nn.Linear(in_features=1280, out_features=10, bias=True))

In [23]:
# Count every scalar weight in the `features` sub-network.
# `numel()` gives the element count of each parameter tensor as a plain int,
# so no intermediate numpy arrays are needed.
sum(param.numel() for param in efnet_model.features.parameters())

4007548

# 2-a 

The total number of parameters in `efnet_model.features` (the frozen feature extractor) is 4,007,548.

# 2-b

`avgpool` will not learn any parameters from the dataset.