<a href="https://colab.research.google.com/github/gin7018/image-classifier-convo-nn/blob/main/image_classifier_convolutional_nn_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision

In [29]:
import torch
import torchvision
import transformers

class ImageClassifierModel(transformers.PreTrainedModel):

  def __init__(self, num_labels=10):
    model_config = transformers.PretrainedConfig(
        num_channels=[32, 64],
        kernel_sizes=[3, 3],
        hidden_size=1600,
        num_classes=num_labels,
        pool_type="max"

    )
    super(ImageClassifierModel, self).__init__(config=model_config)

    self.convo1= torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
    self.convo2= torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
    self.max_pooling1 = torch.nn.MaxPool2d(kernel_size=2, stride=2)

    self.convo3= torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
    self.convo4= torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
    self.max_pooling2 = torch.nn.MaxPool2d(kernel_size=2, stride=2)

    self.connected_layer1 = torch.nn.Linear(in_features=1600, out_features=128)
    self.activation_fun = torch.nn.ReLU()
    self.connected_layer2 = torch.nn.Linear(in_features=128, out_features=num_labels)

  def forward(self, image_input):
    output = self.convo1(image_input)
    output= self.convo2(output)
    output = self.max_pooling1(output)

    output = self.convo3(output)
    output = self.convo4(output)
    output = self.max_pooling2(output)

    output = output.reshape(output.size(0), -1)

    output = self.connected_layer1(output)
    output = self.activation_fun(output)
    output = self.connected_layer2(output)
    return output

In [3]:
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import TensorDataset, random_split, DataLoader, RandomSampler, SequentialSampler

data_transformer = transforms.Compose([transforms.Resize((32,32)),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                                          std=[0.2023, 0.1994, 0.2010])
                                     ])

training_set = torchvision.datasets.CIFAR10(
    root="./data",
    train=True,
    transform=data_transformer,
    download=True
)

validation_set = torchvision.datasets.CIFAR10(
    root="./data",
    train=False,
    transform=data_transformer,
    download=True
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 28753153.36it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [30]:
# model training parameters
BATCH_SIZE = 64
EPOCHS = 21
LEARNING_RATE = 0.001 # TOO LOW OR TOO HIGH of a lr leads to convergence issues (nan) during training

training_data_loader = DataLoader(
    dataset=training_set,
    batch_size=BATCH_SIZE,
    sampler=RandomSampler(training_set)
)

validation_data_loader = DataLoader(
    dataset=validation_set,
    batch_size=BATCH_SIZE,
    sampler=SequentialSampler(validation_set)
)

In [32]:
from tqdm import tqdm

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = ImageClassifierModel()
model.to(device)

optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=LEARNING_RATE
)
loss_function = torch.nn.CrossEntropyLoss()

best_accuracy = 0.0
for epoch in range(EPOCHS):
  model.train()
  training_progress_bar = tqdm(training_data_loader, desc=f"Epoch {epoch + 1} - Training")
  for idx, (images, target_labels) in enumerate(training_progress_bar):
    images = images.to(device)
    target_labels = target_labels.to(device)

    outputs = model(images)
    loss = loss_function(outputs, target_labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    training_progress_bar.set_postfix(loss=loss.item())


  # testing how good our model is at classifying the sentences
  model.eval()
  total_correct = 0
  total_samples = 0
  validation_progress_bar = tqdm(validation_data_loader, desc=f"Epoch {epoch + 1} - Testing")
  with torch.no_grad():
    for idx, (images, target_labels) in enumerate(validation_progress_bar):
      images = images.to(device)
      target_labels = target_labels.to(device)

      outputs = model(images)
      _, predictions = torch.max(outputs.data, 1)
      total_correct += (predictions == target_labels).sum().item()
      total_samples += target_labels.size(0)
    print(f"epoch: {epoch+1}, accuracy: {100 * (total_correct / total_samples)}")

  current_accuracy = total_correct / total_samples
  if current_accuracy > best_accuracy:
    best_accuracy = current_accuracy
    print("saving to huggingface...")
    model.save_pretrained(
      save_directory="./model",
      state_dict=model.state_dict(),
      push_to_hub=True,
      repo_id="ghislainehaha/image-classifier-cnn",
    )




Epoch 1 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.51it/s, loss=0.858]
Epoch 1 - Testing: 100%|██████████| 157/157 [00:03<00:00, 52.02it/s]


epoch: 1, accuracy: 63.4
saving to huggingface...


model.safetensors:   0%|          | 0.00/1.09M [00:00<?, ?B/s]

Epoch 2 - Training: 100%|██████████| 782/782 [00:24<00:00, 32.34it/s, loss=0.552]
Epoch 2 - Testing: 100%|██████████| 157/157 [00:02<00:00, 58.52it/s]


epoch: 2, accuracy: 66.02
saving to huggingface...


model.safetensors:   0%|          | 0.00/1.09M [00:00<?, ?B/s]

Epoch 3 - Training: 100%|██████████| 782/782 [00:20<00:00, 37.84it/s, loss=0.816]
Epoch 3 - Testing: 100%|██████████| 157/157 [00:02<00:00, 56.17it/s]


epoch: 3, accuracy: 70.17
saving to huggingface...


model.safetensors:   0%|          | 0.00/1.09M [00:00<?, ?B/s]

Epoch 4 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.41it/s, loss=1.07]
Epoch 4 - Testing: 100%|██████████| 157/157 [00:03<00:00, 51.31it/s]


epoch: 4, accuracy: 70.23
saving to huggingface...


model.safetensors:   0%|          | 0.00/1.09M [00:00<?, ?B/s]

Epoch 5 - Training: 100%|██████████| 782/782 [00:19<00:00, 39.26it/s, loss=0.264]
Epoch 5 - Testing: 100%|██████████| 157/157 [00:03<00:00, 47.04it/s]


epoch: 5, accuracy: 70.95
saving to huggingface...


model.safetensors:   0%|          | 0.00/1.09M [00:00<?, ?B/s]

Epoch 6 - Training: 100%|██████████| 782/782 [00:21<00:00, 36.76it/s, loss=0.683]
Epoch 6 - Testing: 100%|██████████| 157/157 [00:03<00:00, 46.02it/s]


epoch: 6, accuracy: 69.92


Epoch 7 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.49it/s, loss=0.31]
Epoch 7 - Testing: 100%|██████████| 157/157 [00:03<00:00, 51.79it/s]


epoch: 7, accuracy: 69.5


Epoch 8 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.47it/s, loss=0.997]
Epoch 8 - Testing: 100%|██████████| 157/157 [00:02<00:00, 56.45it/s]


epoch: 8, accuracy: 70.44


Epoch 9 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.28it/s, loss=0.303]
Epoch 9 - Testing: 100%|██████████| 157/157 [00:02<00:00, 56.86it/s]


epoch: 9, accuracy: 69.41000000000001


Epoch 10 - Training: 100%|██████████| 782/782 [00:20<00:00, 37.98it/s, loss=0.197]
Epoch 10 - Testing: 100%|██████████| 157/157 [00:02<00:00, 56.63it/s]


epoch: 10, accuracy: 69.49


Epoch 11 - Training: 100%|██████████| 782/782 [00:19<00:00, 39.23it/s, loss=0.919]
Epoch 11 - Testing: 100%|██████████| 157/157 [00:03<00:00, 43.83it/s]


epoch: 11, accuracy: 68.46


Epoch 12 - Training: 100%|██████████| 782/782 [00:19<00:00, 39.15it/s, loss=0.285]
Epoch 12 - Testing: 100%|██████████| 157/157 [00:02<00:00, 54.37it/s]


epoch: 12, accuracy: 68.47


Epoch 13 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.20it/s, loss=0.0591]
Epoch 13 - Testing: 100%|██████████| 157/157 [00:02<00:00, 58.16it/s]


epoch: 13, accuracy: 67.86999999999999


Epoch 14 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.21it/s, loss=0.156]
Epoch 14 - Testing: 100%|██████████| 157/157 [00:02<00:00, 53.85it/s]


epoch: 14, accuracy: 68.28999999999999


Epoch 15 - Training: 100%|██████████| 782/782 [00:19<00:00, 39.33it/s, loss=0.861]
Epoch 15 - Testing: 100%|██████████| 157/157 [00:03<00:00, 47.86it/s]


epoch: 15, accuracy: 68.56


Epoch 16 - Training: 100%|██████████| 782/782 [00:19<00:00, 39.22it/s, loss=0.41]
Epoch 16 - Testing: 100%|██████████| 157/157 [00:03<00:00, 49.23it/s]


epoch: 16, accuracy: 67.63


Epoch 17 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.77it/s, loss=0.178]
Epoch 17 - Testing: 100%|██████████| 157/157 [00:02<00:00, 58.09it/s]


epoch: 17, accuracy: 68.31


Epoch 18 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.29it/s, loss=0.216]
Epoch 18 - Testing: 100%|██████████| 157/157 [00:02<00:00, 57.18it/s]


epoch: 18, accuracy: 68.26


Epoch 19 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.27it/s, loss=0.0264]
Epoch 19 - Testing: 100%|██████████| 157/157 [00:02<00:00, 52.75it/s]


epoch: 19, accuracy: 67.14


Epoch 20 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.98it/s, loss=0.105]
Epoch 20 - Testing: 100%|██████████| 157/157 [00:03<00:00, 45.93it/s]


epoch: 20, accuracy: 67.80000000000001


Epoch 21 - Training: 100%|██████████| 782/782 [00:20<00:00, 38.95it/s, loss=0.0492]
Epoch 21 - Testing: 100%|██████████| 157/157 [00:02<00:00, 58.91it/s]

epoch: 21, accuracy: 67.66



