In [1]:
import pandas as pd
import torch
import torch.nn as nn
import tensorflow as tf

from torch.utils.data import Dataset, DataLoader, Subset, random_split
from torchvision.io import read_image
import torchvision.models as models
import matplotlib.pyplot as plt
import pathlib

import albumentations as A

import torchvision.transforms as transforms
import os


In [2]:
!unzip public_tests.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: 00_test_img_input/test/images/0672.jpg  
  inflating: 00_test_img_input/test/images/2085.jpg  
  inflating: 00_test_img_input/test/images/0106.jpg  
  inflating: 00_test_img_input/test/images/1056.jpg  
  inflating: 00_test_img_input/test/images/1159.jpg  
  inflating: 00_test_img_input/test/images/0256.jpg  
  inflating: 00_test_img_input/test/images/2269.jpg  
  inflating: 00_test_img_input/test/images/0664.jpg  
  inflating: 00_test_img_input/test/images/1981.jpg  
  inflating: 00_test_img_input/test/images/0929.jpg  
  inflating: 00_test_img_input/test/images/1888.jpg  
  inflating: 00_test_img_input/test/images/0957.jpg  
  inflating: 00_test_img_input/test/images/0389.jpg  
  inflating: 00_test_img_input/test/images/0323.jpg  
  inflating: 00_test_img_input/test/images/0717.jpg  
  inflating: 00_test_img_input/test/images/0186.jpg  
  inflating: 00_test_img_input/test/images/0468.jpg  
  inflating: 00_t

In [98]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device("cuda")

model = models.mobilenet_v2(weights='IMAGENET1K_V2')
# model = model.to(device)
# model.cuda()

for param in model.parameters():
    param.requires_grad = False

num_ftrs = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(num_ftrs, 50) # 50 species


In [99]:
# class CustomDataset(Dataset):
#     def __init__(self, root_dir, images_per_class=50, transform=None, train=True):
#         self.root_dir = root_dir
#         self.transform = transform
#         self.images_per_class = images_per_class
#         self.train = train

#         if self.train:
#             self.images = sorted(os.listdir(root_dir))
#         else:
#             self.images = sorted(os.listdir(root_dir))

#     def __len__(self):
#         return len(self.images)

#     def __getitem__(self, idx):
#         if self.train:
#             idx = int(idx)  # Ensure idx is an integer
#             img_name = os.path.join(self.root_dir, self.images[idx])
#             label = idx // self.images_per_class
#         else:
#             idx = int(idx)  # Ensure idx is an integer
#             img_name = os.path.join(self.root_dir, self.images[idx])
#             label = int(self.images[idx].split('.')[0]) // self.images_per_class

#         image = Image.open(img_name).convert('RGB')
#         if self.transform:
#             image = self.transform(image)

#         return image, label




class CustomDataset(Dataset):
    def __init__(self, root_dir, images_per_class=50, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images_per_class = images_per_class
        self.images = sorted(os.listdir(root_dir))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.images[idx])
        image = Image.open(img_name).convert('RGB')
        if self.transform:
            image = self.transform(image)
        label = idx // self.images_per_class
        return image, label

In [100]:
# Example usage:
# data_transform = transforms.Compose([
#     transforms.Resize((224, 224)),  # Resize images to the desired size
#     transforms.ToTensor(),  # Convert images to PyTorch tensors
#     transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize pixel values
# ])
data_transform = transforms.Compose([
        transforms.Resize(256, antialias=False),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.ConvertImageDtype(torch.float32),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

In [101]:
# import random

# num_train_per_class = 40
# num_val_per_class = 10

# # List to store indices of images for training and validation
# train_indices = []
# val_indices = []

# # Iterate over classes
# for class_idx in range(50):
#     # List all image filenames for the current class
#     class_images = [f'{class_idx:04d}.jpg' for class_idx in range(class_idx * 50 + 1, (class_idx + 1) * 50 + 1)]
#     # Shuffle the image filenames
#     random.shuffle(class_images)
#     # Assign indices for training and validation sets
#     train_indices.extend(class_images[:num_train_per_class])
#     val_indices.extend(class_images[num_train_per_class:])

# # Create subsets for training and validation
# train_dataset = Subset(CustomDataset(root_dir='00_test_img_input/train/images'), train_indices)
# val_dataset = Subset(CustomDataset(root_dir='00_test_img_input/train/images'), val_indices)

# # Create data loaders for training and validation
# train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=64, shuffle=True)

In [102]:
# train_data = CustomDataset('00_test_img_input/train/images', images_per_class=50, transform=data_transform)
# train_data_loader = DataLoader(train_data, batch_size=64, shuffle=True)

# valid_data = CustomDataset('00_test_img_input/test/images', images_per_class=50, transform=data_transform)
# valid_data_loader = DataLoader(valid_data, batch_size=64, shuffle=True)

data = CustomDataset('00_test_img_input/train/images', images_per_class=50, transform=data_transform)

train_size = int(0.9 * len(data))
valid_size = len(data) - train_size

train_data, valid_data = random_split(data, [train_size, valid_size])

train_data_loader = DataLoader(train_data, batch_size=64, shuffle=True)
valid_data_loader = DataLoader(valid_data, batch_size=64, shuffle=True)

In [103]:
# for inputs, targets in train_data_loader:
#     inputs, targets = inputs.to(device), targets.to(device)

# for inputs, targets in valid_data_loader:
#     inputs, targets = inputs.to(device), targets.to(device)

In [104]:
from torch.optim import Adam

In [105]:
creterion = nn.CrossEntropyLoss()
# creterion.to(device)
optimizer = Adam(model.parameters())
# optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [106]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score
from PIL import Image

In [107]:
num_epochs = 15
best_accuracy = 0
early_stopping_counter = 0
early_stopping_limit = 5

for epoch in range(num_epochs):
  model.train()
  for x, y in tqdm(train_data_loader):
    # x, y = x.to(device), y.to(device)
    y_pred = model(x)
    loss = creterion(y_pred, y)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

  model.eval()
  val_predictions = []
  val_targets = []
  with torch.no_grad():
    for x, y in tqdm(valid_data_loader):
      # x, y = x.to(device), y.to(device)
      y_pred = model(x)
      val_predictions.extend(torch.argmax(y_pred, dim=1).cpu().tolist())
      val_targets.extend(y.cpu().y.tolist())

  accuracy = accuracy_score(val_targets, val_predictions)

  if accuracy > best_accuracy:
    best_accuracy = accuracy
    early_stopping_counter = 0
  else:
    early_stopping_counter += 1

  if early_stopping_counter == early_stopping_limit:
    print(f'Early stopping at epoch {epoch} due to lack of improvement in validation accuracy.')
    break

  print(f'Epoch {epoch}: Validation Accuracy: {accuracy}')

  0%|          | 0/36 [00:00<?, ?it/s]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)

In [None]:
# from torchvision import transforms
# from PIL import Image

# # base transformations
# transform = transforms.Compose([
#     transforms.Grayscale(),
#     transforms.ToTensor(),
# ])


# def predict_number(image_path):
#     image = Image.open(image_path)
#     image = transform(image).unsqueeze(0)

#     with torch.no_grad():
#         output = model(image)

#     _, predicted = torch.max(output, 1)
#     return predicted.item(), image

# image_paths = ["Centered_8.png", "Centered_3.png", "Uncentered_3.png"]

# for image_path in image_paths:
#     predicted_number, image = predict_number(image_path)

#     plt.imshow(image.squeeze(), cmap='gray')
#     plt.title(f"Predicted Number: {predicted_number}")
#     plt.axis('off')
#     plt.show()

# torch.save(model.state_dict(), "model_weights.pth")