In [None]:
!pip install pytorch_lightning
import pytorch_lightning as pl
pl.__version__

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms, models
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from sklearn.metrics import average_precision_score, roc_auc_score
from torch.utils.data import DataLoader
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import glob

In [None]:
# EfficientNet-B0 weights file hosted on download.pytorch.org.
weights_url = "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth"
# -nc (no-clobber): skip the download when the file is already present locally.
!wget -nc {weights_url}

In [None]:
# Notebook-wide settings read by the cells below.
epochs = 20  # handed to pl.Trainer as max_epochs
# NOTE(review): `Labels` is not referenced by any other visible cell; the class names
# used for training come from df['class'].unique() instead.
Labels = ['herd', 'not_herd']
batch_size = 32  # default batch size of DataModule and the value passed to it
efficientnet_weights_path = "efficientnet_b0_rwightman-7f5810bc.pth"  # local weights file read by Classifier
scheduler_step = 7  # presumably the StepLR step size in epochs — confirm it is wired into Classifier
gamma = 0.1  # presumably the StepLR decay factor — confirm it is wired into Classifier
IMAGE_MODE = 'RGB'  # PIL mode every image is converted to in Counter.__getitem__


EfficientNet

In [None]:
class EfficientnetB0(nn.Module):
    """torchvision EfficientNet-B0 with its classifier replaced by a sigmoid head.

    Args:
        list_of_classes: Sequence of class names; its length is the number of outputs.
        weights_path: Optional path to a state dict for torchvision's
            ``efficientnet_b0``. When falsy the backbone keeps its random initialisation.
    """

    def __init__(self, list_of_classes, weights_path=None):
        super(EfficientnetB0, self).__init__()
        self.list_of_classes = list_of_classes
        self.num_classes = len(self.list_of_classes)
        self.model = self.load_efficientnet(weights_path)
        # Read the feature width from the backbone's own final Linear layer instead of
        # hard-coding 1280, so the head always matches the loaded architecture.
        in_features = self.model.classifier[-1].in_features
        self.model.classifier = nn.Sequential(
            nn.Dropout(0.2, inplace=True),
            nn.Linear(in_features=in_features, out_features=self.num_classes, bias=True),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return per-class sigmoid probabilities for the image batch ``x``."""
        return self.model(x)

    def load_efficientnet(self, weights_path):
        """Build an EfficientNet-B0 and, if ``weights_path`` is truthy, load its weights.

        Args:
            weights_path: Path to a state dict file, or a falsy value to skip loading.

        Returns:
            The torchvision EfficientNet-B0 module with its original classifier.
        """
        model = models.efficientnet_b0(weights=None)
        if weights_path:
            print('Loading Weights')
            # map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
            # host; load_state_dict copies the values into the model's own tensors.
            state_dict = torch.load(weights_path, map_location='cpu', weights_only=True)
            model.load_state_dict(state_dict)
        return model

Classifier

In [None]:
class Classifier(pl.LightningModule):
  """LightningModule that trains EfficientnetB0 with binary cross-entropy.

  The backbone weights are read from the module-level ``efficientnet_weights_path``.

  Args:
    list_of_labels: Class names; the model emits one sigmoid output per entry.
    learning_rate: Adam learning rate.
    weight_decay: Adam weight decay.
    scheduler_step_size: ``step_size`` passed to StepLR.
    scheduler_gamma: ``gamma`` passed to StepLR.
  """

  def __init__(self, list_of_labels, learning_rate=1e-4, weight_decay=1e-4, scheduler_step_size=20, scheduler_gamma=0.1):
    super(Classifier, self).__init__()
    self.list_of_labels = list_of_labels
    self.num_classes = len(list_of_labels)
    self.model = EfficientnetB0(self.list_of_labels, efficientnet_weights_path)
    # EfficientnetB0 already ends in a Sigmoid, so the loss takes probabilities, not logits.
    self.loss_fn = nn.BCELoss()
    self.learning_rate = learning_rate
    self.weight_decay = weight_decay
    self.scheduler_step_size = scheduler_step_size
    self.scheduler_gamma = scheduler_gamma
    # Per-step (targets, predictions) numpy pairs; consumed and emptied at every epoch end
    # so they no longer grow for the whole run.
    self.train_outputs = []
    self.valid_outputs = []

  def forward(self, x):
    """Return the model's per-class probabilities for the image batch ``x``."""
    return self.model(x)

  def _step(self, batch, loss_name, outputs):
    """Run one forward pass, log the loss under ``loss_name`` and buffer the results."""
    image, label = batch
    image = image.float()
    label = label.to(torch.float)
    pred = self(image)
    loss = self.loss_fn(pred, label)

    self.log(loss_name, loss, prog_bar=True)
    outputs.append((label.detach().cpu().numpy(), pred.detach().cpu().numpy()))

    return loss

  def _log_epoch_metrics(self, outputs, stage):
    """Log macro average precision and ROC AUC over ``outputs``, then empty the buffer.

    Metrics are logged as ``{stage}_ap`` and ``{stage}_auc``. A metric that sklearn
    cannot compute for this epoch is skipped rather than aborting training.
    """
    if not outputs:
      return
    import numpy as np  # local import: numpy is not imported at notebook level

    targets = np.concatenate([target for target, _ in outputs], axis=0)
    scores = np.concatenate([score for _, score in outputs], axis=0)
    outputs.clear()

    for name, metric in (('ap', average_precision_score), ('auc', roc_auc_score)):
      try:
        value = metric(targets, scores, average='macro')
      except ValueError:
        # Raised e.g. when a class column holds a single value in this epoch
        # (possible during the short sanity-check validation run).
        continue
      self.log(f'{stage}_{name}', float(value))

  def training_step(self, batch):
    """Compute and log ``train_loss`` for one batch of (image, one-hot label)."""
    return self._step(batch, 'train_loss', self.train_outputs)

  def validation_step(self, batch):
    """Compute and log ``val_loss`` for one batch of (image, one-hot label)."""
    return self._step(batch, 'val_loss', self.valid_outputs)

  def on_train_epoch_end(self):
    """Summarise and release the predictions buffered during the training epoch."""
    self._log_epoch_metrics(self.train_outputs, 'train')

  def on_validation_epoch_end(self):
    """Summarise and release the predictions buffered during validation."""
    self._log_epoch_metrics(self.valid_outputs, 'val')

  def configure_optimizers(self):
    """Return Adam over all parameters together with a StepLR schedule."""
    optimizer = optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=self.scheduler_step_size, gamma=self.scheduler_gamma)
    return [optimizer], [scheduler]

In [None]:
class Counter(Dataset):
  """Image classification dataset yielding (normalised image tensor, one-hot label).

  Args:
    list_of_classes: Class names; its length is the width of the one-hot label.
    is_training: When true, a random horizontal flip is appended to the transform.
    image_paths: Sequence of image file paths.
    image_labels: Sequence of integer class indices aligned with ``image_paths``.
  """

  def __init__(self, list_of_classes, is_training, image_paths, image_labels):
    self.list_of_classes = list_of_classes
    self.num_classes = len(self.list_of_classes)
    self.class_to_index = {class_name: idx for idx, class_name in enumerate(list_of_classes)}
    self.image_paths = image_paths
    self.is_training = is_training
    self.image_labels = image_labels

    transform = transforms.Compose([
      transforms.Resize((224,224)),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
      ])

    if self.is_training:
      # The flip runs after ToTensor/Normalize, i.e. on the tensor, not the PIL image.
      transform = transforms.Compose([
          transform,
          transforms.RandomHorizontalFlip(),
      ])

    self.transform = transform

  def __len__(self):
    """Return the number of image paths."""
    return len(self.image_paths)

  def __getitem__(self, idx):
    """Load image ``idx`` in ``IMAGE_MODE`` and return (tensor, one-hot label)."""
    # The context manager closes the file handle as soon as the pixels are read;
    # convert() loads the data and returns an independent image, so it stays usable
    # after the file is closed. Without this, handles pile up in every worker.
    with Image.open(self.image_paths[idx]) as handle:
      image = handle.convert(IMAGE_MODE)
    image = self.transform(image)

    label = self.image_labels[idx]
    label = torch.nn.functional.one_hot(torch.tensor(label), num_classes=self.num_classes)

    return image, label

  @staticmethod
  def collate_fn(batch):
    """Stack a list of (image, label) pairs into batched image and label tensors."""
    images, labels = zip(*batch)
    images = torch.stack(images, dim=0)
    labels = torch.stack(labels, dim=0)
    return images, labels

class DataModule(pl.LightningDataModule):
  """LightningDataModule that wraps pre-split train/validation path and label lists.

  Args:
    list_of_classes: Class names forwarded to each Counter dataset.
    train_labels: Integer labels aligned with ``train_image_paths``.
    val_labels: Integer labels aligned with ``val_image_paths``.
    train_image_paths: Image paths of the training split.
    val_image_paths: Image paths of the validation split.
    batch_size: Batch size used by both loaders.
  """

  def __init__(self, list_of_classes, train_labels, val_labels, train_image_paths, val_image_paths, batch_size=batch_size):
    super().__init__()
    self.list_of_classes = list_of_classes
    self.train_labels = train_labels
    self.val_labels = val_labels
    self.train_image_paths = train_image_paths
    self.val_image_paths = val_image_paths
    self.batch_size = batch_size

  def prepare_data(self):
    """Nothing to download or preprocess; the paths are supplied by the caller."""
    pass

  def setup(self, stage=None):
    """Build the datasets required for ``stage`` ('fit', 'validate' or None)."""
    if stage in ('fit', None):
      self.train_dataset = Counter(
          list_of_classes=self.list_of_classes,
          is_training=True,
          image_paths=self.train_image_paths,
          image_labels=self.train_labels
          )
    # The validation set is also needed when only trainer.validate() is run.
    if stage in ('fit', 'validate', None):
      self.val_dataset = Counter(
          list_of_classes=self.list_of_classes,
          is_training=False,
          image_paths=self.val_image_paths,
          image_labels=self.val_labels
      )

  def _num_workers(self):
    """Return one worker per CPU, or 0 when the CPU count cannot be determined."""
    return os.cpu_count() or 0

  def train_dataloader(self):
    """Return the shuffled training loader; the final partial batch is dropped."""
    return DataLoader(self.train_dataset,
                      batch_size=self.batch_size,
                      shuffle=True,
                      pin_memory=True,
                      num_workers=self._num_workers(),
                      drop_last=True,
                      collate_fn=self.train_dataset.collate_fn
                      )

  def val_dataloader(self):
    """Return the validation loader covering every validation sample."""
    return DataLoader(self.val_dataset,
                      batch_size=self.batch_size,
                      shuffle=False,
                      pin_memory=True,
                      num_workers=self._num_workers(),
                      # Keep the last partial batch: dropping it would leave samples
                      # unevaluated (all of them if the split is smaller than one batch).
                      drop_last=False,
                      collate_fn=self.val_dataset.collate_fn
                      )


In [None]:
import os
from google.colab import userdata
# Copy the Kaggle credentials from Colab's user-secret store into environment variables
# so they are not hard-coded in the notebook.
# NOTE(review): no visible cell reads these variables — presumably a Kaggle download
# step relied on them; confirm they are still needed.
os.environ['KAGGLE_USERNAME'] = userdata.get('KAGGLE_USERNAME')
os.environ['KAGGLE_KEY'] = userdata.get('KAGGLE_KEY')

In [None]:
import pandas as pd
import re
# Load the annotation table; later cells read its 'class' and 'filepath' columns.
df = pd.read_csv('Herd_Data.csv')
# (rows, columns) of the loaded table.
df.shape

In [None]:
# Distinct class names; this list is later passed to DataModule and Classifier, where
# its length fixes the number of model outputs.
# NOTE(review): Counter one-hot encodes the integer 'category' value, not a position in
# this list, so entry i here is not guaranteed to correspond to model output i.
label_names = df['class'].unique().tolist()
label_names

In [None]:
# Binary target: 1 for rows whose class is 'herd', 0 for every other class.
df["category"] = (df['class'] == 'herd').astype(int)
# Show the file path stored under index label 0 to inspect its format.
df['filepath'][0]

In [None]:
# Expected file-path layout, anchored at both ends.
part = r'^...-\d+/images/...\d{4}\.JPG$'
# Display the rows whose 'filepath' does NOT match the expected layout.
df[df['filepath'].apply(lambda x: not re.match(part, x))]

In [None]:
# Remove leading "../" segments only. str.strip('../') treats its argument as a set of
# characters and would trim any run of '.' and '/' from BOTH ends of the path.
df['filepath'] = df['filepath'].str.replace(r'^(?:\.\./)+', '', regex=True)

In [None]:
# NOTE(review): df2 is loaded and its shape displayed, but no other visible cell uses it.
df2 = pd.read_csv('Herd.csv')
df2.shape

In [None]:
# Integer targets and full image paths, both in the row order of df.
labels = df['category'].tolist()
image_paths = [
    os.path.join('...', relative_path)
    for relative_path in df['filepath'].tolist()
]

In [None]:
# Spot-check the first five assembled paths.
image_paths[:5]

In [None]:
# Show how many paths were assembled instead of dumping the whole list into the output.
len(image_paths)

In [None]:
# NOTE(review): `b_path` is not assigned in any visible cell, so on a fresh kernel this
# raises NameError — define it before this cell or remove the check.
if os.path.exists(b_path):
  print('exists')

In [None]:
import re

# Split by the batch number embedded in each path: batches 1-28 go to training,
# batches 29-32 to validation. Splitting on whole batches keeps every image of a
# batch on the same side of the split.
batch_pattern = re.compile(r'MC005-Batch-(\d+)')

train_image_paths = []
train_labels = []
val_image_paths = []
val_labels = []
skipped_paths = []  # paths with no batch number or a batch outside 1-32

for image_path, label in zip(image_paths, labels):
  match = batch_pattern.search(image_path)
  batch_number = int(match.group(1)) if match else None
  if batch_number is not None and 1 <= batch_number <= 28:
    train_image_paths.append(image_path)
    train_labels.append(label)
  elif batch_number is not None and 29 <= batch_number <= 32:
    val_image_paths.append(image_path)
    val_labels.append(label)
  else:
    skipped_paths.append(image_path)

print(f"Train: {len(train_image_paths)} Val: {len(val_image_paths)}")
if skipped_paths:
  # Surface samples that ended up in neither split rather than dropping them silently.
  print(f"Skipped (no matching batch 1-32): {len(skipped_paths)}, e.g. {skipped_paths[0]}")

In [None]:
# Display the annotation frame, including the derived 'category' column.
df

In [None]:
# prompt: Using dataframe df: change category to have 0 for herd and 1 for non-herd

# NOTE(review): this overwrites the 'category' column that an earlier cell set to
# (class == 'herd').astype(int), i.e. it applies the opposite encoding. The `labels`
# list was already extracted from the earlier column, so when the notebook is run top
# to bottom this remap does not change the labels used for training — confirm which
# encoding is intended and apply it before `labels` is built.
# Create a mapping for the class categories to numerical values
class_mapping = {'herd': 0, 'non-herd': 1}

# Use the mapping to update the 'category' column
# (Series.map yields NaN for any class value that is missing from the mapping.)
df['category'] = df['class'].map(class_mapping)


In [None]:
# Data module: wraps the train/val path and label lists built by the split cell.
data_module = DataModule(label_names, train_labels, val_labels,
                         train_image_paths, val_image_paths, batch_size)

# Model: pass the StepLR settings from the config cell. With Classifier's default
# step size of 20 and max_epochs = 20, the learning rate would never decay during the run.
model = Classifier(
    label_names,
    scheduler_step_size=scheduler_step,
    scheduler_gamma=gamma,
)

# Checkpointing: keeps the single checkpoint with the lowest val_loss.
# NOTE: this callback is not registered while the `callbacks` line below stays commented out.
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    dirpath='./checkpoints',
    save_top_k=1,
    mode='min',
    filename='classifier-{epoch:02d}-{val_loss:.2f}',
)
trainer = pl.Trainer(
    max_epochs=epochs,
    default_root_dir='model',
    #callbacks = [checkpoint_callback],
    #accelerator='auto',
    #strategy='ddp',

)
trainer.fit(model, data_module)

In [None]:
from tqdm import tqdm
import fiftyone as fo
import fiftyone.zoo as foz

In [None]:
import glob
# Collect the filesystem entries matching this glob pattern; the loop in the next cell
# treats each one as a batch directory containing an 'images' sub-folder.
batch_paths = glob.glob("/content/...*")

In [None]:
batches = os.listdir('/content/...')

# Build one FiftyOne dataset per batch directory, named after the directory.
# `dataset` is rebound on every iteration, so after the loop it refers to the
# dataset of the last batch path.
for batch_path in tqdm(batch_paths):
    images_dir = os.path.join(batch_path, 'images')
    batch = os.path.basename(batch_path)
    batch_images = os.listdir(images_dir)
    batch_image_paths = [
        os.path.join(images_dir, file_name) for file_name in batch_images
    ]

    dataset = fo.Dataset(batch, overwrite=True)

    # Register each image individually so that one bad file is reported
    # without stopping the rest of the batch.
    for img_path in batch_image_paths:
        try:
            sample = fo.Sample(filepath=img_path)
            dataset.add_sample(sample)
        except Exception as e:
            print(f"Error processing image {img_path}: {e}")

In [None]:
# Open a FiftyOne app session on `dataset`, which after the loop in the previous cell
# refers to the dataset built for the last entry of batch_paths.
# NOTE(review): auto=False presumably defers rendering until session.show() is called —
# confirm against the FiftyOne documentation.
session = fo.launch_app(dataset, auto=False)

In [None]:
# Show the app for the session created in the previous cell.
session.show()